In [6]:
# Importing necessary libraries
import pandas as pd

# Fetch the early-stage diabetes dataset straight from the UCI repository.
# If the remote file cannot be reached, download the CSV manually and point
# `url` at the local copy instead.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00529/diabetes_data_upload.csv"
df = pd.read_csv(url)

# Preview the top rows to get a feel for the columns and their values.
print("Initial data:", df.head(), sep="\n")

# MELT FUNCTION
print()
print("melt", "function")
# Reshape wide -> long: 'class' stays as the identifier column, while the
# 'Gender' and 'Polyuria' columns are stacked into name/value pairs
# ('Symptoms' holds the original column name, 'Presence' holds its value).
melted_df = df.melt(
    id_vars=['class'],
    value_vars=['Gender', 'Polyuria'],
    var_name='Symptoms',
    value_name='Presence',
)

print("\nMelted DataFrame:", melted_df.head(), sep="\n")

# PIVOT FUNCTION
print()
print("pivot", "function")
# Spread the long data out again: one row per 'class', one column per entry in
# 'Symptoms'. Many melted rows share the same (class, Symptoms) pair, so an
# aggregation is required; 'first' keeps only the first 'Presence' value seen
# for each pair, i.e. the result is a summary, not the original wide table.
pivoted_df = pd.pivot_table(
    melted_df,
    index='class',
    columns='Symptoms',
    values='Presence',
    aggfunc='first',
)

print("\nPivoted DataFrame:", pivoted_df.head(), sep="\n")

# AGGREGATION
print()
print("aggregation", "function")
# Summarise each class with the mean age and the number of patients who report
# polyphagia.
# 'Polyphagia' holds 'Yes'/'No' strings, so aggregating it with 'count' would
# only count non-null rows (the group size) rather than the positive cases.
# Convert it to a 0/1 flag first and sum the flag to count the 'Yes' answers.
agg_df = (
    df.assign(_has_polyphagia=df['Polyphagia'].eq('Yes').astype(int))
    .groupby('class')
    .agg(
        mean_age=('Age', 'mean'),
        polyphagia_count=('_has_polyphagia', 'sum'),
    )
)

print("\nAggregated DataFrame:")
print(agg_df)

# ITERATION
print()
print("iteration", "function")
# Display every record whose Age is over 50.
# The age test is done once with a vectorized boolean mask, so only the
# matching rows are iterated (and materialised as Series) for printing.
print("\nRows where Age is over 50:")
over_50 = df[df['Age'] > 50]
for _, row in over_50.iterrows():
    print(row)

# GROUPBY FUNCTION
print()
print("groupby", "function")
# Count how many records fall into each (class, Gender) combination and
# flatten the result into a regular DataFrame with a 'Count' column.
class_gender_groups = df.groupby(['class', 'Gender'])
group_sizes = class_gender_groups.size()
grouped_df = group_sizes.reset_index(name='Count')

print(
    "\nGroupby DataFrame showing counts of each gender by class:",
    grouped_df.head(),
    sep="\n",
)


Initial data:
   Age Gender Polyuria Polydipsia sudden weight loss weakness Polyphagia  \
0   40   Male       No        Yes                 No      Yes         No   
1   58   Male       No         No                 No      Yes         No   
2   41   Male      Yes         No                 No      Yes        Yes   
3   45   Male       No         No                Yes      Yes        Yes   
4   60   Male      Yes        Yes                Yes      Yes        Yes   

  Genital thrush visual blurring Itching Irritability delayed healing  \
0             No              No     Yes           No             Yes   
1             No             Yes      No           No              No   
2             No              No     Yes           No             Yes   
3            Yes              No     Yes           No             Yes   
4             No             Yes     Yes          Yes             Yes   

  partial paresis muscle stiffness Alopecia Obesity     class  
0              No         