1. ANOVA is a statistical test that can be used to assess the discriminative power of a \
single input variable. Using `f_classif` from `sklearn`, identify the input variables with the \
worst and best discriminative power. Plot their class-conditional probability density \
functions.

In [None]:
# Importing modules
from scipy.io.arff import loadarff
from sklearn.feature_selection import f_classif
from seaborn import kdeplot
import matplotlib.pyplot as plt
import pandas as pd


# Read the ARFF file and keep only the first element of what loadarff
# returns (the records); the second element is not used here.
arff_records = loadarff('./diabetes.arff')[0]
df = pd.DataFrame(arff_records)

# Decode the 'Outcome' column with UTF-8 so the class labels are plain strings
# (presumably loadarff delivers them as bytes — confirm against the dataset).
df['Outcome'] = df['Outcome'].str.decode('utf-8')

In [None]:
# Rank the input variables by their ANOVA F-value against the class label and
# report the ones with the lowest and highest score.
x = df.drop('Outcome', axis=1)  # Every column except the outcome
y_out = df['Outcome']  # Outcome (class label) column

# f_classif returns a pair (F-statistics, p-values) with one entry per column
# of x; only the F-statistics are used for the ranking.
anova_f_value = f_classif(x, y_out)[0]

# argmin/argmax are positions in the F-value array, which follow the column
# order of x, so they can index x.columns directly.
lowest_power_var = x.columns[anova_f_value.argmin()]
highest_power_var = x.columns[anova_f_value.argmax()]

# Python f-strings interpolate with plain {name}; a '$' in front of the braces
# would be printed literally.
print(f'''
    Input var. with lowest discriminative power: {lowest_power_var}
    Input var. with highest discriminative power: {highest_power_var}
''')

In [None]:
# Plotting the class-conditional probability density functions
unique_outcomes = df['Outcome'].unique()

# One (rows, label) pair per class, in the order the labels first appear.
outcome_subsets = [
    (df.loc[df['Outcome'] == outcome], outcome)
    for outcome in unique_outcomes
]

# One figure per selected variable: first the one with the lowest
# discriminative power, then the one with the highest.
for selected_var in (lowest_power_var, highest_power_var):
    plt.figure(figsize=(10, 6))

    # Overlay one KDE curve per class on the same axes.
    for class_rows, class_label in outcome_subsets:
        kdeplot(class_rows[selected_var], label=f'Class {class_label}')

    plt.xlabel(selected_var)
    plt.ylabel('Density')
    plt.legend(title='Class')
    plt.title(f'Class-Conditional Probability Density Function for {selected_var}')
    plt.show()