In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
import time
import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found anywhere under the input directory
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the students grading dataset from the Kaggle input directory
df = pd.read_csv("/kaggle/input/students-grading-dataset/Students_Grading_Dataset.csv")

# A notebook cell only renders its last expression, so the row preview must be
# displayed explicitly; otherwise it is computed and silently discarded.
display(df.head())

# Column dtypes (rendered as the cell's output)
df.dtypes

In [None]:
# Keep only the candidate feature columns plus the 'Grade' label
selected_columns = [
    'Attendance (%)',
    'Midterm_Score',
    'Participation_Score',
    'Study_Hours_per_Week',
    'Extracurricular_Activities',
    'Family_Income_Level',
    'Stress_Level (1-10)',
    'Sleep_Hours_per_Night',
    'Grade',
]
data = df[selected_columns]
data.head()

In [None]:
# Number of missing values in each selected column
data.isna().sum()

In [None]:
# Drop every row that has a missing value. The explicit .copy() makes the result
# an independent frame, so assigning new or re-encoded columns to it later does
# not write to a possible slice of `data` (the SettingWithCopy pitfall).
dataCleaned = data.dropna().copy()

In [None]:
# Encode the two categorical features as integers and derive the binary target.
# The columns are produced with .assign(), which returns a new frame, instead of
# assigning into the result of dropna() (chained assignment on a possible slice).
extracurricular_codes = {'No': 0, 'Yes': 1}
income_level_codes = {'Low': 0, 'Medium': 1, 'High': 2}
failing_grades = ['F', 'D']

dataCleaned = dataCleaned.assign(**{
    # astype(int) makes the numeric dtype explicit rather than relying on
    # replace() implicitly downcasting an object column; it also fails loudly
    # here if a label outside the mapping is present.
    'Extracurricular_Activities': dataCleaned['Extracurricular_Activities']
        .replace(extracurricular_codes)
        .astype(int),
    'Family_Income_Level': dataCleaned['Family_Income_Level']
        .replace(income_level_codes)
        .astype(int),
    # Pass_Fail is 0 for grades 'F' and 'D', and 1 for every other grade
    'Pass_Fail': (~dataCleaned['Grade'].isin(failing_grades)).astype(int),
})
dataCleaned.head()

In [None]:
# Separate the features from the binary target, then hold out 30% of the rows as a test set
label_columns = ['Grade', 'Pass_Fail']
y = dataCleaned['Pass_Fail']
X = dataCleaned.drop(columns=label_columns)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Search k = 1..10 for the KNN classifier, optimizing for accuracy.
# Candidates are compared with 5-fold cross-validation on the training data only.
# Picking k by its accuracy on the test set would leak the test set into model
# selection and make the final test accuracy optimistically biased.
k_values = range(1, 11)

knn_cv = GridSearchCV(
    KNeighborsClassifier(),
    {'n_neighbors': list(k_values)},
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
knn_cv.fit(X_train, y_train)

# Report the mean cross-validated accuracy of every candidate k
for candidate, cv_accuracy in zip(knn_cv.cv_results_['params'], knn_cv.cv_results_['mean_test_score']):
    print(f"k = {candidate['n_neighbors']}  CV accuracy: {cv_accuracy}")

print(f"Best k by cross-validated accuracy: {knn_cv.best_params_['n_neighbors']}")

In [None]:
# Fit the KNN model with k = 9 on the training split, then predict the test split
knn = KNeighborsClassifier(n_neighbors=9).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Accuracy of those predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"The accuracy of the model is: {accuracy:.2f}")

In [None]:
# Grid-search an MLP classifier with 5-fold cross-validation (weighted F1) and
# record the best configuration, its score and the wall-clock search time.

# Empty dataframe for storing results
results = pd.DataFrame(columns=['model', 'parameters', 'f1_weighted_score', 'duration'])

# Get current time
start = time.time()

# Instantiate the model; a fixed random_state makes weight initialisation
# (and therefore the search outcome) reproducible across runs
mlp = MLPClassifier(max_iter=1000, random_state=42)

# Create a parameter grid for GridSearchCV
param_grid = {
    'hidden_layer_sizes': [(512, 256, 128), (256, 128, 64), (128, 64, 32), (128,), (64,), (32,), (16,)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    # np.arange(0.001, 0.01, 0.1) produced the single value 0.001 because the
    # step (0.1) is larger than the interval, so alpha was never searched.
    # List the three regularisation strengths explicitly instead.
    'alpha': [0.001, 0.01, 0.1],
}

# Instantiate the GridSearchCV object
mlp_cv = GridSearchCV(mlp, param_grid, cv=5, scoring='f1_weighted', verbose=1, n_jobs=-1)

# Fit the model
mlp_cv.fit(X_train, y_train)

# Get the end time
end = time.time()

# Print the best parameters found
print(f'The best parameters are: {mlp_cv.best_params_}')

# Print the f1_score for the model
print(f'The f1_score for the model is: {mlp_cv.best_score_}')

# Add the results to the results data frame. pd.concat is the public
# replacement for DataFrame.append; the private `_append` is not a supported API.
mlp_result = {
    'model': 'mlp',
    'parameters': mlp_cv.best_params_,
    'f1_weighted_score': mlp_cv.best_score_,
    'duration': end - start,
}
results = pd.concat([results, pd.DataFrame([mlp_result])], ignore_index=True)

results