In [366]:
import pandas as pd
import numpy as np

In [367]:
df = pd.read_csv("marks.csv")
marks_df = pd.read_csv("sem.csv")
final_df = pd.read_csv("final.csv")

In [368]:
df = df.drop(['PRN No.'], axis=1)
df = df.fillna(0)
df = df.replace("RR", 0)
df.drop(df.columns[df.columns.str.contains('Grace')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('Condol')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('Cat1')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('Cat2')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('Fat')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('FAT')], axis=1, inplace=True)
df.drop(df.columns[df.columns.str.contains('Lab')], axis=1, inplace=True)
df = df.applymap(lambda x: int(x) if isinstance(x, str) and x.isdigit() else x)
final_df = final_df.iloc[:-3]
df = df.loc[(df.applymap(type) != str).all(axis=1)]

In [369]:
#scaling the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
df = scaler.fit_transform(df)
marks_df = scaler.fit_transform(marks_df)

In [370]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
# train test split
X = df
y = final_df[' Grade ']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)


In [371]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score

### Random Forest

In [372]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=300, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

#evaluation
rf_acc = accuracy_score(y_test, y_pred)
rf_mcc = matthews_corrcoef(y_test, y_pred)
rf_f1 = f1_score(y_test, y_pred, average='weighted')
print('- Accuracy: %s' % rf_acc)
print('- MCC: %s' % rf_mcc)
print('- F1 score: %s' % rf_f1)

- Accuracy: 0.7052631578947368
- MCC: 0.3710946939115025
- F1 score: 0.6965789473684212


### mlp

In [381]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(200, 50), activation='relu', solver='adam', max_iter=2500)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

#evaluation
mlp_acc = accuracy_score(y_test, y_pred)
mlp_mcc = matthews_corrcoef(y_test, y_pred)
mlp_f1 = f1_score(y_test, y_pred, average='weighted')
print('- Accuracy: %s' % mlp_acc)
print('- MCC: %s' % mlp_mcc)
print('- F1 score: %s' % mlp_f1)

- Accuracy: 0.631578947368421
- MCC: 0.1926478344075154
- F1 score: 0.6184137002726029


### knn

In [352]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=14)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

#evaluation
knn_acc = accuracy_score(y_test, y_pred)
knn_mcc = matthews_corrcoef(y_test, y_pred)
knn_f1 = f1_score(y_test, y_pred, average='weighted')
print('- Accuracy: %s' % knn_acc)
print('- MCC: %s' % knn_mcc)
print('- F1 score: %s' % knn_f1)

- Accuracy: 0.7473684210526316
- MCC: 0.39181039716376265
- F1 score: 0.7096922281132807


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


### xgboost

In [382]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

# train the XGBoost model
xgb = XGBClassifier()
xgb.fit(X_train, y_train)

# make predictions on the testing set
y_pred = xgb.predict(X_test)

# calculate evaluation metrics
xgb_acc = accuracy_score(y_test, y_pred)
xgb_mcc = matthews_corrcoef(y_test, y_pred)
xgb_f1 = f1_score(y_test, y_pred, average='weighted')
print('- Accuracy:', xgb_acc)
print('- MCC:', xgb_mcc)
print('- F1 Score:', xgb_f1)


- Accuracy: 0.6842105263157895
- MCC: 0.35602038805778663
- F1 Score: 0.6806657669815565


### Hybrid Model

In [387]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
# train test split
X = df
y = final_df[' Grade ']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)

In [389]:
#stacking
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

models = [xgb,knn,rf,mlp]

estimator_list = [
    ('xgb',xgb),
    ('knn',knn),
    ('rf',rf),
    ('mlp',mlp) ]

stack_model = StackingClassifier(estimators=estimator_list, final_estimator=None)

stack_model.fit(X_train, y_train)
y_pred = stack_model.predict(X_train)

stack_acc = accuracy_score(y_train, y_pred) 
stack_mcc = matthews_corrcoef(y_train, y_pred) 
stack_f1 = f1_score(y_train, y_pred, average='weighted') 

print('- Accuracy: %s' % stack_acc)
print('- MCC: %s' % stack_mcc)
print('- F1 score: %s' % stack_f1)
     



- Accuracy: 0.9090909090909091
- MCC: 0.8152338451604736
- F1 score: 0.8973218733294092


### comparitives

In [390]:
acc_train_list = {'knn':knn_acc,
'xgb': xgb_acc,
'rf': rf_acc,
'mlp': mlp_acc,
'stack': stack_acc}

mcc_train_list = {'knn':knn_mcc,
'xgb': xgb_mcc,
'rf': rf_mcc,
'mlp': mlp_mcc,
'stack': stack_mcc}

f1_train_list = {'knn':knn_f1,
'xgb': xgb_f1,
'rf': rf_f1,
'mlp': mlp_f1,
'stack': stack_f1}
     

In [391]:
acc_df = pd.DataFrame.from_dict(acc_train_list, orient='index', columns=['Accuracy'])
mcc_df = pd.DataFrame.from_dict(mcc_train_list, orient='index', columns=['MCC'])
f1_df = pd.DataFrame.from_dict(f1_train_list, orient='index', columns=['F1'])
res = pd.concat([acc_df, mcc_df, f1_df], axis=1)
res

Unnamed: 0,Accuracy,MCC,F1
knn,0.747368,0.39181,0.709692
xgb,0.684211,0.35602,0.680666
rf,0.705263,0.371095,0.696579
mlp,0.631579,0.192648,0.618414
stack,0.909091,0.815234,0.897322


### predicting for new tuples

In [392]:
X = pd.read_csv("X_new.csv")
X

Unnamed: 0,DSA(CSE2001) CAT1 (50),DSA(CSE2001) CAT2 (50),DSA(CSE2001)Grade,DMS(MAT1003) CAT1 (50),DMS(MAT1003)CAT2 (50),FEEE(ECE1002)CAT1 (50),FEEE(ECE1002) CAT2 (50),FEEE(ECE1002) Total (200),FEEE(ECE1002)Grade,ED(MEC1004) CAT1 (50),...,ITC(CSE2013) CAT2 (50),ITC(CSE2013) Total (200),ITC(CSE2013) Grade,COA(ECE2002) Grade,SNA(CSE4008) CAT1 (50),SNA(CSE4008) CAT2 (50),SNA(CSE4008) Total (160),SNA(CSE4008) Grade,FOM(MGT1003) CAT1 (50),FOM(MGT1003) CAT2 (50)
0,33,30,4,44,29,49,30,151,6,40,...,29,170,6,6,70,29,123,6,49,29
1,33,21,3,45,27,35,24,117,4,40,...,27,156,6,6,70,26,119,6,70,27
2,34,20,4,41,27,37,16,100,3,39,...,28,168,6,6,70,28,121,6,42,28
3,42,23,4,36,18,0,23,0,1,44,...,24,156,6,6,70,24,116,6,70,24
4,31,29,4,32,20,12,19,84,1,30,...,28,163,6,6,70,28,120,6,46,29


In [393]:
X_new = scaler.fit_transform(X)
y_new_pred = stack_model.predict(X_new)
y_new_pred = pd.DataFrame(y_new_pred, columns=['encoded'])
mapping = {1:'F', 2:'D', 3:'C', 4:'B', 5:'A', 6:'S'}
y_new_pred['label_decoded'] = y_new_pred['encoded'].map(mapping)
y_new_pred

Unnamed: 0,encoded,label_decoded
0,5,A
1,5,A
2,4,B
3,4,B
4,4,B


In [394]:
y_new_pred = knn.predict(X_new)
y_new_pred = pd.DataFrame(y_new_pred, columns=['encoded'])
mapping = {1:'F', 2:'D', 3:'C', 4:'B', 5:'A', 6:'S'}
y_new_pred['label_decoded'] = y_new_pred['encoded'].map(mapping)
y_new_pred

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


Unnamed: 0,encoded,label_decoded
0,5,A
1,5,A
2,4,B
3,4,B
4,4,B


In [395]:
y_new_pred = xgb.predict(X_new)
y_new_pred = pd.DataFrame(y_new_pred, columns=['encoded'])
mapping = {1:'F', 2:'D', 3:'C', 4:'B', 5:'A', 6:'S'}
y_new_pred['label_decoded'] = y_new_pred['encoded'].map(mapping)
y_new_pred

Unnamed: 0,encoded,label_decoded
0,6,S
1,5,A
2,4,B
3,4,B
4,4,B


In [396]:
y_new_pred = rf.predict(X_new)
y_new_pred = pd.DataFrame(y_new_pred, columns=['encoded'])
mapping = {1:'F', 2:'D', 3:'C', 4:'B', 5:'A', 6:'S'}
y_new_pred['label_decoded'] = y_new_pred['encoded'].map(mapping)
y_new_pred

Unnamed: 0,encoded,label_decoded
0,5,A
1,5,A
2,4,B
3,4,B
4,4,B


In [397]:
y_new_pred = mlp.predict(X_new)
y_new_pred = pd.DataFrame(y_new_pred, columns=['encoded'])
mapping = {1:'F', 2:'D', 3:'C', 4:'B', 5:'A', 6:'S'}
y_new_pred['label_decoded'] = y_new_pred['encoded'].map(mapping)
y_new_pred

Unnamed: 0,encoded,label_decoded
0,6,S
1,6,S
2,4,B
3,4,B
4,5,A
