In [1]:
import torch
import numpy as np
import pandas as pd
import time
import os

from sklearn.model_selection import train_test_split

seed = 42
np.random.seed(seed)

dataset = pd.read_csv('xAPI-Edu-Data.csv')

imp_features = dataset.drop(['gender', 'NationalITy', 'Semester', 'PlaceofBirth', 'GradeID', 'Topic', 'SectionID', 'Relation'], axis=1)

stage_mapping = {"lowerlevel":0, "MiddleSchool":1, "HighSchool":2}
survey_mapping = {"No":0, "Yes":1}
satisfaction_mapping = {"Bad":0, "Good":1}
absence_mapping = {"Under-7":0, "Above-7":1}
class_mapping = {"L":0, "M":1, "H":2}

numeric_features = imp_features
numeric_features['StageID'] = imp_features['StageID'].map(stage_mapping)
numeric_features['ParentAnsweringSurvey'] = imp_features['ParentAnsweringSurvey'].map(survey_mapping)
numeric_features['ParentschoolSatisfaction'] = imp_features['ParentschoolSatisfaction'].map(satisfaction_mapping)
numeric_features['StudentAbsenceDays'] = imp_features['StudentAbsenceDays'].map(absence_mapping)
numeric_features['Class'] = imp_features['Class'].map(class_mapping)

data_np_array = numeric_features.values

X = data_np_array[:,:-1]
Y = data_np_array[:,-1]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=seed, stratify=Y)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

  from numpy.core.umath_tests import inner1d


In [3]:
# Random Forest
random_forest = RandomForestClassifier(n_estimators=300)
random_forest.fit(X_train, Y_train)
acc_random_forest = random_forest.score(X_test, Y_test)*100
acc_random_forest

78.125

In [4]:
#MLP
clf = MLPClassifier(solver='adam', alpha=1e-5)
clf.fit(X_train, Y_train)
acc_mlp = clf.score(X_test, Y_test)*100
acc_mlp

70.83333333333334

In [5]:
#Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
acc_gaussian = gaussian.score(X_test, Y_test)*100
acc_gaussian

71.875

In [6]:
# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train, Y_train)
acc_log = logreg.score(X_test, Y_test)*100
acc_log

66.66666666666666

In [7]:
#SVC
svc = SVC()
svc.fit(X_train, Y_train)
acc_svc = svc.score(X_test, Y_test)*100
acc_svc

48.95833333333333

In [8]:
#KNN
knn = KNeighborsClassifier(n_neighbors = 3) # Here 3 neighbours provides the highest accuracy
knn.fit(X_train, Y_train)
acc_knn = knn.score(X_test, Y_test)*100
acc_knn

57.291666666666664

In [9]:
# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
acc_linear_svc = linear_svc.score(X_test, Y_test)*100
acc_linear_svc

62.5

In [10]:
# Decision Tree
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
acc_decision_tree = decision_tree.score(X_test, Y_test)*100
acc_decision_tree

70.83333333333334

In [11]:
#SGD
sgd = SGDClassifier(max_iter=1000, tol=1e-3)
sgd.fit(X_train, Y_train)
acc_sgd = sgd.score(X_test, Y_test)*100
acc_sgd

47.91666666666667

In [12]:
# Perceptron
perceptron = Perceptron(max_iter=1000, tol=1e-3)
perceptron.fit(X_train, Y_train)
acc_perceptron = perceptron.score(X_test, Y_test)*100
acc_perceptron

28.125