## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

## Importing dataset

In [2]:
dataset = pd.read_csv('../data/data-6-model.csv')
X = dataset.iloc[:, :-2].values
y = dataset.iloc[:, -2].values
print(X[0])
print(y[0])
dataset.head()

[6 'Mathematics' 'Time' 'Games' 49]
Kinesthetic


Unnamed: 0,grade,subject,lesson,class_interests,lesson_average_mark,teaching_aid_category,teaching_aid
0,6,Mathematics,Time,Games,49,Kinesthetic,Time Difference Board Game
1,6,Mathematics,Time,Videos,69,Audio Visual,Time Theory Exposition
2,6,Mathematics,Circles,Science,68,Visual,DIY Compass
3,6,Mathematics,Time,Videos,60,Audio Visual,Time Concepts Animation
4,6,Mathematics,Subtraction,Art,63,Visual,Posters


## Encode Independant variables

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(sparse_output = False), [1, 2, 3])], remainder='passthrough')
X = ct.fit_transform(X)
print(X[0])

[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 6 49]


## Encode dependant variables

In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
print(y[0])

3


## Splitting dataset into Test set and Train set

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

## Feature scaling

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:, 17:] = sc.fit_transform(X_train[:, 17:])
X_test[:, 17:] = sc.transform(X_test[:, 17:])
print(X_train[0])

[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 6
 -0.8109449345880856]


## Train Decision tree model on the Train set

In [7]:
from sklearn.tree import DecisionTreeClassifier
decision_tree_classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 1)
decision_tree_classifier.fit(X_train, y_train)

## Train SVM model on the Train set

In [8]:
from sklearn.svm import SVC
svm_classifier = SVC(kernel = 'linear', random_state = 0)
svm_classifier.fit(X_train, y_train)

## Train KNN model on the Train set

In [9]:
from sklearn.neighbors import KNeighborsClassifier
knn_classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
knn_classifier.fit(X_train, y_train)

## Train XGBoost model on the Train set

In [10]:
from xgboost import XGBClassifier
xgb_classifier = XGBClassifier()
xgb_classifier.fit(X_train, y_train)

## Train Random Forest model on the Train set

In [11]:
from sklearn.ensemble import RandomForestClassifier
random_forest_classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 1)
random_forest_classifier.fit(X_train, y_train)

## Predict the Test set results

In [12]:
y_pred_decision_tree = decision_tree_classifier.predict(X_test)
y_pred_svm = svm_classifier.predict(X_test)
y_pred_xgb = xgb_classifier.predict(X_test)
y_pred_random_forest = random_forest_classifier.predict(X_test)
y_pred_knn = knn_classifier.predict(X_test)

## Accuracy score

In [13]:
from sklearn.metrics import accuracy_score
print(f"Decision tree : {accuracy_score(y_test, y_pred_decision_tree)}")
print(f"SVM : {accuracy_score(y_test, y_pred_svm)}")
print(f"XGBoost : {accuracy_score(y_test, y_pred_xgb)}")
print(f"Random Forest : {accuracy_score(y_test, y_pred_random_forest)}")
print(f"KNN : {accuracy_score(y_test, y_pred_knn)}")

Decision tree : 0.9444444444444444
SVM : 0.8492063492063492
XGBoost : 0.9682539682539683
Random Forest : 0.9603174603174603
KNN : 0.9444444444444444


## Applying k-Fold Cross Validation

In [14]:
from sklearn.model_selection import cross_val_score
decision_tree_accuracies = cross_val_score(estimator = decision_tree_classifier, X = X_train, y = y_train, cv = 10)
svm_accuracies = cross_val_score(estimator = svm_classifier, X = X_train, y = y_train, cv = 10)
random_forest_accuracies = cross_val_score(estimator = random_forest_classifier, X = X_train, y = y_train, cv = 10)
knn_accuracies = cross_val_score(estimator = knn_classifier, X = X_train, y = y_train, cv = 10)
xgb_accuracies = cross_val_score(estimator = xgb_classifier, X = X_train, y = y_train, cv = 10)
print("Decision tree accuracy : {:.2f}".format(decision_tree_accuracies.mean()*100))
print("Decision tree standard Deviation : {:.2f}".format(decision_tree_accuracies.std()*100))
print("====================================================================================")
print("SVM accuracy : {:.2f}".format(svm_accuracies.mean()*100))
print("SVM standard Deviation : {:.2f}".format(svm_accuracies.std()*100))
print("====================================================================================")
print("Random Forest accuracy : {:.2f}".format(random_forest_accuracies.mean()*100))
print("Random Forest standard Deviation : {:.2f}".format(random_forest_accuracies.std()*100))
print("====================================================================================")
print("KNN accuracy : {:.2f}".format(knn_accuracies.mean()*100))
print("KNN standard Deviation : {:.2f}".format(knn_accuracies.std()*100))
print("====================================================================================")
print("XGBoost accuracy : {:.2f}".format(xgb_accuracies.mean()*100))
print("XGBoost standard Deviation : {:.2f}".format(xgb_accuracies.std()*100))

Decision tree accuracy : 92.01
Decision tree standard Deviation : 3.69
SVM accuracy : 74.85
SVM standard Deviation : 7.06
Random Forest accuracy : 93.01
Random Forest standard Deviation : 4.03
KNN accuracy : 90.82
KNN standard Deviation : 2.71
XGBoost accuracy : 93.61
XGBoost standard Deviation : 2.95
