In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0):
    """Build and compile the binary classifier handed to KerasClassifier.

    The network is a 16-unit ReLU hidden layer over 13 input features followed
    by a single sigmoid output unit. It is compiled with binary cross-entropy
    loss, an SGD optimizer and the accuracy metric.

    Args:
        learn_rate: Learning rate forwarded to SGD as ``lr``.
        momentum: Momentum forwarded to SGD.

    Returns:
        The compiled Sequential model.
    """
    # input_dim=13 must match the number of feature columns fed to fit().
    hidden_layer = Dense(16, input_dim=13, activation='relu')
    output_layer = Dense(1, activation='sigmoid')
    model = Sequential([hidden_layer, output_layer])

    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # `lr` is kept to match the Keras version this notebook imports from.
    sgd = SGD(lr=learn_rate, momentum=momentum)
    model.compile(
        loss='binary_crossentropy',
        optimizer=sgd,
        metrics=['accuracy'],
    )
    return model
# Fix the NumPy random seed for reproducibility.
# NOTE(review): this seeds NumPy only; whether it also covers the Keras
# backend's weight initialisation depends on the backend — confirm.
seed = 7
np.random.seed(seed)

# Load the dataset (path is relative to the notebook's working directory).
dataset = pd.read_csv("slim-xAPI-Edu-Data.csv")

# Target: a student counts as failed when their Class is 'L'.
# The comparison already yields a boolean column, so no np.where is needed.
dataset['Failed'] = dataset['Class'] == 'L'

# Binary-encode the two-valued string columns: the named value maps to 1 and
# every other value (including missing ones) maps to 0.
dataset['gender'] = np.where(dataset['gender'] == 'M', 1, 0)
dataset['Relation'] = np.where(dataset['Relation'] == 'Father', 1, 0)
dataset['ParentAnsweringSurvey'] = np.where(dataset['ParentAnsweringSurvey'] == 'Yes', 1, 0)
dataset['ParentschoolSatisfaction'] = np.where(dataset['ParentschoolSatisfaction'] == 'Yes', 1, 0)
dataset['AbsentMoreThanWeek'] = np.where(dataset['StudentAbsenceDays'] == 'Above-7', 1, 0)
dataset['Semester'] = np.where(dataset['Semester'] == 'F', 1, 0)

# The remaining categorical columns are used as they appear in the CSV:
#SectionID: 1=A, 2=B, 3=C
#Topic: 1=Arabic, 2=Biology, 3=Chemistry, 4=English, 5=French, 6=Geology, 7=History, 8=IT, 9=Math, 10=Quran, 11=Science, 12=Spanish
#StageID: Lowerlevel = 1, MiddleSchool = 2, HighSchool = 3

# Split into input (X) and output (Y) variables.
# X has 13 columns; keep this list in sync with input_dim=13 in create_model.
X = dataset[['raisedhands', 'VisITedResources', 'SectionID', 'Topic', 'StageID', 'AnnouncementsView', 'Semester', 'Discussion', 'gender', 'Relation', 'ParentAnsweringSurvey', 'ParentschoolSatisfaction', 'AbsentMoreThanWeek']]

# Y is kept as a single-column DataFrame (not a Series).
Y = dataset[['Failed']]




In [5]:

# Wrap the model builder so scikit-learn can drive training; each fit runs
# 100 epochs with mini-batches of 10 and no per-epoch logging.
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=10, epochs=100)

# Candidate SGD hyper-parameters; the grid search evaluates every combination.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = {'learn_rate': learn_rate, 'momentum': momentum}

# Cross-validated search; n_jobs=-1 asks scikit-learn to use all processors.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X), np.array(Y))

# Report the best configuration first, then the mean and standard deviation
# of the test score for each parameter combination.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.850000 using {'learn_rate': 0.001, 'momentum': 0.6}
0.820833 (0.012843) with: {'learn_rate': 0.001, 'momentum': 0.0}
0.837500 (0.033463) with: {'learn_rate': 0.001, 'momentum': 0.2}
0.847917 (0.020624) with: {'learn_rate': 0.001, 'momentum': 0.4}
0.850000 (0.028413) with: {'learn_rate': 0.001, 'momentum': 0.6}
0.845833 (0.015590) with: {'learn_rate': 0.001, 'momentum': 0.8}
0.812500 (0.044488) with: {'learn_rate': 0.001, 'momentum': 0.9}
0.756250 (0.058852) with: {'learn_rate': 0.01, 'momentum': 0.0}
0.608333 (0.258468) with: {'learn_rate': 0.01, 'momentum': 0.2}
0.783333 (0.079276) with: {'learn_rate': 0.01, 'momentum': 0.4}
0.814583 (0.036917) with: {'learn_rate': 0.01, 'momentum': 0.6}
0.770833 (0.074361) with: {'learn_rate': 0.01, 'momentum': 0.8}
0.775000 (0.018399) with: {'learn_rate': 0.01, 'momentum': 0.9}
0.735417 (0.044585) with: {'learn_rate': 0.1, 'momentum': 0.0}
0.568750 (0.229526) with: {'learn_rate': 0.1, 'momentum': 0.2}
0.735417 (0.044585) with: {'learn_rate':