In [None]:
# Numerical arrays and tabular data handling.
import numpy as np
import pandas as pd

# Neural-network definition, training and callbacks.
import tensorflow as tf
from tensorflow import keras

# Plotting; apply the 'ggplot' style sheet to the figures drawn below.
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [None]:
df = pd.read_csv('../input/students-performance-in-exams/StudentsPerformance.csv')

In [None]:
df.head()

In [None]:
df.info()

In [None]:
# Regression target: floor of the mean of the three exam scores.
# skipna=False keeps a missing score propagating to the target, exactly as
# plain column addition would.
score_columns = ['math score', 'reading score', 'writing score']
df['performance'] = df[score_columns].sum(axis=1, skipna=False) // 3
df.head(5)

In [None]:
df['gender'].value_counts()

In [None]:
codes, uniques = pd.factorize(df['gender'], sort=True)
print(codes[:10], uniques)

In [None]:
df['gender'] = codes
df.head()

In [None]:
codes, uniques = pd.factorize(df['race/ethnicity'], sort=True)
print(codes[:10], uniques)

In [None]:
df['race/ethnicity'] = codes
df.head()

In [None]:
df['parental level of education'].value_counts()

In [None]:
df = df[df['parental level of education'] != "master's degree"]

In [None]:
df['parental level of education'].value_counts()

In [None]:
codes, uniques = pd.factorize(df['parental level of education'])
print(codes[: 10], uniques)

In [None]:
df['parental level of education'] = codes
df.head()

In [None]:
df.head()

In [None]:
df = df.drop(['lunch', 'math score', 'reading score', 'writing score'],axis=1)
df.head()

In [None]:
df['test preparation course'].value_counts()

In [None]:
codes, uniques = pd.factorize(df['test preparation course'])
print(codes[:10], uniques)
df['test preparation course'] = codes

In [None]:
df.head()

In [None]:
df = df.sample(frac=1, random_state=8).reset_index(drop=True)

In [None]:
df.head()

In [None]:
df.shape

In [None]:
labels = df['performance']
data = df.drop('performance', axis=1)

In [None]:
train_df = data[: 705]
train_labels = labels[: 705]

valid_df = data[: 845]
valid_labels = labels[: 845]

test_df = data[845: ]
test_labels = labels[845: ]

In [None]:
train_df.head()

In [None]:
# Fix TensorFlow's global seed (same value as the shuffle seed) so weight
# initialisation is repeatable across runs. Full determinism may additionally
# depend on the TensorFlow version and hardware.
tf.random.set_seed(8)

# Small regression network: one hidden ReLU layer feeding a single linear
# output unit. (A 64-unit hidden layer was tried earlier; 80 units is the
# configuration in use.)
model = keras.models.Sequential([
    keras.layers.Dense(80, activation="relu", input_shape=train_df.shape[1:]),
    keras.layers.Dense(1)
])

In [None]:
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

In [None]:
my_cb = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

history = model.fit(train_df, train_labels, epochs=500, validation_data=(valid_df, valid_labels), callbacks=[my_cb])

In [None]:
# Learning curves: mean absolute error per epoch, training first, then validation.
fig, ax = plt.subplots()
for metric_key in ('mae', 'val_mae'):
    ax.plot(history.history[metric_key])

ax.set_xlabel('Epochs')
ax.set_ylabel('MAE')

# Legend entries follow the order in which the curves were drawn.
ax.legend(['train', 'val'])

fig.tight_layout()

In [None]:
res = model.evaluate(test_df, test_labels)

In [None]:
predictions = model.predict(test_df[: 20])

In [None]:
preds = np.ndarray.flatten(predictions)
preds = np.rint(preds)

In [None]:
# One x position per plotted test sample.
x = np.arange(20)

# Rounded predictions and true targets for the first 20 test rows, overlaid.
fig, ax = plt.subplots()
ax.scatter(x, preds)
ax.scatter(x, test_labels[:20])

# Legend entries follow the order in which the point sets were drawn.
ax.legend(['predictions', 'actual'])

ax.set_ylabel('Performance')

fig.tight_layout()