# Student profile classification model
This program is used to predict a student's profile based on their grades, interests, and many other parameters.

## Importing libraries

In [None]:
import pandas as pd
import keras
from keras.utils import FeatureSpace
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import numpy as np

## Load the dataframe and get the class names

In [None]:
# Read the raw student records from the CSV file.
dataframe = pd.read_csv('data/dataset.csv')

# Number of rows in the raw table.
DATASET_SIZE = len(dataframe)

# Distinct values of the "specification" column. This is a set, so it carries
# no ordering; only its size is used when sizing the model's output layer.
CLASS_NAMES = set(dataframe["specification"].unique())

## Normalization of the data

In [None]:
def normalization(dataframe):
    """Clean the raw student table and integer-encode its label column.

    The ``student_id`` column is removed, every missing value is replaced
    by an empty string, and ``specification`` is rewritten as integer codes
    (via ``LabelEncoder``) stored with categorical dtype.

    Args:
        dataframe: Raw table containing at least the ``student_id`` and
            ``specification`` columns.

    Returns:
        A new DataFrame; the argument itself is not modified.
    """
    # NOTE(review): fillna("") is applied to every column, numeric ones
    # included -- confirm the numeric columns never contain missing values.
    cleaned = dataframe.drop(columns='student_id').fillna("")

    encoded_labels = LabelEncoder().fit_transform(cleaned["specification"])
    cleaned["specification"] = encoded_labels
    cleaned["specification"] = cleaned["specification"].astype('category')
    return cleaned


dataframe = normalization(dataframe)

## Split into train (80%) and test (20%) sets and convert to TensorFlow Datasets

In [None]:
def dataframe_to_dataset(dataframe):
    """Convert a normalized DataFrame into a shuffled ``tf.data.Dataset``.

    Each dataset element is a ``(features, label)`` pair: ``features`` is a
    dict mapping every non-label column name to its value, and ``label`` is
    the one-hot encoding of the ``specification`` column.

    Args:
        dataframe: Table containing a ``specification`` column. It is left
            untouched, so the same frame can be converted more than once.

    Returns:
        An unbatched ``tf.data.Dataset`` shuffled with a buffer covering
        the whole table.

    Raises:
        KeyError: If ``dataframe`` has no ``specification`` column.
    """
    # Read the label column and build a label-free copy instead of popping,
    # which would silently strip the column from the caller's DataFrame.
    labels = dataframe["specification"]
    features = dataframe.drop(columns="specification")

    # Request float32 explicitly: get_dummies yields bool columns on recent
    # pandas versions, whereas the loss works on floating-point targets.
    category = pd.get_dummies(labels, prefix='Specification', dtype="float32")

    ds = tf.data.Dataset.from_tensor_slices((dict(features), category))
    # shuffle() rejects a buffer size of zero, so keep it at least 1 for an
    # empty table.
    ds = ds.shuffle(buffer_size=max(len(features), 1))
    return ds

# Number of rows grouped into each batch of both datasets.
BATCH_SIZE = 32

# Hold out a reproducible 20% sample for testing; every remaining row is
# used for training.
test_dataframe = dataframe.sample(frac=0.2, random_state=1337)
train_dataframe = dataframe.drop(index=test_dataframe.index)

# Convert each split to a shuffled tf.data.Dataset, then batch it.
test_ds = dataframe_to_dataset(test_dataframe).batch(BATCH_SIZE)
train_ds = dataframe_to_dataset(train_dataframe).batch(BATCH_SIZE)


## Creation of the normalization parameters

In [None]:
# Columns declared as string categorical features, in declaration order.
categorical_columns = [
    "interest_outside_school",
    "interest_outside_school2",
    "interest_outside_school3",
    "interest_outside_school4",
    "favorite_subjects_it",
    "favorite_subjects_it2",
    "favorite_subjects_it3",
    "favorite_subjects_it4",
    "name",
    "hobbies",
    "hobbies2",
    "hobbies3",
    "hobbies4",
    "hobbies5",
    "hobbies6",
    "hobbies7",
    "hobbies8",
    "hobbies9",
]

# Columns declared as normalized float features, listed after the
# categorical ones ("iq" is declared first, below).
trailing_float_columns = [
    "junior_network_administrator",
    "junior_web_programmer",
    "junior_programmer",
]

# Assemble the column -> feature-type mapping in the same key order as a
# hand-written literal: "iq", the categorical columns, then the floats.
feature_types = {"iq": "float_normalized"}
feature_types.update(dict.fromkeys(categorical_columns, "string_categorical"))
feature_types.update(dict.fromkeys(trailing_float_columns, "float_normalized"))

# All encoded features are concatenated into a single output.
fs = FeatureSpace(features=feature_types, output_mode="concat")

def extract_feature(x, _):
    """Keep the features of a ``(features, label)`` element, dropping the label."""
    return x


# The FeatureSpace is adapted on features only, so strip the labels from
# the training dataset before handing it over.
train_no_labels = train_ds.map(map_func=extract_feature)

fs.adapt(dataset=train_no_labels)

## Preprocess the data according to the FeatureSpace specification

In [None]:
def _encode_batch(features, label):
    """Run the features of one batch through ``fs``; pass the label through."""
    return fs(features), label


# Encode both splits in parallel and prefetch so the encoded batches are
# prepared ahead of the step that consumes them.
preprocessed_train_ds = train_ds.map(
    _encode_batch, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

preprocessed_test_ds = test_ds.map(
    _encode_batch, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

## Creation of the model

In [None]:
# Symbolic tensors provided by the adapted FeatureSpace: the encoded
# features and the raw (un-encoded) inputs.
dataset_encoded = fs.get_encoded_features()
raw = fs.get_inputs()

# Two hidden ReLU layers of 32 units each, followed by a softmax output
# with one unit per class name.
x = dataset_encoded
for units in (32, 32):
    x = keras.layers.Dense(units, activation="relu")(x)
predictions = keras.layers.Dense(len(CLASS_NAMES), activation="softmax")(x)

# Both models end in the same prediction layers; they differ only in their
# entry point: already-encoded features for training, raw inputs for
# inference.
training_model = keras.Model(inputs=dataset_encoded, outputs=predictions)
inference_model = keras.Model(inputs=raw, outputs=predictions)

# Compile the two models with identical settings (training model first).
for model in (training_model, inference_model):
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=['accuracy'],
    )

## Train the model

In [None]:
# Number of passes over the pre-encoded training batches.
EPOCHS = 30

# Train the encoded-input model on the preprocessed training dataset.
training_model.fit(x=preprocessed_train_ds, epochs=EPOCHS, verbose=2)

## Evaluation of the model

In [None]:
# Score the trained model on the held-out, pre-encoded test batches.
# The result unpacks into the loss followed by the accuracy metric.
evaluation_results = training_model.evaluate(
    preprocessed_test_ds,
    verbose=0,
)
loss, accuracy = evaluation_results

# Report accuracy as a percentage with two decimals, then the raw loss.
accuracy_percent = accuracy * 100
print('Accuracy: %.2f%%' % accuracy_percent)
print('Loss: {}'.format(loss))

## Save the inference model

In [None]:
# Persist the raw-input model (not the encoded-input training model).
# NOTE(review): Keras 3 only accepts save paths ending in ".keras" or ".h5";
# confirm the installed Keras version accepts this extension-less path.
inference_model.save(filepath='./model/settings')