# <center>ALGORITHMES DE DETECTION DU WHEEL-SPINNING    *(EVIDENCE B)* : UN EXEMPLE D'INFERENCE SUR 5000 ELEVES ATTICUS A PARTIR DU MODELE PRE-ENTRAINE DU NOTEBOOK DE TRAINING<center/>

# Modules

In [1]:
import numpy as np
import pandas as pd
import onnxruntime as rt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences 

# Preprocessing des données à tester

In [2]:
def preprocessing_for_nn_inference(df):
    
    #preprocessing for timestep
    df["minute"] = pd.DatetimeIndex(df['ts']).minute
    df["heure"] = pd.DatetimeIndex(df['ts']).hour
    df["jour"] = pd.DatetimeIndex(df['ts']).day
    df["mois"] = pd.DatetimeIndex(df['ts']).month
    df["annee"] = pd.DatetimeIndex(df['ts']).year
    df = df.drop("homepage", axis = 1)

    #preprocessing for matrix design
    X = df[["heure", "duration", "score"]].values  
    liste_eleve = list(df["student_id"].unique())
    X_sequence = []
    for eleve in liste_eleve:
        d = df[(df["student_id"] == eleve)]
        liste_activite = list(d["activity_id"].unique())
        for activite in liste_activite:
            d2 = d[d["activity_id"] == activite]
            une_sequence_X = d2[["heure", "duration", "score"]].values
            X_sequence.append(une_sequence_X)
    X_seq = pad_sequences(X_sequence, maxlen=20, dtype="float32", padding="post", value = -10.)
    
    #normalization of testing dataset
    test_reshape = X_seq.reshape(-1, X_seq.shape[-1])
    colonnes = ["col" + str(i) for i in range(1, X_seq.shape[2] + 1)]
    test_df = pd.DataFrame(test_reshape, columns = colonnes)
    test_df_no_mask = test_df[test_df["col1"] != -10]
    
    #chargement onnx de la normalisation sur les données d'entraînement
    scaler = rt.InferenceSession("scaler.onnx")
    test_df_no_mask_norm = scaler.run(None, {'X': test_df_no_mask.values})[0]
    test_df.iloc[test_df_no_mask.index,] = test_df_no_mask_norm
    X_test = test_df.values.reshape(X_seq.shape)
    
    return X_test

# Acquisition des données à tester

In [3]:
#chargement des données
data = pd.read_csv("test_data_example.csv")

In [4]:
#chargement des données
X_test = preprocessing_for_nn_inference(data)

# Chargement du modèle pré-entraîné et inférence

In [5]:
#chargement du modèle préentrainé
model = load_model('ws_model.h5')
print(model.summary())

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_5 (Masking)          (None, 20, 3)             0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 20, 64)            17408     
_________________________________________________________________
lstm_11 (LSTM)               (None, 32)                12416     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 33        
Total params: 29,857
Trainable params: 29,857
Non-trainable params: 0
_________________________________________________________________
None


# Prédiction

In [6]:
%%time
y_pred = model.predict(X_test)
prediction_class = np.where(y_pred > 0.5, 1, 0)

Wall time: 3.08 s


# Exportation des prédictions au format JSON

In [7]:
###### SCRIPT A CONTRÔLER DE PLUS PRÊT : vérifier le matching entre les prédictions obtenues et les ids de student, module, objectif, activité)
#------------------------------------------------------------------------------------------------------------------
data_expor = data.drop_duplicates(["student_id", "activity_id"])
data_expor["wheel_spinning_proba"] = y_pred
data_expor[["module_id", "student_id", "objective_id", "activity_id", "exercise_id", "wheel_spinning_proba"]].to_json("prediction.json")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_expor["wheel_spinning_proba"] = y_pred
