In [92]:

'''!pip install mlxtend
!pip install onnxmltools
!pip install onnxruntime '''


'!pip install mlxtend\n!pip install onnxmltools\n!pip install onnxruntime '

In [3]:
# pip install onnxconverter-common

In [2]:
pip install skl2onnx

Collecting skl2onnx
  Obtaining dependency information for skl2onnx from https://files.pythonhosted.org/packages/26/80/836824c62ff0923b4c3b8af8332170bdc3ccb469a220535b40405a93b4fb/skl2onnx-1.16.0-py2.py3-none-any.whl.metadata
  Using cached skl2onnx-1.16.0-py2.py3-none-any.whl.metadata (3.2 kB)
Using cached skl2onnx-1.16.0-py2.py3-none-any.whl (298 kB)
Installing collected packages: skl2onnx
Successfully installed skl2onnx-1.16.0
Note: you may need to restart the kernel to use updated packages.


In [10]:

import numpy as np
import torch
import pandas as pd
import sklearn
import random

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


import matplotlib.pyplot as plt

from mlxtend.plotting import heatmap
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

from sklearn.metrics import r2_score

import onnxruntime as rt
import onnxmltools

from skl2onnx.common.data_types import FloatTensorType

# PARAMETERS

batch_size    = 16
learning_rate = 0.005 ## 0.001
N_Epochs      = 100

epsilon = 0.0001


# Read the data

path_data = r'data.csv'

Student_Data = pd.read_csv( path_data, delimiter=";" )

headers_list = Student_Data.columns.values.tolist()


# Data Analysis

headers_list = Student_Data.columns.values.tolist()

Student_Data_np = Student_Data.to_numpy()


X = Student_Data_np[:, :-1]


y = Student_Data_np[:, 35:36]

random_seed = int( random.random() * 100 )     ## 42

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)


## fix data type

X_train = X_train.astype(  np.float32  )
X_test  = X_test.astype(   np.float32  )
y_train = y_train.astype(  np.float32  )
y_test  = y_test.astype(   np.float32  )

X_train_tr = torch.from_numpy(X_train)
X_test_tr  = torch.from_numpy(X_test)
y_train_tr = torch.from_numpy(y_train)
y_test_tr  = torch.from_numpy(y_test)


x_means      = X_train_tr.mean(0, keepdim=True ) 

x_deviations = X_train_tr.std( 0, keepdim=True) + epsilon

train_ds = TensorDataset( X_train_tr, y_train_tr  )

train_dl = DataLoader( train_ds, batch_size, shuffle=True  )


## Linear Regression

class LinRegNet(nn.Module):
    ## init the class
    def __init__(self, x_means, x_deviations):
        super().__init__()
        
        self.x_means      = x_means
        self.x_deviations = x_deviations
        
        self.linear1 = nn.Linear(11, 1)
        
    ## perform inference
    def forward(self, x):
        
        x = (x - self.x_means) / self.x_deviations
        
        y_pred = self.linear1(x)
        ## return torch.round( y_pred )
        return y_pred
    
    
    


## MLP

class MLP_Net(nn.Module):
    ## init the class
    def __init__(self, x_means, x_deviations):
        super().__init__()
        
        self.x_means      = x_means
        self.x_deviations = x_deviations
        
        self.linear1 = nn.Linear(11, 8)
        self.act1    = nn.Sigmoid()
        self.linear2 = nn.Linear(8, 1)
        self.dropout = nn.Dropout(0.25)
        
    ## perform inference
    def forward(self, x):
        
        x = (x - self.x_means) / self.x_deviations
        
        x = self.linear1(x)
        x = self.act1(x)
        x = self.dropout(x)
        y_pred = self.linear2(x)
        
        ## return torch.round( y_pred )
        return y_pred
        

        

## Deep Learning with 2 hidden layers

class DL_Net(nn.Module):
    ## init the class
    def __init__(self, x_means, x_deviations):
        super().__init__()
        
        self.x_means      = x_means
        self.x_deviations = x_deviations
        
        self.linear1 = nn.Linear(36, 10)
        self.act1    = nn.ReLU()
        self.linear2 = nn.Linear(10, 6)
        self.act2    = nn.ReLU()
        self.linear3 = nn.Linear(6, 1)
        self.dropout = nn.Dropout(0.25)
        
    ## perform inference
    def forward(self, x):
        
        x = (x - self.x_means) / self.x_deviations
        
        x = self.linear1(x)
        x = self.act1(x)
        x = self.linear2(x)
        x = self.act2(x)
        ## x = self.dropout(x)
        y_pred = self.linear3(x)
        
        ## return torch.round( y_pred )
        return y_pred



## Linear plus Nonlinear
## f1 + f2

class LinearPlusNonLinear_Net(nn.Module):
    ## init the class
    def __init__(self, x_means, x_deviations):
        super().__init__()
        
        self.x_means      = x_means
        self.x_deviations = x_deviations
        
        ## F1
        self.f1_linear1 = nn.Linear(11, 1)
        
        ## F2
        self.f2_linear1 = nn.Linear(11, 14)
        self.f2_act1    = nn.Sigmoid()
        self.f2_linear2 = nn.Linear(14, 1)
        
    
    ## perform inference
    def forward(self, x):
        
        x = (x - self.x_means) / self.x_deviations
        
        ## F1
        f1 = self.f1_linear(x)
        
        ## F2
        f2 = self.f2_linear1(x)
        f2 = self.f2_act1(f2)
        f2 = self.f2_linear2(f2)
        
        y_pred = f1 + f2
        
        ## return torch.round( y_pred )
        return y_pred



def training_loop( N_Epochs, model, loss_fn, opt  ):
    
    for epoch in range(N_Epochs):
        for xb, yb in train_dl:
            
            y_pred = model(xb)
            loss   = loss_fn(y_pred, yb)
            
            opt.zero_grad()
            loss.backward()
            opt.step()
            
        if epoch % 20 == 0:
            print(epoch, "loss=", loss)


## model = LinRegNet( x_means, x_deviations  )
model = DL_Net( x_means, x_deviations  )

opt     = torch.optim.Adam(    model.parameters(), lr=learning_rate )
loss_fn = F.mse_loss

training_loop(  N_Epochs, model, loss_fn, opt  )



# Evaluate the Model

y_pred_test = model( X_test_tr )



0 loss= tensor(0.0916, grad_fn=<MseLossBackward0>)
20 loss= tensor(0.0006, grad_fn=<MseLossBackward0>)
40 loss= tensor(0.0024, grad_fn=<MseLossBackward0>)
60 loss= tensor(0.0005, grad_fn=<MseLossBackward0>)
80 loss= tensor(0.0002, grad_fn=<MseLossBackward0>)


In [11]:
list_preds = []
list_reals = []

for i in range(len(X_test_tr)):
    print("************************************")
    print("pred, real")
    np_real =   y_test_tr[i].detach().numpy()
    np_pred = y_pred_test[i].detach().numpy()
    print(( np_pred  , np_real))
    list_preds.append(np_pred[0])
    list_reals.append(np_real[0])

************************************
pred, real
(array([1.7297809], dtype=float32), array([1.74], dtype=float32))
************************************
pred, real
(array([-0.9075897], dtype=float32), array([-0.92], dtype=float32))
************************************
pred, real
(array([1.7480125], dtype=float32), array([1.74], dtype=float32))
************************************
pred, real
(array([-4.0211234], dtype=float32), array([-4.06], dtype=float32))
************************************
pred, real
(array([1.7707403], dtype=float32), array([1.79], dtype=float32))
************************************
pred, real
(array([-3.1130538], dtype=float32), array([-3.12], dtype=float32))
************************************
pred, real
(array([1.7458451], dtype=float32), array([1.74], dtype=float32))
************************************
pred, real
(array([0.30407813], dtype=float32), array([0.32], dtype=float32))
************************************
pred, real
(array([-4.03842], dtype=float32)

In [18]:

model.eval()

dummy_input = torch.randn(1, 36)

input_names  = ["input1"]
output_names = ["output1"]

torch.onnx.export(
        model, 
        dummy_input,
        "DLnet_StudentData.onnx",
        verbose=False,
        input_names = input_names,
        output_names = output_names
        
)


In [13]:

import xgboost as xgb

regressor = xgb.XGBRegressor(

        n_estimators=100,
        reg_lambda=1,
        gamma=0,
        max_depth=3
)


regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_test)


In [14]:

initial_types = [(
          'float_input',
          FloatTensorType(  [None, 36 ]  )

)]

onnx_model = onnxmltools.convert_xgboost(regressor, initial_types=initial_types)

onnxmltools.utils.save_model(onnx_model, 'Student_Dropout.onnx')


In [15]:
sess = rt.InferenceSession('Student_Dropout.onnx')

input_name = sess.get_inputs()[0].name

label_name = sess.get_outputs()[0].name

pred_ort = sess.run(  [label_name], {input_name: X_test.astype(np.float32)}  )


In [16]:
Student_Data

Unnamed: 0,Marital status,Application mode,Application order,Course,Daytime/evening attendance\t,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target
0,1,17,5,171,1,1,122.0,1,19,12,...,0,0,0,0,0.000000,0,10.8,1.4,1.74,Dropout
1,1,15,1,9254,1,1,160.0,1,1,3,...,0,6,6,6,13.666667,0,13.9,-0.3,0.79,Graduate
2,1,1,5,9070,1,1,122.0,1,37,37,...,0,6,0,0,0.000000,0,10.8,1.4,1.74,Dropout
3,1,17,2,9773,1,1,122.0,1,38,37,...,0,6,10,5,12.400000,0,9.4,-0.8,-3.12,Graduate
4,2,39,1,8014,0,1,100.0,1,37,38,...,0,6,6,6,13.000000,0,13.9,-0.3,0.79,Graduate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,1,1,6,9773,1,1,125.0,1,1,1,...,0,6,8,5,12.666667,0,15.5,2.8,-4.06,Graduate
4420,1,1,2,9773,1,1,120.0,105,1,1,...,0,6,6,2,11.000000,0,11.1,0.6,2.02,Dropout
4421,1,1,1,9500,1,1,154.0,1,37,37,...,0,8,9,1,13.500000,0,13.9,-0.3,0.79,Dropout
4422,1,1,1,9147,1,1,180.0,1,37,37,...,0,5,6,5,12.000000,0,9.4,-0.8,-3.12,Graduate


In [17]:

headers_list = Student_Data.columns.values.tolist()

headers_list

['Marital status',
 'Application mode',
 'Application order',
 'Course',
 'Daytime/evening attendance\t',
 'Previous qualification',
 'Previous qualification (grade)',
 'Nacionality',
 "Mother's qualification",
 "Father's qualification",
 "Mother's occupation",
 "Father's occupation",
 'Admission grade',
 'Displaced',
 'Educational special needs',
 'Debtor',
 'Tuition fees up to date',
 'Gender',
 'Scholarship holder',
 'Age at enrollment',
 'International',
 'Curricular units 1st sem (credited)',
 'Curricular units 1st sem (enrolled)',
 'Curricular units 1st sem (evaluations)',
 'Curricular units 1st sem (approved)',
 'Curricular units 1st sem (grade)',
 'Curricular units 1st sem (without evaluations)',
 'Curricular units 2nd sem (credited)',
 'Curricular units 2nd sem (enrolled)',
 'Curricular units 2nd sem (evaluations)',
 'Curricular units 2nd sem (approved)',
 'Curricular units 2nd sem (grade)',
 'Curricular units 2nd sem (without evaluations)',
 'Unemployment rate',
 'Inflation r

array([[1, 17, 5, ..., 1.4, 1.74, 'Dropout'],
       [1, 15, 1, ..., -0.3, 0.79, 'Graduate'],
       [1, 1, 5, ..., 1.4, 1.74, 'Dropout'],
       ...,
       [1, 1, 1, ..., -0.3, 0.79, 'Dropout'],
       [1, 1, 1, ..., -0.8, -3.12, 'Graduate'],
       [1, 10, 1, ..., 3.7, -1.7, 'Graduate']], dtype=object)

(4424, 37)

array([[1.74],
       [0.79],
       [1.74],
       ...,
       [0.79],
       [-3.12],
       [-1.7]], dtype=object)

(4424, 36)
(4424, 1)


(3539, 36)
(885, 36)
(3539, 1)
(885, 1)


dtype('O')

tensor([[ 1.1763e+00,  1.8610e+01,  1.7327e+00,  8.8605e+03,  8.9206e-01,
          4.6369e+00,  1.3260e+02,  1.8519e+00,  1.9566e+01,  2.2390e+01,
          1.0880e+01,  1.0954e+01,  1.2697e+02,  5.4987e-01,  1.2715e-02,
          1.1077e-01,  8.8471e-01,  3.5010e-01,  2.5600e-01,  2.3234e+01,
          2.2888e-02,  6.9087e-01,  6.2484e+00,  8.2766e+00,  4.6847e+00,
          1.0629e+01,  1.2998e-01,  5.3009e-01,  6.2122e+00,  8.0206e+00,
          4.4295e+00,  1.0246e+01,  1.5202e-01,  1.1594e+01,  1.2267e+00,
         -1.4524e-03]])

tensor([[5.9828e-01, 1.7437e+01, 1.3150e+00, 2.0605e+03, 3.1045e-01, 1.0298e+01,
         1.3296e+01, 6.9632e+00, 1.5593e+01, 1.5318e+01, 2.5961e+01, 2.4980e+01,
         1.4618e+01, 4.9768e-01, 1.1216e-01, 3.1399e-01, 3.1951e-01, 4.7717e-01,
         4.3659e-01, 7.6358e+00, 1.4967e-01, 2.2957e+00, 2.4406e+00, 4.1473e+00,
         3.0555e+00, 4.8412e+00, 6.5603e-01, 1.8761e+00, 2.1719e+00, 3.9507e+00,
         2.9896e+00, 5.2112e+00, 7.5873e-01, 2.6612e+00, 1.3874e+00, 2.2627e+00]])

<torch.utils.data.dataloader.DataLoader at 0x22ffabca6d0>

0 loss= tensor(0.0704, grad_fn=<MseLossBackward0>)
20 loss= tensor(0.0026, grad_fn=<MseLossBackward0>)
40 loss= tensor(0.0015, grad_fn=<MseLossBackward0>)
60 loss= tensor(0.0018, grad_fn=<MseLossBackward0>)
80 loss= tensor(0.0060, grad_fn=<MseLossBackward0>)
