In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
from datetime import datetime

from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.compose import ColumnTransformer

from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import log_loss

C:\Users\Rudolf\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\Users\Rudolf\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
  stacklevel=1)
In C:\Users\Rudolf\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Rudolf\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Rudolf\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In C:\Users\Rudolf\Anaconda3\lib\site-pack

## Data Exploration

#### Load data and get principal information about it

In [2]:
train_df = pd.read_csv('train_ml.csv', index_col=0)
test_df = pd.read_csv('test_ml.csv', index_col=0)

FileNotFoundError: [Errno 2] No such file or directory: '../input/2el1730-machine-learning/train_ml.csv'

In [None]:
train_df.head(2)

In [None]:
train_df.describe()

In [None]:
train_df.info()

#### Exploring links between features and labels

In [None]:
train_df.groupby(['tld','org'])[['updates','personal','promotions','forums','purchases','travel','spam','social']].mean()

#### Exploring features alone

In [None]:
# Splitting the data into real features and predictions
def split_X_y(Xa):
    return Xa.drop(['updates','personal','promotions','forums','purchases','travel',
                    'spam','social'],axis=1), Xa[['updates','personal','promotions',
                                                  'forums','purchases','travel','spam','social']]

In [None]:
X, y = split_X_y(train_df.copy())
print("Shape of design matrix of training set:", X.shape)
print("Shape of labels matrix of training set:",y.shape)

In [None]:
columns = list(X.columns)
categorical_features = X.select_dtypes(include=['object','category']).columns.tolist()
numerical_features = X.select_dtypes(exclude=['object','category']).columns.tolist()

print('Columns:')
print(columns)
print()
print('Categorical features:')
print(categorical_features)
print()
print('Numerical features:')
print(numerical_features)

In [None]:
# Display number of missing values for each column
for col in columns:
    print(col,'----------------',X[col].isnull().sum())

In [None]:
# Display value counts for each categorical feature
for col in categorical_features:
    print("Column", col)
    print(X[col].value_counts())
    print('*****************************')

## Preprocessing, feature engineering

In [None]:
X.columns

#### Dates

In [None]:
# Retrieve day, month, time, year, number of day from the feature "date"
days=['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Sep','Oct','Nov','Dec']

def day_parse(L):
    for day in days:
        if day in L:
            return day

def month_parse(L):
    for month in months:
        if month in L:
            return month

def time_parse(L):
    M="".join(L.split(" "))
    plus=0;minus=0
    for i in range(len(M)):
        if M[i]==':':
            a=i
        if M[i]=='+' and (M[i-1] in ['0','1','2','3','4','5','6','7','8','9']) and (M[i+1] in ['0','1','2','3','4','5','6','7','8','9']):
            plus=i
        if M[i]=='-' and (M[i-1] in ['0','1','2','3','4','5','6','7','8','9']) and (M[i+1] in ['0','1','2','3','4','5','6','7','8','9']):
            minus=i
            
    time1=int(M[a-2:a]+M[a+1:a+3])
    
    
    if plus!=0:
        time2=int(M[plus+1:plus+5])
        time=(time1+time2)%2400
    elif minus!=0:
        time2=int(M[minus+1:minus+5])
        time=(time1-time2)%2400
    elif plus==0 and minus==0:
        time=time1
        
    return time

def year_parse(L):
    M=" ".join(L.split(" "))
    try :
        return int(re.findall('[0-9][0-9][0-9][0-9]',M)[0])
    except:
        return np.nan
    
def number_date_parse(L):
    M=" ".join(L.split(" "))
    try :
        return int(re.findall('[0-9][0-9]',M)[0])
    except:
        return np.nan

In [None]:
def date_process(data1):
    data=data1.copy()
    data['day']=data['date'].apply(day_parse)
    data['month']=data['date'].apply(month_parse)
    data['time']=data['date'].apply(time_parse)
    data['year']=data['date'].apply(year_parse)
    data['number_date']=data['date'].apply(number_date_parse)
    return data

In [None]:
X = date_process(X)

In [None]:
# Dummies creation for months and days
def create_dummies(data):
    return pd.get_dummies(data, columns=['month','day'], prefix='', prefix_sep='')

In [None]:
X = create_dummies(X)

In [None]:
X.columns

#### Ratios

In [None]:
average_url_size=1
def ratio_features(data):
    data['urls_ratio']=data['urls']*average_url_size/data['chars_in_body']
    data['images_ratio']=data['images']/data['chars_in_body']
    data['chars_in_subject_ratio']=data['chars_in_subject']/data['chars_in_body']
    data['salu_design_']=data['salutations']*data['designation']

In [None]:
ratio_features(X)

In [None]:
print(X.columns)
X.head(2)

#### Put all the strings in lower case

In [None]:
# Put all the strings in lower case
def lower_case(data):
    for col in data.select_dtypes(include=['object','category']).columns.tolist():
        data[col]=data[col].str.lower()

In [None]:
lower_case(X)

#### Mail types

In [None]:
# Splitting mail_type columns (e.g. "multipart/alternative" -> "multipart" + "alternative")
mail_type = train_df['mail_type'].unique().tolist()

M = []
for l in mail_type:
    a = "".join(str(l).split(" ")).lower()
    M += (a.split('/'))
keys = set(M)

def mail_type_transform(data):
    m=data.shape[0]
    newColumns={}
    for key in keys:
        temp=[]
        for i in range(m):
            if key in list( "".join(str(data['mail_type'].iloc[i]).split(" ")).split('/')):
                temp.append(1)
            else:
                temp.append(0)
        newColumns[key]=temp
    for key in newColumns.keys():
        data[key]=newColumns[key]

In [None]:
mail_type_transform(X)

In [None]:
X.columns

#### Dropping useless features

In [None]:
def dropping(data, cols=['date','mail_type','org','tld','nan']):
    data.drop(cols, axis=1, inplace=True)

In [None]:
dropping(X,['date','mail_type','nan'])

In [None]:
print(X.columns)
X.head(2)

#### Correlations

In [None]:
new_df = X.join(y)

In [None]:
new_df.columns

In [None]:
plt.figure(figsize=(20,15))
sns.heatmap(new_df.corr())

#### Remove low variance features

In [None]:
from sklearn.feature_selection import VarianceThreshold

# Define thresholds to check
thresholds = np.arange(0.0, 1, 0.010)
# Apply transform with each threshold
results = list()

for t in thresholds:
    # Define the transform
    transform = VarianceThreshold(threshold=t)
    # Transform the input data
    X_sel = transform.fit_transform(X.select_dtypes(exclude=['object','category']))
    # Determine the number of input features
    n_features = X_sel.shape[1]
    print('>Threshold=%.2f, Features=%d' % (t, n_features))
    # Store the result
    results.append(n_features)
# Plot the threshold vs the number of selected features
plt.plot(thresholds, results)
plt.show()

In [None]:
datavar = X.copy()

def variance_selector(data, threshold = 0.05):
    if threshold == 0:
        return data
    else:
        selector = VarianceThreshold(threshold)
        col1 = datavar.select_dtypes(exclude=['number']).columns.tolist()
        selector.fit(datavar[datavar.select_dtypes(include=['number']).columns]) 
        return data[list(set(col1+datavar.columns[selector.get_support(indices=True)].tolist()))] 

X_sel = variance_selector(X, threshold = 0.05)

In [None]:
print(X.shape)
sorted(X.columns.tolist())

In [None]:
print(X_sel.shape)
sorted(X_sel.columns.tolist())
# 8 features removed

#### Preprocessing function (does all the previous steps)

In [None]:
def preprocess(data):
    data = date_process(data)
    data = create_dummies(data)
    ratio_features(data)
    lower_case(data)
    mail_type_transform(data)
    dropping(data,['date','mail_type','nan'])
    data = variance_selector(data, threshold = 0.01)
    return data

## Pipeline creation

In [None]:
def model_pipeline(inner_model):
    # Obtaining the selected features
    categorical_features = X_sel.select_dtypes(include=['object','category']).columns.tolist()
    numerical_features = X_sel.select_dtypes(exclude=['object','category']).columns.tolist()
    
    # Pipeline with imputation, standardisation, one hot enconding, model creation
    numerical_pipeline = Pipeline(steps=[('imputation',SimpleImputer(strategy='median')),
                                         ('standard',RobustScaler())])

    categorical_pipeline = Pipeline(steps=[('imputation',SimpleImputer(strategy='most_frequent')),
                                          ('oneHotencode',OneHotEncoder(handle_unknown='ignore'))
                                          ])

    preparation = ColumnTransformer(transformers=[('categorical',categorical_pipeline, 
                                                   categorical_features),
                                                  ('numerical',numerical_pipeline,
                                                   numerical_features)])
    
    model = Pipeline(steps=[('preparation', preparation),
                            ('model', OneVsRestClassifier(inner_model))])
    
    return model

## Selection of the type of model to use

In [None]:
# A list of model to be tested
inner_models = [XGBClassifier(max_depth= 9, min_child_weight= 1 , gamma= 0.4, colsample_bytree= 0.7, subsample= 1, 
                            reg_alpha= 0.0001, learning_rate= 0.2, n_estimators= 350)]

In [None]:
# Splitting in train and test set
X_train, X_test, y_train, y_test = train_test_split(X_sel, y)

In [None]:
file = open("models_estimation.txt","a")
variance_threshold = 0.01

for inner_model in inner_models:
    model = model_pipeline(inner_model)
    model.fit(X_train, y_train)

    print("--------------------------------------------------------------------------")
    print("Date: ",datetime.now())
    print("Variance threshold: ",variance_threshold)
    print("Model name: ", type(inner_model).__name__)
    print("Model parameters: ", inner_model.get_params())
    # Printing model score
    model_train_score=model.score(X_train, y_train)
    model_test_score=model.score(X_test, y_test)
    model_train_logLoss=log_loss(y_train,model.predict_proba(X_train))
    model_test_logLoss=log_loss(y_test,model.predict_proba(X_test))
    print("Model score on the train set : %.2f" % model_train_score)
    print("Model score on the test set : %.2f" % model_test_score)

    #Printing model log score 
    print("Model log loss on the train set: ", model_train_logLoss)
    print("Model log loss on the test set: ",model_test_logLoss )
    print("--------------------------------------------------------------------------")
    
    file.write("--------------------------------------------------------------------------\r\n")
    file.write("Date: {}\r\n".format(datetime.now()))
    file.write("Variance threshold: {}\r\n".format(variance_threshold) )
    file.write("Model name: {}\r\n".format(type(inner_model).__name__))
    file.write("Model parameters: {}\r\n".format( inner_model.get_params()))
    file.write("Model score on the train set : {}\r\n".format(model.score(X_train, y_train)))
    file.write("Model score on the test set : {}\r\n".format(model.score(X_test, y_test)))
    file.write("Model log loss on the train set: {}\r\n".format(log_loss(y_train,model.predict_proba(X_train))))
    file.write("Model log loss on the test set: {}\r\n".format(log_loss(y_test,model.predict_proba(X_test))))
    file.write("--------------------------------------------------------------------------\r\n")
file.close()  

## Hyperparameters tuning (Grid Search)

Decision tree tuning

In [None]:
model=model_pipeline(DecisionTreeClassifier())
params_dt={'model__estimator__min_samples_leaf': range(1,10),
            'model__estimator__max_depth': (20,25,50,100,200,300),
            'model__estimator__criterion': ('gini', 'entropy'),
            'model__estimator__max_features': ('auto', 'sqrt', 'log2'),
            'model__estimator__min_samples_split': (2,3,4,5,6)}

gsearch_20 = GridSearchCV(estimator=model,param_grid=params_dt,
                          scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch_20.fit(X_train,y_train)
model = gsearch_20.best_estimator_
print(gsearch_20.best_params_)

# Printing model score
model_train_score=model.score(X_train, y_train)
model_test_score=model.score(X_test, y_test)
model_train_logLoss=log_loss(y_train,model.predict_proba(X_train))
model_test_logLoss=log_loss(y_test,model.predict_proba(X_test))
print("Model score on the train set : %.2f" % model_train_score)
print("Model score on the test set : %.2f" % model_test_score)

#Printing model log score 
print("Model log loss on the train set: ", model_train_logLoss)
print("Model log loss on the test set: ",model_test_logLoss )

Start with plain RandomForestClassifier, and update it after each gridsearch

In [None]:
model = model_pipeline(RandomForestClassifier())

params_rf_11={'model__estimator__max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'model__estimator__max_features': ['auto', 'sqrt'],
 'model__estimator__min_samples_leaf': [1, 2, 4],
 'model__estimator__min_samples_split': [2, 5, 10],
 'model__estimator__n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

gsearch11 = RandomizedSearchCV(estimator=model, param_distributions=params_rf_11,
                             scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch11.fit(X_train,y_train)
model = gsearch11.best_estimator_
print(gsearch11.best_params_)

# Printing model score
model_train_score=model.score(X_train, y_train)
model_test_score=model.score(X_test, y_test)
model_train_logLoss=log_loss(y_train,model.predict_proba(X_train))
model_test_logLoss=log_loss(y_test,model.predict_proba(X_test))
print("Model score on the train set : %.2f" % model_train_score)
print("Model score on the test set : %.2f" % model_test_score)

#Printing model log score 
print("Model log loss on the train set: ", model_train_logLoss)
print("Model log loss on the test set: ",model_test_logLoss )

In [None]:
model = model_pipeline(RandomForestClassifier(min_samples_leaf=1))

params_rf_12={'model__estimator__max_depth': [90, 100, 200,500,1000,None],
 'model__estimator__max_features': ['auto', 'sqrt'],
 'model__estimator__min_samples_split': [9, 10,11],
 'model__estimator__n_estimators': [700,750, 800, 850,900]}

gsearch12 = RandomizedSearchCV(estimator=model,param_distributions=params_rf_12,
                               scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch12.fit(X_train,y_train)
model = gsearch_12.best_estimator_
print(gsearch_12.best_params_)

# Printing model score
model_train_score=model.score(X_train, y_train)
model_test_score=model.score(X_test, y_test)
model_train_logLoss=log_loss(y_train,model.predict_proba(X_train))
model_test_logLoss=log_loss(y_test,model.predict_proba(X_test))
print("Model score on the train set : %.2f" % model_train_score)
print("Model score on the test set : %.2f" % model_test_score)

#Printing model log score 
print("Model log loss on the train set: ", model_train_logLoss)
print("Model log loss on the test set: ",model_test_logLoss )

In [None]:
model = model_pipeline(RandomForestClassifier(min_samples_leaf=1,min_samples_split=9))

params_rf_13={'model__estimator__max_depth': [100,125,150],
 'model__estimator__max_features': ['auto', 'sqrt'],
 'model__estimator__n_estimators': [900,950,1000]}

gsearch13 = RandomizedSearchCV(estimator=model,param_distributions=params_rf_13, 
                               scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch13.fit(X_train,y_train)
model = gsearch13.best_estimator_
print(gsearch13.best_params_)

# Printing model score
model_train_score=model.score(X_train, y_train)
model_test_score=model.score(X_test, y_test)
model_train_logLoss=log_loss(y_train,model.predict_proba(X_train))
model_test_logLoss=log_loss(y_test,model.predict_proba(X_test))
print("Model score on the train set : %.2f" % model_train_score)
print("Model score on the test set : %.2f" % model_test_score)

#Printing model log score 
print("Model log loss on the train set: ", model_train_logLoss)
print("Model log loss on the test set: ",model_test_logLoss )

Start with plain XGBClassifier, and update it after each gridsearch

In [None]:
model = model_pipeline(XGBClassifier())
param_test1 = {'model__estimator__max_depth': range(3,10,2),
               'model__estimator__min_child_weight': range(1,6,2)}
gsearch1 = GridSearchCV(estimator=model,param_grid=param_test1, 
                        scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch1.fit(X_train,y_train)
model = gsearch1.best_estimator_
gsearch1.best_params_, gsearch1.best_score_

# max_depth = 9, min_child_weight = 1

In [None]:
param_test2 = {'model__estimator__gamma': [i/10.0 for i in range(0,5)]}
gsearch2 = GridSearchCV(estimator=model,param_grid=param_test2, 
                        scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch2.fit(X_train,y_train)
model = gsearch2.best_estimator_
gsearch2.best_params_, gsearch2.best_score_

# gamma = 0.4

In [None]:
param_test3 = {'model__estimator__subsample': [i/10.0 for i in range(6,11)],
               'model__estimator__colsample_bytree': [i/10.0 for i in range(6,11)]}
gsearch3 = GridSearchCV(estimator=model,param_grid=param_test3,
                        scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch3.fit(X_train,y_train)
model = gsearch3.best_estimator_
gsearch3.best_params_, gsearch3.best_score_

# subsample = 1, colsamble_bytree = 0.7

In [None]:
param_test4 = {'model__estimator__reg_alpha': [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 100]}
gsearch4 = GridSearchCV(estimator=model,param_grid=param_test4,
                        scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch4.fit(X_train,y_train)
model = gsearch4.best_estimator_
gsearch4.best_params_, gsearch4.best_score_

# reg_alpha = 1e-4

In [None]:
param_test5 = {'model__estimator__learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
               'model__estimator__n_estimators': [250, 300, 350, 400]}
gsearch5 = GridSearchCV(estimator=model,param_grid=param_test5, 
                        scoring='neg_log_loss',n_jobs=4,cv=5)
gsearch5.fit(X_train,y_train)
model = gsearch5.best_estimator_
gsearch5.best_params_, gsearch5.best_score_

# learning_rate = 0.2, n_estimators = 350

## Model Evaluation

In [None]:
model = model_pipeline(XGBClassifier(max_depth = 9, min_child_weight = 1, gamma = 0.4,
                                     colsample_bytree = 0.7, subsample = 1,
                                     reg_alpha = 0.0001, learning_rate = 0.2, 
                                     n_estimators = 350))

model.fit(X_train,y_train)





In [46]:
print("Accuracy du modèle sur training set: %.2f" % model.score(X_train, y_train))
print("Score du modèle sur test set: %.2f" % model.score(X_test, y_test))

print()

log_loss_train = log_loss(y_train, model.predict_proba(X_train))
print("Log-loss du modèle sur training set: %.2f" % log_loss_train)
log_loss_test = log_loss(y_test, model.predict_proba(X_test))
print("Log-loss du modèle sur test set: %.2f" % log_loss_test)

Accuracy du modèle sur training set: 0.82
Score du modèle sur test set: 0.79

Log-loss du modèle sur training set: 2.11
Log-loss du modèle sur test set: 2.12


## Final training, prediction

In [None]:
# Train the model on the entire training set and show performance
X, y = split_X_y(pd.read_csv('../input/2el1730-machine-learning/train_ml.csv', index_col=0).copy())
X_preprocessed = preprocess(X)
model.fit(X_preprocessed, y)

accuracy_all_train = model.score(X_preprocessed, y)
print("Accuracy du modèle sur training set entier: %.2f" % accuracy_all_train)

log_loss_all_train = log_loss(y, model.predict_proba(X_preprocessed))
print("Log-loss du modèle sur training set entier: %.2f" % log_loss_all_train)

In [None]:
# Predict the labels of the unlabled data and save results to submission file
test_df = pd.read_csv('../input/2el1730-machine-learning/test_ml.csv', index_col=0)
test_df_preprocessed = preprocess(test_df)
pred_y = model.predict_proba(test_df_preprocessed)

pred_df = pd.DataFrame(pred_y, columns=['updates', 'personal', 'promotions',
                        'forums', 'purchases', 'travel',
                        'spam', 'social'])
pred_df.to_csv("submission_{}_var_{}_.csv".format("Bagg_XGBClassifier_cv",variance_threshold)
               , index=True, index_label='Id')

## Unsuccesful implementation of a neural network

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch

X_train.shape,y_train.shape
input = X_train.shape[1]
X_train = X_train.drop(['org','tld'], axis=1)
X_test = X_test.drop(['org','tld'], axis=1)

# determine the supported device
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu') # don't have GPU 
    return device

# convert a df to tensor to be used in pytorch
def df_to_tensor(df):
    device = get_device()
    return torch.from_numpy(df.values).float().to(device)

X_train = df_to_tensor(X_train)
X_test = df_to_tensor(X_test)
y_train = df_to_tensor(y_train)
y_test = df_to_tensor(y_test)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(43, 200)
        self.fc2 = nn.Linear(200, 100)
        self.fc3 = nn.Linear(100, 8)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

torch.reshape(X_train, (X_train.shape[0],X_train.shape[1]))
torch.reshape(X_test, (X_test.shape[0],X_test.shape[1]))
torch.reshape(y_train, (y_train.shape[0],y_train.shape[1]))
torch.reshape(y_test, (y_test.shape[0],y_test.shape[1]))

#Normalisation de nan par 0
test = torch.isnan(X_train)
for i in range (X_train.shape[0]):
    for j in range(X_train.shape[1]):
        if test[i,j]:
            X_train[i,j] =0

#Normalisation de nan par 0
test = torch.isnan(X_test)
for i in range (X_test.shape[0]):
    for j in range(X_test.shape[1]):
        if test[i,j]:
            X_test[i,j] =0

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    
    for i in range(len(X_train)):
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(X_train[i,:])
        loss = criterion(outputs, y_train[i,:])
        loss.backward()
        optimizer.step()
        if i%1000==0:
            print(loss)

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
output = loss(m(input), target)
print(output)

PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

y_pred = model.predict(X_test)

outputs = net(X_test)

# Printing model score
print("Score du modèle : %.2f" % criterion(net(X_train), y_train))
print("Score du modèle : %.2f" % criterion(net(X_test), y_test))

print(net(X_test[1,:]))