In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

np.random.seed(42)

def load_data(DataSet):
    return pd.read_excel(DataSet, dtype={'NameMouv' : str, 
                  'SerieID' : int ,'X1': int, 'Y1': int,'X2': int, 'Y2': int ,'X3': int, 'Y3': int, 
                  "Vector_X1": float,'Vector_Y1': float,'Vector_X2': float,'Vector_Y2': float,'Vector_X3': float,'Vector_Y3': float})

global_data = load_data('DataSet.xlsx')
test = load_data('Lundi.xlsx')
print(global_data)

    NameMouv  SerieID   X1   Y1   X2  Y2   X3   Y3  Vector_X1  Vector_Y1  \
0          1        1  476  145  528  58  550  157      -13.0      -12.0   
1          1        2  465  141  520  42  546  151      -11.0       -4.0   
2          1        3  457  138  512  36  543  148       -8.0       -3.0   
3          1        4  452  137  508  31  541  146       -5.0       -1.0   
4          1        5  450  137  506  30  540  146       -2.0        0.0   
..       ...      ...  ...  ...  ...  ..  ...  ...        ...        ...   
302        1       25  413  205  448  88  514  178        0.0        0.0   
303        1       26  420  207  455  92  518  179        7.0        2.0   
304        1       27  437  209  473  94  527  178       17.0        2.0   
305        1       28  456  205  494  91  536  175       19.0       -4.0   
306        1       29  465  203  504  89  541  174        9.0       -2.0   

     Vector_X2  Vector_Y2  Vector_X3  Vector_Y3  
0         -9.0      -25.0       -4.0 

In [2]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve


train_data = global_data.drop("NameMouv", axis=1)
train_label = global_data["NameMouv"].copy()
test_data = test.drop("NameMouv", axis=1)
test_label = test["NameMouv"].copy()
train_data = train_data.sort_index()
train_label = train_label.sort_index()
test_data = test_data.sort_index()
test_label = test_label.sort_index()

#train_data, train_label, test_data, test_label = splitData(global_data)
num_attribs = list(train_data.select_dtypes(include=[np.number]))
print(num_attribs)
num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])

full_pipeline = ColumnTransformer([
        ("num", num_pipeline, num_attribs),
    ])



['SerieID', 'X1', 'Y1', 'X2', 'Y2', 'X3', 'Y3', 'Vector_X1', 'Vector_Y1', 'Vector_X2', 'Vector_Y2', 'Vector_X3', 'Vector_Y3']


In [3]:
train_data_transformed = full_pipeline.fit_transform(train_data)
test_data_transformed = full_pipeline.fit_transform(test_data)
train_data_prepared = pd.DataFrame(train_data_transformed, columns= num_attribs)
test_data_prepared = pd.DataFrame(test_data_transformed, columns= num_attribs)
#print(df_train_label)

In [4]:
def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    plt.plot(thresholds, precisions[:-1], "b-", label="Precision", linewidth=2)
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall", linewidth=2)
    plt.xlabel("Threshold", fontsize=16)
    plt.legend(loc="upper left", fontsize=16)
    plt.ylim([0, 1])

def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)

    
def calc_and_print_curves(model, X_data, Y_data):
    cross_val_score(model, train_data_prepared, X_data, cv=5, scoring="accuracy")
    y_scores = cross_val_predict(model, X_data, Y_data, cv=3,
                                 method="decision_function")
    precisions, recalls, thresholds = precision_recall_curve(Y_data, y_scores)

    fpr, tpr, thresholds = roc_curve(Y_data, y_scores)
    plt.figure(figsize=(8, 6))
    plot_roc_curve(fpr, tpr)
    plt.show()
    
    precisions, recalls, thresholds = precision_recall_curve(Y_data, y_scores)

    plt.figure(figsize=(8, 4))
    plot_precision_recall_vs_threshold(precisions, recalls, thresholds)
    plt.xlim([-10, 7.5])
    plt.show()

In [5]:
models = []

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error
from sklearn import  metrics

forest_clf = RandomForestClassifier(random_state=42, n_estimators=1)
forest_clf.fit(train_data_prepared, train_label)
forest_clf_predictions = forest_clf.predict(test_data_prepared)

forest_clf_mse = mean_squared_error(train_label, forest_clf.predict(train_data_prepared))
forest_clf_rmse = np.sqrt(forest_clf_mse)

In [7]:
test = list(forest_clf_predictions)
lundi = list(forest_clf_predictions).count('1')
mardi = list(forest_clf_predictions).count('2')
meanLundi = lundi/ len(forest_clf_predictions)
meanMardi = mardi/ len (forest_clf_predictions)

print('Lundi '+str(meanLundi)+'%')
print('Mardi '+str(meanMardi)+'%')
print(forest_clf_mse)
print(forest_clf_rmse)

Lundi 0.6129032258064516%
Mardi 0.3870967741935484%
0.026058631921824105
0.16142686245425233


In [8]:
from sklearn.svm import SVC
svc_reg = SVC(kernel = 'rbf', max_iter=2)
svc_reg.fit(train_data_prepared, train_label)
svc_predictions = svc_reg.predict(test_data_prepared)
svc_reg_mse = mean_squared_error(train_label, svc_reg.predict(train_data_prepared))
svc_reg_rmse = np.sqrt(forest_clf_mse)



In [9]:
print(svc_predictions)
reslundi = list(svc_predictions).count('1')
resmardi = list(svc_predictions).count('2')
resmeanLundi = reslundi/ len(svc_predictions)
resmeanMardi = resmardi/ len (svc_predictions)

print('Lundi '+str(resmeanLundi)+'%')
print('Mardi '+str(resmeanMardi)+'%')
print(svc_reg_rmse)

['1' '1' '1' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2'
 '2' '2' '2' '2' '1' '1' '1' '1' '1' '1' '1' '1' '1']
Lundi 0.3870967741935484%
Mardi 0.6129032258064516%
0.16142686245425233


In [10]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)

knn.fit(train_data_prepared, train_label)
knn_predictions = knn.predict(test_data_prepared)
knn_mse = mean_squared_error(train_label, knn.predict(train_data_prepared))
knn_rmse = np.sqrt(knn_mse)

print(knn_predictions)
knnlundi = list(knn_predictions).count('1')
knnmardi = list(knn_predictions).count('2')
knnmeanLundi = knnlundi/ len(knn_predictions)
knnmeanMardi = knnmardi/ len (knn_predictions)

print('Lundi '+str(knnmeanLundi)+'%')
print('Mardi '+str(knnmeanMardi)+'%')

print(knn_rmse)

['1' '1' '1' '1' '1' '1' '1' '1' '1' '2' '2' '2' '2' '2' '2' '2' '2' '1'
 '1' '1' '1' '1' '1' '1' '1' '2' '2' '1' '1' '1' '2']
Lundi 0.6451612903225806%
Mardi 0.3548387096774194%
0.05707301455353496
