# Train Over Tracks Classifier

### Reading in Data

In [12]:
import pandas as pd
import numpy as np
from PIL import Image

In [33]:
crossing_images = pd.read_csv("../data/bw_trains.csv")

In [20]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [34]:
crossing_images

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,110583,110584,110585,110586,110587,110588,110589,110590,110591,110592
0,0,42,42,43,43,44,44,45,45,45,...,22,22,22,22,22,22,22,22,22,0
1,1,109,109,109,110,110,111,111,112,113,...,34,34,34,34,34,34,34,34,34,0
2,2,164,164,164,165,166,166,167,167,165,...,61,64,60,56,56,59,63,65,66,0
3,3,164,164,164,165,166,166,167,167,174,...,48,52,50,47,44,44,47,50,52,0
4,4,161,161,161,162,163,163,164,164,165,...,58,61,59,57,56,56,57,59,61,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,165,65,65,65,66,67,67,68,68,68,...,26,26,26,26,26,26,26,26,26,1
166,166,136,137,137,138,138,139,139,139,141,...,53,51,53,55,54,52,51,53,55,1
167,167,146,146,147,147,148,148,149,149,151,...,58,56,56,56,53,52,54,59,64,1
168,168,144,145,145,146,146,147,147,147,146,...,54,57,58,56,54,51,53,58,63,1


In [35]:
y = crossing_images["110592"]
x = crossing_images.drop(["110592","Unnamed: 0"],axis=1)
print(y.shape)
print(x.shape)

(170,)
(170, 110592)


In [38]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,random_state=2023)

In [39]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import matplotlib.pyplot as plt

In [40]:
input_nodes = x.shape[1]
output_nodes = 2
n_neurons = int(np.floor(input_nodes/(2*(x.shape[0]+output_nodes))))
nn_pipe = make_pipeline(StandardScaler(), MLPClassifier(hidden_layer_sizes=(100,50,25,5),max_iter=500,random_state=20))

In [41]:
nn_pipe.fit(x_train.values,y_train.values)
y_pred = nn_pipe.predict(x_test)
nn_cm = confusion_matrix(y_test,y_pred)
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred)
roc_auc = metrics.auc(fpr, tpr)

print(f"Accuracy: {metrics.accuracy_score(y_test, y_pred)}")
print(f"Precision: {metrics.precision_score(y_test, y_pred)}")
print(f"Recall: {metrics.recall_score(y_test, y_pred)}")
print(f"F1-Score: {metrics.f1_score(y_test, y_pred)}")
print(f"AUC: {roc_auc}")



Accuracy: 0.9607843137254902
Precision: 0.9583333333333334
Recall: 0.9583333333333334
F1-Score: 0.9583333333333334
AUC: 0.9606481481481483


In [26]:
import pickle

In [29]:
with open('tot_model', 'wb') as fh:
   pickle.dump(nn_pipe, fh)

In [42]:
from joblib import dump, load
dump(nn_pipe, '../src/model/model.joblib')

['../src/model/model.joblib']