In [7]:
from sklearn import preprocessing, metrics, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [8]:
# Display configuration: widen the pandas/numpy text rendering and cap the
# number of columns shown so wide frames stay readable in cell output.
pd.set_option('display.width', 500)
np.set_printoptions(linewidth=500)
pd.set_option('display.max_columns', 10)

# Load the preprocessed dataset. index_col=False tells pandas not to use the
# first column of the file as the row index.
df = pd.read_csv(
    "../../dataset_building/paris_velib_dataset_preprocessed.csv",
    index_col=False
)

# Only the last expression of a cell is rendered, so a bare `df.shape` in the
# middle of the cell is silently discarded; print it explicitly instead.
print(df.shape)

df.head()

Unnamed: 0,is_installed,is_renting,is_returning,longitude,latitude,...,record_timestamp_minute_21,record_timestamp_minute_46,record_timestamp_minute_50,record_timestamp_minute_51,record_timestamp_second_0
0,1,1,1,0.023661,0.48871,...,0,1,0,0,1
1,1,1,1,0.02336,0.488375,...,0,1,0,0,1
2,1,1,1,0.02352,0.488439,...,0,1,0,0,1
3,1,1,1,0.023851,0.489104,...,0,1,0,0,1
4,1,1,1,0.023768,0.488158,...,0,1,0,0,1


In [9]:
# Turn the target column into integer class ids: every distinct value of
# "bike_availability_ratio" becomes its own class.
# NOTE(review): the rendered output contains label 827, so there are at least
# 828 classes; presumably the ratio is a continuous quantity, in which case
# regression or coarse binning may suit it better -- confirm with the author.
le = preprocessing.LabelEncoder()
le.fit(df["bike_availability_ratio"])
Y = le.transform(df["bike_availability_ratio"])
Y

array([269, 170, 281, ..., 827,   0,   0], dtype=int64)

In [10]:
# Feature matrix: every column except the target, as a plain NumPy array.
X = df.drop(columns=["bike_availability_ratio"]).to_numpy()
X

array([[1., 1., 1., ..., 0., 0., 1.],
       [1., 1., 1., ..., 0., 0., 1.],
       [1., 1., 1., ..., 0., 0., 1.],
       ...,
       [1., 1., 1., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [11]:
# 70% training and 30% test. The seed is fixed so the split (and therefore
# every score reported below) is the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [None]:
# # # # # # # # # # # # # #
#                         #
#   K-NEAREST NEIGHBOUR   #
#                         #
# # # # # # # # # # # # # #

# Fit a 1-nearest-neighbour classifier on the training split (fit() returns
# the estimator itself) and predict the held-out split.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Report support-weighted precision, recall and F1 on the test split.
for _label, _scorer in (
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
    ("F1 Score:", metrics.f1_score),
):
    print(_label, _scorer(y_test, y_pred, average='weighted'))

In [None]:
# # # # # # # # # # # # # #
#                         #
#   LOGISTIC REGRESSION   #
#                         #
# # # # # # # # # # # # # #

# Fit a logistic-regression classifier (L-BFGS solver, up to 1000 iterations)
# on the training split and predict the held-out split.
logisticRegr = LogisticRegression(solver='lbfgs', max_iter=1000)
logisticRegr.fit(X_train, y_train)
y_pred = logisticRegr.predict(X_test)

# Score over every class present in y_test / y_pred. The previous
# `labels=np.unique(y_pred)` restricted the weighted averages to the classes
# the model actually predicted, which drops every never-predicted class from
# the average, inflates the scores, and makes them incomparable with the KNN
# cell. `zero_division=0` counts an undefined per-class score as 0 without
# emitting UndefinedMetricWarning (requires scikit-learn >= 0.22).
print("Precision:", metrics.precision_score(y_test, y_pred, average='weighted', zero_division=0))
print("Recall:", metrics.recall_score(y_test, y_pred, average='weighted', zero_division=0))
print("F1 Score:", metrics.f1_score(y_test, y_pred, average='weighted', zero_division=0))

In [None]:
# # # # # # # # # # # # # #
#                         #
#      RANDOM FOREST      #
#                         #
# # # # # # # # # # # # # #

# Fit a 500-tree random forest on the training split and predict the held-out
# split. The forest is stochastic, so the seed is fixed to make the reported
# scores reproducible across re-runs. verbose=10 logs training progress.
clf = RandomForestClassifier(n_estimators=500, verbose=10, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Score over every class present in y_test / y_pred. The previous
# `labels=np.unique(y_pred)` restricted the weighted averages to the classes
# the model actually predicted, which drops every never-predicted class from
# the average, inflates the scores, and makes them incomparable with the KNN
# cell. `zero_division=0` counts an undefined per-class score as 0 without
# emitting UndefinedMetricWarning (requires scikit-learn >= 0.22).
print("Precision:", metrics.precision_score(y_test, y_pred, average='weighted', zero_division=0))
print("Recall:", metrics.recall_score(y_test, y_pred, average='weighted', zero_division=0))
print("F1 Score:", metrics.f1_score(y_test, y_pred, average='weighted', zero_division=0))

In [None]:
# # # # # # # # # # # # # #
#                         #
#  SUPPORT-VECTOR MACHINE #
#                         #
# # # # # # # # # # # # # #

# Fit a linear-kernel support-vector classifier on the training split and
# predict the held-out split. verbose=True prints the solver's progress.
# NOTE(review): kernel SVC training may be slow on a dataset of this size;
# LinearSVC could be worth evaluating -- confirm before switching, as it is a
# different formulation.
clf = svm.SVC(kernel='linear', verbose=True)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Score over every class present in y_test / y_pred. The previous
# `labels=np.unique(y_pred)` restricted the weighted averages to the classes
# the model actually predicted, which drops every never-predicted class from
# the average, inflates the scores, and makes them incomparable with the KNN
# cell. `zero_division=0` counts an undefined per-class score as 0 without
# emitting UndefinedMetricWarning (requires scikit-learn >= 0.22).
print("Precision:", metrics.precision_score(y_test, y_pred, average='weighted', zero_division=0))
print("Recall:", metrics.recall_score(y_test, y_pred, average='weighted', zero_division=0))
print("F1 Score:", metrics.f1_score(y_test, y_pred, average='weighted', zero_division=0))