In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import wandb

In [2]:
# Load the prepared dataset that already contains the idle-time class label.
idle_time_data = pd.read_csv('../../data/final_df_points_18_21_class.csv')

TargetVariable = ['idle_time_class']
Predictors = ['bike_id', 'lat', 'lng', 'temp', 'rain', 'snow', 'wind_speed', 'humidity', 'dt_start',
              'hex_enc', 'start_min', 'year', 'month', 'day', 'on_station', 'in_zone', 'zone_name_enc']

# The target is selected with a list, so `y` is 2-D with a single column;
# later cells call .ravel() wherever a 1-D label vector is required.
# The target holds class labels and is therefore left unscaled.
X = idle_time_data[Predictors].values
y = idle_time_data[TargetVariable].values

# Split BEFORE scaling. shuffle=False keeps the file's row order, so the last
# 10% of rows form the test set (presumably rows are time-ordered -- confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.9, shuffle=False)

# Fit the scaler on the training rows only: fitting on the full matrix would
# leak the test set's mean/variance into the features the models are trained on.
PredictorScaler = StandardScaler()
PredictorScalerFit = PredictorScaler.fit(X_train)
X_train = PredictorScalerFit.transform(X_train)
X_test = PredictorScalerFit.transform(X_test)

# Keep `X` bound to the scaled full matrix (as before), now using the
# training-set statistics, in case other cells still reference it.
X = PredictorScalerFit.transform(X)

In [3]:
# Report the dimensions of each split, one line per array.
for split_label, split_array in (
    ('X_train: ', X_train),
    ('y_train: ', y_train),
    ('X_test: ', X_test),
    ('y_test: ', y_test),
):
    print(split_label, split_array.shape)

X_train:  (2289447, 17)
y_train:  (2289447, 1)
X_test:  (254383, 17)
y_test:  (254383, 1)


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def eval_classification(y_test, y_pred, labels=None):
    """Compute, print and return accuracy and macro-averaged classification metrics.

    Parameters
    ----------
    y_test : numpy.ndarray
        Ground-truth class labels; any shape, flattened with ``.ravel()``.
    y_pred : numpy.ndarray
        Predicted class labels; any shape, flattened with ``.ravel()``.
    labels : array-like, optional
        Label set forwarded to the precision/recall/F1 scorers. ``None``
        (the default) lets scikit-learn use every label that occurs in
        ``y_test`` or ``y_pred``.

    Returns
    -------
    tuple
        ``(acc, macro_precision, macro_recall, macro_f1)``.
    """
    # Flatten once so every metric receives identically shaped 1-D inputs
    # (y_test may arrive as a single-column 2-D array).
    y_true_flat = y_test.ravel()
    y_pred_flat = y_pred.ravel()

    # Metrics
    acc = accuracy_score(y_true_flat, y_pred_flat)
    macro_precision = precision_score(y_true_flat, y_pred_flat, average='macro', labels=labels)
    macro_recall = recall_score(y_true_flat, y_pred_flat, average='macro', labels=labels)
    macro_f1 = f1_score(y_true_flat, y_pred_flat, average='macro', labels=labels)

    print('accuracy: %f' % acc)
    print('macro_precision: %f' % macro_precision)
    print('macro_recall: %f' % macro_recall)
    print('macro_f1: %f' % macro_f1)

    return acc, macro_precision, macro_recall, macro_f1

In [None]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier

# Spot Check Algorithms
# Fixed seed so the stochastic estimators give repeatable results across runs.
SEED = 42

models = []
models.append(('SGD', SGDClassifier(random_state=SEED)))
models.append(('GB', GradientBoostingClassifier(random_state=SEED)))
# NOTE(review): KNN prediction and kernel-SVC training scale poorly with the
# number of samples; on the full training set these two may be impractically
# slow -- consider sub-sampling or dropping them.
models.append(('KN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier(random_state=SEED)))
models.append(('MLP', MLPClassifier(random_state=SEED)))
models.append(('SVC', SVC(decision_function_shape='ovo')))

# Use one fixed label set (the classes present in the hold-out split) so the
# macro averages are computed over the same classes for every model.
class_labels = sorted(pd.unique(y_test.ravel()))

results = []  # one (acc, macro_precision, macro_recall, macro_f1) tuple per model
names = []    # model names, aligned index-by-index with `results`
for name, model in models:

    # Name the run after the model so runs can be told apart in the project.
    run = wandb.init(reinit=True, project='Compare-Algorithms-Classification', name=name)
    try:
        model.fit(X_train, y_train.ravel())
        y_pred = model.predict(X_test)

        acc, macro_precision, macro_recall, macro_f1 = eval_classification(y_test, y_pred, class_labels)

        # Log all metrics in a single call so they share one step.
        wandb.log({
            "accuracy": acc,
            "macro_precision": macro_precision,
            "macro_recall": macro_recall,
            "macro_f1": macro_f1,
        })

        results.append((acc, macro_precision, macro_recall, macro_f1))
        names.append(name)
    finally:
        # Always close the run, even if fitting or scoring raised.
        run.finish()
