In [1]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

from sklearn.model_selection import train_test_split

# Load the labelled points table and pick the target / feature columns.
idle_time_data = pd.read_csv('../data/df_points/df_points_18_21_class.csv')
TargetVariable = ['idle_time_class']
Predictors = ['bike_id', 'lat', 'lng', 'temp', 'rain', 'snow', 'dt_start', 'hex_enc', 'start_min', 'month', 'day']

# Always rebuilt from the frame, so re-running this cell is idempotent.
X = idle_time_data[Predictors].values
# TargetVariable is a list, so y is a column vector of shape (n_samples, 1).
y = idle_time_data[TargetVariable].values

# Set to an int (e.g. 42) for a reproducible split; None keeps the split unseeded.
SPLIT_SEED = None

# Split BEFORE scaling: the scaler must only see training rows, otherwise the
# test set's mean/variance leak into the features the model is trained on.
# NOTE(review): train_test_split shuffles by default, while the TimeSeriesSplit
# used for cross-validation assumes chronologically ordered rows. If the CSV is
# time-sorted, consider shuffle=False here — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.9, random_state=SPLIT_SEED
)

# Fit the standardisation on the training portion only, then apply the same
# transform to every split (and to the full matrix, which keeps the name X
# pointing at scaled features as before).
PredictorScaler = StandardScaler()
PredictorScalerFit = PredictorScaler.fit(X_train)
X_train = PredictorScalerFit.transform(X_train)
X_test = PredictorScalerFit.transform(X_test)
X = PredictorScalerFit.transform(X)

In [2]:
from sklearn.model_selection import TimeSeriesSplit

# Ordered cross-validation splitter shared by the sweep trials:
# 4 splits, with each training window capped at 10000 samples.
ts_cv = TimeSeriesSplit(
    max_train_size=10000,
    n_splits=4,
)

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import zero_one_loss, accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import cross_validate, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from time import perf_counter
import wandb

# W&B sweep definition for the SVC hyper-parameter search.
# (The "_rfc" suffix is historical; the name is kept so existing references work.)
sweep_configuration_rfc = {
    "project": "SVC",
    "name": "SVC-sweep-CV",
    # The training function must log a value under this exact key.
    "metric": {"name": "accuracy", "goal": "maximize"},
    "method": "random",
    "parameters": {
        "C": {
            "values": [0.6, 0.8, 1.0, 1.2, 1.4]
        },
        # 'precomputed' is intentionally absent: it requires a square
        # (n_samples, n_samples) kernel matrix as input and cannot be fitted
        # on the raw feature matrix, so every trial drawing it would be wasted.
        "kernel": {
            "values": ['linear', 'poly', 'rbf', 'sigmoid']
        },
        # Only used by the 'poly' kernel; ignored by the others.
        "degree": {
            "values": [2, 3, 4, 5]
        },
        # Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
        "gamma": {
            "values": ['scale', 'auto']
        },
        "decision_function_shape": {
            "values": ['ovr']
        }
    }
}

def my_train_func():
    """Run one sweep trial: cross-validate a one-vs-rest SVC and log the result.

    Hyper-parameters (C, kernel, degree, gamma, decision_function_shape) are
    read from the W&B run config supplied by the sweep agent. The model is
    scored with ``cross_val_score`` on the module-level ``X_train`` /
    ``y_train`` using the module-level ``ts_cv`` splitter.

    Logged keys:
        cv_res        -- array of per-fold accuracy scores
        accuracy      -- mean of ``cv_res``; this is the metric name the sweep
                         configuration declares, so it must be logged for the
                         sweep to receive its metric
        process_time  -- wall-clock seconds for the whole trial
    """
    t1_start = perf_counter()

    # Using the run as a context manager closes it even when the fit raises,
    # so a failed trial does not leave a dangling run behind.
    with wandb.init() as run:
        cfg = run.config

        model = SVC(C=cfg.C,
                    kernel=cfg.kernel,
                    degree=cfg.degree,
                    gamma=cfg.gamma,
                    decision_function_shape=cfg.decision_function_shape,
                    shrinking=True,
                    verbose=True
                    )

        clsf = OneVsRestClassifier(model)
        # y_train is a column vector; flatten it to the 1-D shape the
        # estimators expect (same as the baseline cell does).
        cv_res = cross_val_score(clsf, X_train, y_train.ravel(), cv=ts_cv, scoring='accuracy')
        run.log({"cv_res": cv_res, "accuracy": float(cv_res.mean())})

        t1_stop = perf_counter()
        run.log({"process_time": t1_stop - t1_start})

# Maximum number of trials this agent executes. None (the wandb default) means
# no cap: a "random" sweep then keeps sampling until the cell is interrupted.
# Set an int to make the cell terminate on its own.
SWEEP_MAX_RUNS = None

# INIT SWEEP
# Project name comes from the sweep config so it is defined in one place only.
sweep_id_svc = wandb.sweep(sweep_configuration_rfc, project=sweep_configuration_rfc["project"])
# RUN SWEEP
wandb.agent(sweep_id_svc, function=my_train_func, count=SWEEP_MAX_RUNS)

In [None]:
# Close any W&B run that is still active (e.g. after interrupting the sweep
# cell) before running the stand-alone baseline below.
wandb.finish()

from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import zero_one_loss, accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import cross_validate, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from time import perf_counter

# Baseline run: default-parameter SVC wrapped in one-vs-rest, timed end to end.
t1_start = perf_counter()

model = SVC(decision_function_shape='ovr', verbose=True)
clsf = OneVsRestClassifier(model)

# Both fitting and evaluation use at most the first 100000 rows of each split.
row_cap = slice(None, 100000)

print('train...')
clsf.fit(X_train[row_cap], y_train[row_cap].ravel())

print('predicting...')
y_pred = clsf.predict(X_test[row_cap])

# Targets are column vectors, so flatten before comparing with the predictions.
acc = accuracy_score(y_test[row_cap].ravel(), y_pred[row_cap].ravel())
print('acc: ',acc)

t1_stop = perf_counter()
print('program time: ',t1_stop-t1_start)