In [None]:
import pandas as pd
import matplotlib as plt

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

import optuna

import tracemalloc

#svc
from sklearn.svm import SVC

### Data preparation

In [None]:
# Load the collision statistics from a CSV file (the path is relative, so it is
# resolved against the current working directory) and preview the first rows.
data = pd.read_csv('collisions_stat_BigShip_move_SmallShip_maxvel_10.csv')
data.head()

In [None]:
# Histogram of the 'collision' column, which is used as the target below.
data['collision'].hist()

In [None]:
# Separate the target column from the feature columns.
y = data['collision']
X = data.drop('collision', axis=1)

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions equal in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=42,
)

In [None]:
# Histogram of the training labels right after the stratified split.
y_train.hist()

In [None]:
# Oversample the training split with SMOTE (fixed seed); the test split is not touched.
# Note: X_train / y_train are overwritten with the resampled data, so the
# original training split is no longer available after this cell.
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X=X_train, y=y_train)

In [None]:
# Histogram of the training labels again, now after SMOTE resampling.
y_train.hist()

In [None]:
# Learn the scaling statistics from the training data only ...
scaler = StandardScaler().fit(X_train)

# ... and apply that same fitted transform to both splits.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Support vector classification

## Tuning params

In [None]:
def objective(trial):
    """Optuna objective: mean 3-fold cross-validated accuracy of an SVC.

    ``C`` and ``kernel`` are sampled for every trial. Depending on the chosen
    kernel, ``gamma``, ``degree`` and ``coef0`` are sampled as well; otherwise
    they keep the fallback values set below. The complete parameter dictionary
    is stored in the trial's ``full_params`` user attribute so the final model
    can be rebuilt from it.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        Mean accuracy over 3 cross-validation folds on the prepared training
        data (``X_train``, ``y_train``).
    """
    params = {
        'C': trial.suggest_float('C', 1e-3, 1e3, log=True),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        # Fallbacks for the kernel-specific parameters; they are overwritten
        # below only when the sampled kernel actually uses them.
        'degree': 3,
        'gamma': 'scale',
        'coef0': 0.0,
    }

    # gamma applies to every non-linear kernel: either keep 'scale' or sample
    # an explicit numeric value on a log scale.
    if params['kernel'] in ['poly', 'rbf', 'sigmoid']:
        gamma_type = trial.suggest_categorical('gamma_type', ['scale', 'value'])
        if gamma_type == 'value':
            params['gamma'] = trial.suggest_float('gamma', 1e-5, 1e2, log=True)

    # The polynomial degree is sampled only for the 'poly' kernel.
    if params['kernel'] == 'poly':
        params['degree'] = trial.suggest_int('degree', 2, 5)

    # The independent term is sampled only for 'poly' and 'sigmoid'.
    if params['kernel'] in ['poly', 'sigmoid']:
        params['coef0'] = trial.suggest_float('coef0', -1.0, 1.0)

    # Keep the full dictionary (including the non-sampled fallbacks) with the trial.
    trial.set_user_attr("full_params", params)

    model = SVC(**params, random_state=42)
    # Score on the prepared training data only, so the held-out test rows never
    # influence tuning and the features are scaled exactly as for the final fit.
    # NOTE(review): X_train was oversampled with SMOTE before this
    # cross-validation, so validation folds contain synthetic samples and the
    # score may be optimistic.
    return cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()

In [None]:
# Search the SVC hyperparameters with CMA-ES, maximising the objective's
# cross-validated score. The sampler is seeded so the suggested values are
# repeatable, matching the fixed seed (42) used by the other stochastic steps.
# NOTE(review): per the Optuna documentation CmaEsSampler does not handle
# categorical parameters and falls back to its independent sampler for them;
# confirm CMA-ES is the intended sampler for this search space.
# NOTE: with n_jobs=-1 trials run in parallel, so results can still differ
# between runs; use n_jobs=1 when exact reproducibility is required.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.CmaEsSampler(seed=42)
)

study.optimize(objective, n_trials=100, n_jobs=-1)

In [None]:
# Report the best trial: the sampled parameters, the full parameter dictionary
# stored by the objective, and the best score. The objective is evaluated with
# scoring='accuracy', so the score is labelled as accuracy rather than F1.
print("Лучшие параметры:", study.best_params)
print("Все параметры", study.best_trial.user_attrs["full_params"])
print("Лучшая accuracy (кросс-валидация):", study.best_value)

## Run & Check

In [None]:
# Rebuild the model from the best trial's full parameter dictionary, with the
# same random_state that was used when the candidate models were scored.
svc = SVC(**study.best_trial.user_attrs["full_params"], random_state=42)

In [None]:
# Start tracing memory allocations; the usage is read back after fit and predict.
# NOTE(review): tracemalloc tracks memory allocated through Python's allocators,
# so memory allocated inside native library code may not be counted — confirm
# this is acceptable for the measurement.
tracemalloc.start()

In [None]:
# Fit the tuned SVC on the resampled, scaled training data.
svc.fit(X_train, y_train)

In [None]:
# Predict labels for the scaled held-out test set.
y_pred = svc.predict(X_test)

In [None]:
# Read the current and peak traced sizes (in bytes), then stop tracing.
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()

# Report both figures in megabytes (1 MB = 10**6 bytes here).
print(
    "Потребление памяти: "
    f"{current / 10**6:.4f} MB (текущее), "
    f"{peak / 10**6:.4f} MB (пиковое)"
)

In [None]:
# Evaluate the predictions on the held-out test set: confusion matrix first,
# then the per-class classification report.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

## Conclusion

**Time:**
+ fit: - s
+ predict: - s

**Memory (tracemalloc):**
+ current (after fit and predict): - MB
+ peak: - MB