# Support vector classification for the Tabular Playground Series (TPS), November 2021

Release notes:
- V2: Bugfix

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score

In [None]:
# Read the competition's training and test tables into DataFrames.
train_df, test_df = map(
    pd.read_csv,
    (
        '../input/tabular-playground-series-nov-2021/train.csv',
        '../input/tabular-playground-series-nov-2021/test.csv',
    ),
)


In [None]:
# 7-fold stratified cross-validation of a standardized linear SVM.
# For every fold a fresh pipeline is fitted on the training split, scored by
# ROC AUC on the held-out split (appended to auc_list), and applied to the
# test set (decision values appended to pred_list for the submission cell).
auc_list, pred_list = [], []

# Build the test feature matrix once: it is the same for every fold.
# 'target' is dropped as well (errors='ignore' makes this a no-op when the
# column is absent) so that re-running this cell after the submission cell
# has added a 'target' column to test_df does not feed that column to the
# fitted model.
X_test = test_df.drop(columns=['id', 'target'], errors='ignore')

kf = StratifiedKFold(n_splits=7, shuffle=True, random_state=1)
for fold, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df.target)):
    print(f"Fold {fold}")
    # NOTE: for a quick trial run, the training rows can be subsampled here
    # by chaining .iloc[:50000] onto the selection below.
    X_tr = train_df.iloc[train_idx]
    X_va = train_df.iloc[val_idx]
    y_tr = X_tr.target
    y_va = X_va.target
    # Keep only the feature columns: the row id and the label are not inputs.
    X_tr = X_tr.drop(columns=['id', 'target'])
    X_va = X_va.drop(columns=['id', 'target'])

    # Train
    # The scaler lives inside the pipeline, so it is fitted on the training
    # split only and then re-applied to validation and test data.
    model = make_pipeline(StandardScaler(), LinearSVC(tol=1e-7, penalty='l2', dual=False, max_iter=2000))
    model.fit(X_tr, y_tr)

    # Validate
    # AUC is computed from the continuous decision values, not class labels.
    y_pred = model.decision_function(X_va)
    score = roc_auc_score(y_va, y_pred)
    print(score)
    auc_list.append(score)

    # Predict for the submission
    pred_list.append(model.decision_function(X_test))

avg_auc = sum(auc_list) / len(auc_list)
print(f"Average AUC: {avg_auc:.5f}")

In [None]:
# Combine the per-fold test decision values by element-wise summation and
# store them as the submission's 'target' column.
test_df['target'] = sum(pred_list)

# Write the two submission columns to disk, then show the first rows.
submission = test_df[['id', 'target']]
submission.to_csv('submission.csv', index=False)
submission.head()