In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Folder holding the data files; point this at your local copy.
data_path = "D:/jupyter_py/5/"

# Join file names through pathlib so the folder setting works with or
# without a trailing separator (plain string concatenation would silently
# build a wrong file name if the final "/" were dropped).
data_dir = Path(data_path)

# Load the train and test samples.
train = pd.read_csv(data_dir / 'train_sample.csv')
test = pd.read_csv(data_dir / 'test_sample.csv')

# Separate the two feature columns from the target column.
X_train = train[['x0', 'x1']]
y_train = train['class']

# Two-step estimator: MinMaxScaler rescales the features, then an SVC
# classifies them. The step names ('scaler', 'svc') are the prefixes used
# when addressing each step's parameters, e.g. 'svc__C'.
pipe = Pipeline(
    steps=[
        ('scaler', MinMaxScaler()),
        ('svc', SVC()),
    ]
)

# Candidate hyper-parameters for the 'svc' pipeline step.
#   gamma: 0.1 raised to the powers -1..3, i.e. roughly 10, 1, 0.1, 0.01, 0.001
#   C:     5 times 3..5, i.e. 15, 20, 25
# NOTE(review): the recorded run picked gamma=10.0, the largest candidate,
# so the optimum may lie beyond this range - consider widening the grid.
gamma_exponents = np.arange(-1, 4)
c_multipliers = np.arange(3, 6)
param_grid = {
    'svc__gamma': np.power(0.1, gamma_exponents),
    'svc__C': 5 * c_multipliers,
}

# Evaluate every parameter combination in the grid with 5-fold
# cross-validation on the training data.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
)
grid.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score.
for label, value in (
    ("Best parameters:", grid.best_params_),
    ("Best mean cross-validated score:", grid.best_score_),
):
    print(label, value)

# Take the estimator selected by the grid search.
best_model = grid.best_estimator_

# Predict from the same two feature columns that were used for training.
test_features = test[['x0', 'x1']]
test_prediction = best_model.predict(test_features)

# Attach the predictions to the test frame, then write the ID/class pairs
# to submission.csv without the index column.
test['class'] = test_prediction
submission = test[['ID','class']]
submission.to_csv('submission.csv', index=False)

Best parameters: {'svc__C': 20, 'svc__gamma': 10.0}
Best mean cross-validated score: 0.8949999999999999
