In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import pandas as pd
categorical_cols = ["Competition", "PlayerType", "Movement"]

def encode_labels(df, columns):
    le = LabelEncoder()
    for col in columns:
        df[col] = le.fit_transform(df[col])    
    return df, le

def load_data():
    return pd.read_csv("dataset.csv", sep=";")


TARGET = "ShotType"

# Parameters for grid search
gammas = [1e-2, 1e-1, 1, "scale"]
regularization_params = [1e-2, 1e-1]
kernels = ["linear", "rbf"]

# Create the parameter grid
param_grid = {
    "svc__gamma": gammas,
    "svc__C": regularization_params,
    "svc__kernel": kernels
}

df = load_data()
    
df, le = encode_labels(df, [TARGET] + categorical_cols)

X, y = df.drop(columns=TARGET), df[TARGET]

# Create a pipeline with scaling and SVM
pipeline = make_pipeline(SVC())

# Set up the GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, verbose=1)

# Run grid search
grid_search.fit(X, y)

# Print the best parameters and score
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best parameters found: {'svc__C': 0.1, 'svc__gamma': 0.01, 'svc__kernel': 'linear'}
Best cross-validation score: 0.6871120493151771
