In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Step 1: fetch the iris features/labels and hold out 20% of the rows for testing.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: chain scaling, PCA projection and an SVM into a single estimator,
# so every cross-validation fold refits the whole chain on its own training part.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC()),
])

# Step 3: candidate hyper-parameters, addressed as "<step name>__<parameter>".
param_grid = dict(
    pca__n_components=[2, 3],
    classifier__C=[0.1, 1, 10],
    classifier__kernel=['linear', 'rbf'],
)

# Step 4: exhaustive search over the grid (cv is left at the library default).
grid = GridSearchCV(estimator=pipe, param_grid=param_grid)
grid.fit(X_train, y_train)

# Step 5: report the winning combination, its cross-validated score,
# and the score of the refitted best pipeline on the held-out rows.
print("Best parameters found:", grid.best_params_)
print("Best cross-validation score: {:.2f}".format(grid.best_score_))
print("Test set score: {:.2f}".format(grid.score(X_test, y_test)))


Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


Check 3-fold, 5-fold, and 7-fold cross-validation.

Replace the SVC classifier with RandomForestClassifier, LogisticRegression, Perceptron, and KNeighborsClassifier (KNN).

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace GridSearchCV with RandomizedSearchCV.

Replace the iris data with your own CSV dataset using the code below:

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
# Load the CSV dataset from the local ./datasets folder.
data = pd.read_csv("./datasets/sports_betting_predictive_analysis.csv")

# Target column to predict.
y = data["Predicted_Winner"]

# Feature matrix: every column except the target, Match_ID and Date.
X = data.drop(columns=["Predicted_Winner", 'Match_ID', 'Date'])


In [14]:
# Build the preprocessing stage: route each feature column to a pipeline
# suited to its dtype (this cell does not split the data).

# 'number' matches every numeric width (int32, float32, unsigned, ...), not only
# int64/float64. ColumnTransformer drops columns that are not listed, so a
# narrower filter would silently discard such features.
numerical_cols = X.select_dtypes(include='number').columns.tolist()
# Text-like columns: plain object columns plus pandas categoricals.
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()

# Numeric features: fill gaps with the column mean, then standardise.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are encoded as all zeros.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    [
        ('num', numeric_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ],
    # Always return a dense array. With the default threshold the one-hot block
    # can make the combined output sparse, which a following PCA step rejects
    # on scikit-learn releases without sparse PCA support.
    sparse_threshold=0,
)



In [15]:

# Single seed shared by the split, the parameter sampling and the seeded model
# so that re-running the cell reproduces the same numbers.
SEED = 42

# Hold out 20% of the rows as the final test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Each entry maps a display name to (estimator, search space). Keys use the
# "classifier__" prefix because the estimator is the pipeline step named 'classifier'.
models = {
    # The forest is seeded explicitly: left unseeded, its bootstrap samples and
    # feature draws differ on every run and the reported scores drift.
    "RandomForest": (RandomForestClassifier(random_state=SEED), {
        'classifier__n_estimators': randint(50, 200),
        'classifier__max_depth': randint(3, 20)
    }),
    "LogisticRegression": (LogisticRegression(max_iter=1000), {
        'classifier__C': [0.01, 0.1, 1, 10],
        'classifier__penalty': ['l2']
    }),
    "Perceptron": (Perceptron(max_iter=1000), {
        'classifier__penalty': ['l2', 'elasticnet'],
        'classifier__alpha': [0.0001, 0.001, 0.01]
    }),
    "KNN": (KNeighborsClassifier(), {
        'classifier__n_neighbors': [3, 5, 7, 9],
        'classifier__weights': ['uniform', 'distance']
    })
}

# PCA settings searched for every model; defined once instead of per iteration.
pca_param_dist = {'pca__n_components': [2, 3, 5]}

for cv_fold in [3, 5, 7]:
    print(f"\n=== Cross-validation: {cv_fold}-fold ===")
    for name, (clf, param_dist) in models.items():
        # Preprocessing and PCA live inside the pipeline so they are refit on
        # the training part of every fold (no leakage from validation rows).
        pipe = Pipeline([
            ('preprocessor', preprocessor),
            ('pca', PCA()),
            ('classifier', clf)
        ])
        # Sample 10 parameter combinations; random_state fixes which ones.
        search = RandomizedSearchCV(
            pipe,
            {**pca_param_dist, **param_dist},
            n_iter=10,
            cv=cv_fold,
            random_state=SEED,
        )
        search.fit(X_train, y_train)
        print(f"\n{name} | Best Params: {search.best_params_}")
        print(f"{name} | CV Score: {search.best_score_:.2f}")
        # score() uses the best pipeline refitted on the full training set.
        print(f"{name} | Test Score: {search.score(X_test, y_test):.2f}")


=== Cross-validation: 3-fold ===





RandomForest | Best Params: {'classifier__max_depth': 4, 'classifier__n_estimators': 137, 'pca__n_components': 3}
RandomForest | CV Score: 0.14
RandomForest | Test Score: 0.11





LogisticRegression | Best Params: {'pca__n_components': 2, 'classifier__penalty': 'l2', 'classifier__C': 0.01}
LogisticRegression | CV Score: 0.14
LogisticRegression | Test Score: 0.11





Perceptron | Best Params: {'pca__n_components': 5, 'classifier__penalty': 'elasticnet', 'classifier__alpha': 0.0001}
Perceptron | CV Score: 0.09
Perceptron | Test Score: 0.01





KNN | Best Params: {'pca__n_components': 5, 'classifier__weights': 'uniform', 'classifier__n_neighbors': 5}
KNN | CV Score: 0.11
KNN | Test Score: 0.09

=== Cross-validation: 5-fold ===





RandomForest | Best Params: {'classifier__max_depth': 4, 'classifier__n_estimators': 137, 'pca__n_components': 3}
RandomForest | CV Score: 0.14
RandomForest | Test Score: 0.11





LogisticRegression | Best Params: {'pca__n_components': 2, 'classifier__penalty': 'l2', 'classifier__C': 0.01}
LogisticRegression | CV Score: 0.14
LogisticRegression | Test Score: 0.11





Perceptron | Best Params: {'pca__n_components': 5, 'classifier__penalty': 'elasticnet', 'classifier__alpha': 0.0001}
Perceptron | CV Score: 0.08
Perceptron | Test Score: 0.01





KNN | Best Params: {'pca__n_components': 5, 'classifier__weights': 'uniform', 'classifier__n_neighbors': 5}
KNN | CV Score: 0.11
KNN | Test Score: 0.09

=== Cross-validation: 7-fold ===





RandomForest | Best Params: {'classifier__max_depth': 4, 'classifier__n_estimators': 137, 'pca__n_components': 3}
RandomForest | CV Score: 0.14
RandomForest | Test Score: 0.11





LogisticRegression | Best Params: {'pca__n_components': 2, 'classifier__penalty': 'l2', 'classifier__C': 0.01}
LogisticRegression | CV Score: 0.14
LogisticRegression | Test Score: 0.11





Perceptron | Best Params: {'pca__n_components': 5, 'classifier__penalty': 'l2', 'classifier__alpha': 0.0001}
Perceptron | CV Score: 0.07
Perceptron | Test Score: 0.04





KNN | Best Params: {'pca__n_components': 5, 'classifier__weights': 'uniform', 'classifier__n_neighbors': 5}
KNN | CV Score: 0.11
KNN | Test Score: 0.09
