In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# 1. Load dataset
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Create pipeline
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC())
])

# 3. Define parameter grid
param_grid = {
    'pca__n_components': [2, 3],
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf']
}

# 4. GridSearchCV
grid = GridSearchCV(pipe, param_grid)
grid.fit(X_train, y_train)

# 5. Results
print("Best parameters found:", grid.best_params_)
print("Best cross-validation score: {:.2f}".format(grid.best_score_))
print("Test set score: {:.2f}".format(grid.score(X_test, y_test)))


Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


Check for 3 fold, 5 fold and 7 fold cross validation

Replace classifier, SVC with RandomForestClassifier and LogisticRegression, Perceptron, knn .

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace Gridsearch with randomnsearch function.

Relplace with with your own csv dataset using code below:

In [21]:
import pandas as pd

data = pd.read_csv("/content/pd_speech_features.csv")
X = data.drop("numPeriodsPulses", axis=1)
y = data["numPeriodsPulses"]
print(X)
print(y)

      id  gender      PPE      DFA     RPDE  numPulses  meanPeriodPulses  \
0      0       1  0.85247  0.71826  0.57227        240          0.008064   
1      0       1  0.76686  0.69481  0.53966        234          0.008258   
2      0       1  0.85083  0.67604  0.58982        232          0.008340   
3      1       0  0.41121  0.79672  0.59257        178          0.010858   
4      1       0  0.32790  0.79782  0.53028        236          0.008162   
..   ...     ...      ...      ...      ...        ...               ...   
751  250       0  0.80903  0.56355  0.28385        417          0.004627   
752  250       0  0.16084  0.56499  0.59194        415          0.004550   
753  251       0  0.88389  0.72335  0.46815        381          0.005069   
754  251       0  0.83782  0.74890  0.49823        340          0.005679   
755  251       0  0.81304  0.76471  0.46374        340          0.005676   

     stdDevPeriodPulses  locPctJitter  locAbsJitter  ...  \
0              0.000087    

In [9]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, None]
}

# Evaluate for different folds
results = {}
for cv_folds in [3, 5, 7]:
    clf = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(clf, param_grid, cv=cv_folds)
    grid_search.fit(X, y)
    results[cv_folds] = {
        'best_params': grid_search.best_params_,
        'best_score': grid_search.best_score_
    }

# Display results
for fold, res in results.items():
    print(f"Fold: {fold}, Best Params: {res['best_params']}, Best Score: {res['best_score']:.4f}")

Fold: 3, Best Params: {'max_depth': 3, 'n_estimators': 50}, Best Score: 0.9667
Fold: 5, Best Params: {'max_depth': 3, 'n_estimators': 50}, Best Score: 0.9667
Fold: 7, Best Params: {'max_depth': 3, 'n_estimators': 50}, Best Score: 0.9533


In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Classifiers and their hyperparameter grids
classifiers = {
    "RandomForestClassifier": (
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': [50, 100],
            'classifier__max_depth': [3, 5, None]
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=2000),
        {
            'classifier__C': [0.01, 0.1, 1, 10],
            'classifier__penalty': ['l2']
        }
    ),
    "Perceptron": (
        Perceptron(),
        {
            'classifier__penalty': ['l2', 'l1', None],
            'classifier__max_iter': [500, 1000]
        }
    ),
    "KNeighborsClassifier": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': [3, 5, 7],
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# Evaluate each classifier with 3, 5, and 7 folds
for name, (clf, params) in classifiers.items():
    print(f"\n--- {name} ---")
    for cv_folds in [3, 5, 7]:
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('classifier', clf)
        ])
        grid = GridSearchCV(pipe, params, cv=cv_folds)
        grid.fit(X, y)
        print(f"CV: {cv_folds} | Best Params: {grid.best_params_} | Best Score: {grid.best_score_:.4f}")



--- RandomForestClassifier ---
CV: 3 | Best Params: {'classifier__max_depth': 3, 'classifier__n_estimators': 50} | Best Score: 0.9667
CV: 5 | Best Params: {'classifier__max_depth': 3, 'classifier__n_estimators': 100} | Best Score: 0.9667
CV: 7 | Best Params: {'classifier__max_depth': 3, 'classifier__n_estimators': 100} | Best Score: 0.9533

--- LogisticRegression ---
CV: 3 | Best Params: {'classifier__C': 10, 'classifier__penalty': 'l2'} | Best Score: 0.9733
CV: 5 | Best Params: {'classifier__C': 10, 'classifier__penalty': 'l2'} | Best Score: 0.9733
CV: 7 | Best Params: {'classifier__C': 10, 'classifier__penalty': 'l2'} | Best Score: 0.9731

--- Perceptron ---
CV: 3 | Best Params: {'classifier__max_iter': 500, 'classifier__penalty': 'l1'} | Best Score: 0.8600
CV: 5 | Best Params: {'classifier__max_iter': 500, 'classifier__penalty': 'l1'} | Best Score: 0.8733
CV: 7 | Best Params: {'classifier__max_iter': 500, 'classifier__penalty': None} | Best Score: 0.9140

--- KNeighborsClassifier -

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from scipy.stats import randint, uniform

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Define classifiers and parameter distributions for RandomizedSearchCV
classifiers = {
    "RandomForestClassifier": (
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': randint(50, 200),
            'classifier__max_depth': randint(3, 10),
            'classifier__min_samples_split': randint(2, 10)
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=2000),
        {
            'classifier__C': uniform(0.01, 10),
            'classifier__solver': ['lbfgs', 'liblinear'],
            'classifier__penalty': ['l2']
        }
    ),
    "Perceptron": (
        Perceptron(),
        {
            'classifier__penalty': ['l2', 'l1', None],
            'classifier__alpha': uniform(0.0001, 0.01),
            'classifier__max_iter': randint(500, 1500)
        }
    ),
    "KNeighborsClassifier": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': randint(3, 10),
            'classifier__weights': ['uniform', 'distance'],
            'classifier__p': [1, 2]  # p=1: Manhattan, p=2: Euclidean
        }
    )
}

# Perform RandomizedSearchCV for each classifier and CV fold
for name, (clf, param_dist) in classifiers.items():
    print(f"\n--- {name} ---")
    for cv_folds in [3, 5, 7]:
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('classifier', clf)
        ])
        random_search = RandomizedSearchCV(
            estimator=pipeline,
            param_distributions=param_dist,
            n_iter=10,  # Number of random combinations
            cv=cv_folds,
            random_state=42,
            n_jobs=-1
        )
        random_search.fit(X, y)
        print(f"CV: {cv_folds} | Best Params: {random_search.best_params_} | Best Score: {random_search.best_score_:.4f}")



--- RandomForestClassifier ---
CV: 3 | Best Params: {'classifier__max_depth': 9, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 142} | Best Score: 0.9667
CV: 5 | Best Params: {'classifier__max_depth': 9, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 142} | Best Score: 0.9667
CV: 7 | Best Params: {'classifier__max_depth': 9, 'classifier__min_samples_split': 4, 'classifier__n_estimators': 121} | Best Score: 0.9533

--- LogisticRegression ---
CV: 3 | Best Params: {'classifier__C': np.float64(5.996584841970366), 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'} | Best Score: 0.9733
CV: 5 | Best Params: {'classifier__C': np.float64(5.996584841970366), 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'} | Best Score: 0.9733
CV: 7 | Best Params: {'classifier__C': np.float64(3.7554011884736247), 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'} | Best Score: 0.9666

--- Perceptron ---
CV: 3 | Best Params: {'classifier__alpha': np.

In [19]:
pd.read_csv("pd_speech_features.csv")
display(data.head())

Unnamed: 0,id,gender,PPE,DFA,RPDE,numPulses,numPeriodsPulses,meanPeriodPulses,stdDevPeriodPulses,locPctJitter,...,tqwt_kurtosisValue_dec_28,tqwt_kurtosisValue_dec_29,tqwt_kurtosisValue_dec_30,tqwt_kurtosisValue_dec_31,tqwt_kurtosisValue_dec_32,tqwt_kurtosisValue_dec_33,tqwt_kurtosisValue_dec_34,tqwt_kurtosisValue_dec_35,tqwt_kurtosisValue_dec_36,class
0,0,1,0.85247,0.71826,0.57227,240,239,0.008064,8.7e-05,0.00218,...,1.562,2.6445,3.8686,4.2105,5.1221,4.4625,2.6202,3.0004,18.9405,1
1,0,1,0.76686,0.69481,0.53966,234,233,0.008258,7.3e-05,0.00195,...,1.5589,3.6107,23.5155,14.1962,11.0261,9.5082,6.5245,6.3431,45.178,1
2,0,1,0.85083,0.67604,0.58982,232,231,0.00834,6e-05,0.00176,...,1.5643,2.3308,9.4959,10.7458,11.0177,4.8066,2.9199,3.1495,4.7666,1
3,1,0,0.41121,0.79672,0.59257,178,177,0.010858,0.000183,0.00419,...,3.7805,3.5664,5.2558,14.0403,4.2235,4.6857,4.846,6.265,4.0603,1
4,1,0,0.3279,0.79782,0.53028,236,235,0.008162,0.002669,0.00535,...,6.1727,5.8416,6.0805,5.7621,7.7817,11.6891,8.2103,5.0559,6.1164,1
