<a href="https://colab.research.google.com/github/megantoops/demo/blob/main/Quiz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

# 1. Load Iris dataset
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Define classifiers and parameter distributions
classifiers = {
    'RandomForestClassifier': (
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [None, 5, 10],
            'classifier__min_samples_split': [2, 5, 10]
        }
    ),
    'LogisticRegression': (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': [0.1, 1, 10],
            'classifier__penalty': ['l2'],
            'classifier__solver': ['lbfgs']
        }
    ),
    'Perceptron': (
        Perceptron(),
        {
            'classifier__penalty': [None, 'l2', 'l1', 'elasticnet'],
            'classifier__alpha': [0.0001, 0.001, 0.01]
        }
    ),
    'KNeighborsClassifier': (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': [3, 5, 7],
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# 3. Cross-validation folds to test
cv_folds = [3, 5, 7]

# 4. Run RandomizedSearchCV for each classifier and CV fold
results = []

for clf_name, (clf, param_dist) in classifiers.items():
    for cv in cv_folds:
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=2)),
            ('classifier', clf)
        ])

        search = RandomizedSearchCV(pipe, param_distributions=param_dist, n_iter=5, cv=cv, random_state=42)
        search.fit(X_train, y_train)

        results.append({
            'Classifier': clf_name,
            'CV Folds': cv,
            'Best Params': search.best_params_,
            'Best CV Score': search.best_score_,
            'Test Score': search.score(X_test, y_test)
        })

# 5. Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)




                Classifier  CV Folds  \
0   RandomForestClassifier         3   
1   RandomForestClassifier         5   
2   RandomForestClassifier         7   
3       LogisticRegression         3   
4       LogisticRegression         5   
5       LogisticRegression         7   
6               Perceptron         3   
7               Perceptron         5   
8               Perceptron         7   
9     KNeighborsClassifier         3   
10    KNeighborsClassifier         5   
11    KNeighborsClassifier         7   

                                          Best Params  Best CV Score  \
0   {'classifier__n_estimators': 200, 'classifier_...       0.900000   
1   {'classifier__n_estimators': 100, 'classifier_...       0.916667   
2   {'classifier__n_estimators': 50, 'classifier__...       0.915966   
3   {'classifier__solver': 'lbfgs', 'classifier__p...       0.908333   
4   {'classifier__solver': 'lbfgs', 'classifier__p...       0.916667   
5   {'classifier__solver': 'lbfgs', 'classifier

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

from google.colab import files
files.upload()

import pandas as pd

data = pd.read_csv("my_custom_dataset.csv")
X = data.drop("is_default", axis=1)
y = data["is_default"]

# 2. Define classifiers and parameter distributions
classifiers = {
    'RandomForestClassifier': (
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [None, 5, 10],
            'classifier__min_samples_split': [2, 5, 10]
        }
    ),
    'LogisticRegression': (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': [0.1, 1, 10],
            'classifier__penalty': ['l2'],
            'classifier__solver': ['lbfgs']
        }
    ),
    'Perceptron': (
        Perceptron(),
        {
            'classifier__penalty': [None, 'l2', 'l1', 'elasticnet'],
            'classifier__alpha': [0.0001, 0.001, 0.01]
        }
    ),
    'KNeighborsClassifier': (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': [3, 5, 7],
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# 3. Cross-validation folds to test
cv_folds = [3, 5, 7]

# 4. Run RandomizedSearchCV for each classifier and CV fold
results = []

for clf_name, (clf, param_dist) in classifiers.items():
    for cv in cv_folds:
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=2)),
            ('classifier', clf)
        ])

        search = RandomizedSearchCV(pipe, param_distributions=param_dist, n_iter=5, cv=cv, random_state=42)
        search.fit(X_train, y_train)

        results.append({
            'Classifier': clf_name,
            'CV Folds': cv,
            'Best Params': search.best_params_,
            'Best CV Score': search.best_score_,
            'Test Score': search.score(X_test, y_test)
        })

# 5. Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)


Saving my_custom_dataset.csv to my_custom_dataset.csv




                Classifier  CV Folds  \
0   RandomForestClassifier         3   
1   RandomForestClassifier         5   
2   RandomForestClassifier         7   
3       LogisticRegression         3   
4       LogisticRegression         5   
5       LogisticRegression         7   
6               Perceptron         3   
7               Perceptron         5   
8               Perceptron         7   
9     KNeighborsClassifier         3   
10    KNeighborsClassifier         5   
11    KNeighborsClassifier         7   

                                          Best Params  Best CV Score  \
0   {'classifier__n_estimators': 200, 'classifier_...       0.900000   
1   {'classifier__n_estimators': 100, 'classifier_...       0.916667   
2   {'classifier__n_estimators': 50, 'classifier__...       0.915966   
3   {'classifier__solver': 'lbfgs', 'classifier__p...       0.908333   
4   {'classifier__solver': 'lbfgs', 'classifier__p...       0.916667   
5   {'classifier__solver': 'lbfgs', 'classifier

In [5]:
from google.colab import files
files.upload()



Saving my_custom_dataset.csv to my_custom_dataset (2).csv


In [11]:
import pandas as pd

data = pd.read_csv("my_custom_dataset.csv")
X = data.drop("is_default", axis=1)
y = data["is_default"]