<a href="https://colab.research.google.com/github/kenstars/FibonacciSpiralCV/blob/main/FibonacciSpiral.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## Import all relevant libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Import standard libraries for data preprocessing
from sklearn import preprocessing
from sklearn.impute import SimpleImputer

#Import specific libraries for supervised learning
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

#Evaluation
from sklearn.metrics import accuracy_score, roc_curve, auc, ConfusionMatrixDisplay, RocCurveDisplay, precision_score, recall_score

# Visualizing the decision tree
from sklearn import tree

# Hyperparameter Tuning
from sklearn.model_selection import cross_val_score, GridSearchCV

#Standard Instruction
%matplotlib inline

# Silence every Python warning for the rest of the session: the 'ignore'
# action is installed with no category or module restriction, so it is a
# blanket filter rather than a targeted one.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Upload the data (CSV file) from the local machine to the Google Colab runtime
from google.colab import files

# Keep the upload result under its own name. Rebinding `files` to the result
# would replace the imported module, so any later `files.upload()` /
# `files.download()` call would fail until the module is imported again.
uploaded = files.upload()

Saving titanic_cleaned.csv to titanic_cleaned.csv


In [3]:
# Read the uploaded Titanic CSV from the working directory into the
# DataFrame `df` that the following cells operate on.
df = pd.read_csv("titanic_cleaned.csv")

In [4]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.base import clone

class FibonacciSpiralSearchCV:
    """Hyperparameter search that samples candidates along a golden-angle spiral.

    ``n_iter`` points are placed on a Fibonacci (golden-angle) spiral inside
    the unit disc, rescaled to ``[0, 1]`` and mapped linearly onto the
    ``(low, high)`` range of every parameter. Each candidate is scored with
    ``cross_val_score``; the best one is refitted on the full data.

    Parameters
    ----------
    estimator : estimator object
        Cloned (``sklearn.base.clone``) once per candidate.
    param_bounds : dict
        Maps a parameter name to a ``(low, high)`` pair. When both bounds are
        integers (Python or NumPy) the sampled value is truncated to ``int``.
    n_iter : int, default=50
        Number of spiral points, i.e. candidates evaluated. Must be >= 1.
    scoring, cv
        Forwarded unchanged to ``cross_val_score``.
    random_state
        Stored but not read anywhere in this class; the spiral is deterministic.

    Attributes
    ----------
    best_estimator_ : estimator or None
        Best candidate, refitted on the data passed to ``fit``.
    best_params_ : dict or None
        Parameter setting of ``best_estimator_``.
    best_score_ : float
        Mean cross-validated score of ``best_params_`` (``-inf`` before fit).

    Notes
    -----
    The spiral is two-dimensional: parameters at even positions share the x
    coordinate and parameters at odd positions share the y coordinate, so
    with more than two parameters several of them move in lock-step.
    """

    def __init__(self, estimator, param_bounds, n_iter=50, scoring=None, cv=5, random_state=None):
        self.estimator = estimator
        self.param_bounds = param_bounds
        self.n_iter = n_iter
        self.scoring = scoring
        self.cv = cv
        self.random_state = random_state
        self.best_estimator_ = None
        self.best_params_ = None
        self.best_score_ = -np.inf

    def _generate_fibonacci_spiral(self):
        """Return the list of ``n_iter`` parameter dicts sampled on the spiral."""
        golden_angle = np.pi * (3 - np.sqrt(5))
        integer_types = (int, np.integer)  # accept NumPy integer bounds as well
        samples = []

        for i in range(self.n_iter):
            theta = i * golden_angle
            r = np.sqrt(i / self.n_iter)

            # Point in the unit disc, rescaled from [-1, 1] to [0, 1] per axis.
            coords = ((r * np.cos(theta) + 1) / 2, (r * np.sin(theta) + 1) / 2)

            param_sample = {}
            for j, (key, (low, high)) in enumerate(self.param_bounds.items()):
                value = low + coords[j % 2] * (high - low)
                if isinstance(low, integer_types) and isinstance(high, integer_types):
                    # Truncation (not rounding), so the value is a plain int.
                    value = int(value)
                param_sample[key] = value

            samples.append(param_sample)

        return samples

    def fit(self, X, y=None, **fit_params):
        """Evaluate every spiral candidate by cross-validation and refit the best.

        ``fit_params`` are passed only to the final refit of the best
        estimator, not to the cross-validation runs.

        Raises
        ------
        ValueError
            If ``n_iter`` is not a positive integer, or if no candidate
            produced a comparable (non-NaN) cross-validation score.
        """
        if not isinstance(self.n_iter, (int, np.integer)) or self.n_iter < 1:
            raise ValueError(
                f"n_iter must be a positive integer, got {self.n_iter!r}"
            )

        # Start from a clean slate so a previous fit() on other data cannot
        # leak its best score / estimator into this search.
        self.best_estimator_ = None
        self.best_params_ = None
        self.best_score_ = -np.inf

        for params in self._generate_fibonacci_spiral():
            model = clone(self.estimator).set_params(**params)
            scores = cross_val_score(model, X, y, scoring=self.scoring, cv=self.cv)
            score = np.mean(scores)
            # Strict '>' keeps the first candidate on ties; NaN never wins.
            if score > self.best_score_:
                self.best_score_ = score
                self.best_params_ = params
                self.best_estimator_ = model

        if self.best_estimator_ is None:
            raise ValueError(
                "No candidate produced a valid cross-validation score; "
                "check param_bounds, scoring and the input data."
            )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def predict(self, X):
        """Predict with the refitted best estimator (requires a prior ``fit``)."""
        return self.best_estimator_.predict(X)

    def score(self, X, y):
        """Return ``best_estimator_.score(X, y)`` (requires a prior ``fit``)."""
        return self.best_estimator_.score(X, y)


In [5]:
import time
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# FibonacciSpiralSearchCV is defined in an earlier cell of this notebook,
# so that cell must have been run before the search code further down.

# ---------------------------------------------------------------
# 2. Load & preprocess Titanic data
# ---------------------------------------------------------------
cols_needed = [
    'Pclass', 'Sex', 'Age', 'Embarked',
    'Cabin_ind', 'New_fare', 'Family_Cnt',
]

# Keep a handle on the incoming frame under `titanic`, then rebind `df` to a
# copy holding only the target column followed by the selected features.
titanic = df
df = titanic.loc[:, ['Survived', *cols_needed]].copy()

# Fill missing ages with the column median; drop rows lacking 'Embarked'.
df['Age'] = df['Age'].fillna(df['Age'].median())
df = df.dropna(subset=['Embarked'])

# Label-encode 'Sex' and 'Embarked', using a fresh encoder for each column.
for col in ('Sex', 'Embarked'):
    df[col] = LabelEncoder().fit_transform(df[col])

# Target vector and feature matrix used by the search cells.
y = df['Survived']
X = df[cols_needed]

In [7]:
import time, numpy as np, pandas as pd
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Outer evaluation protocol: 5-fold stratified CV repeated twice (10 splits).
rskf = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

param_grid = {           # same grid as before
    "n_estimators": [50,100,150,200,300],
    "max_depth":    [3,5,7,9,None]
}
param_bounds = {         # same bounds as before
    "n_estimators": [50,300],
    "max_depth":    [3,10]
}
n_iter_fib = 25


def _metric_row(fold, method, y_true, y_pred, y_score, elapsed):
    """Build one result record for a search method on one outer fold.

    ``y_pred`` holds hard class predictions (used for accuracy and macro-F1);
    ``y_score`` holds the predicted probability of the positive class, which
    is what ``roc_auc_score`` needs to rank samples. ``elapsed`` is the search
    time in seconds.
    """
    return {
        "fold": fold, "method": method,
        "acc": accuracy_score(y_true, y_pred),
        "f1":  f1_score(y_true, y_pred, average="macro"),
        "auc": roc_auc_score(y_true, y_score),
        "time": elapsed
    }


records = []
iterations_count = 0
for fold, (train_idx, valid_idx) in enumerate(rskf.split(X, y), 1):
    iterations_count += 1
    print("Count of Value : ",iterations_count)
    X_tr, X_val = X.iloc[train_idx], X.iloc[valid_idx]
    y_tr, y_val = y.iloc[train_idx], y.iloc[valid_idx]

    # ---------- GridSearchCV ----------
    # NOTE: GridSearchCV runs candidates in parallel (n_jobs=-1) while
    # FibonacciSpiralSearchCV evaluates its candidates one after another,
    # so the "time" column compares different levels of parallelism.
    gs = GridSearchCV(RandomForestClassifier(random_state=42, n_jobs=-1),
                      param_grid, scoring="accuracy", cv=5, n_jobs=-1)
    t0 = time.perf_counter()   # monotonic clock, intended for measuring durations
    gs.fit(X_tr, y_tr)
    gs_time = time.perf_counter() - t0
    pred = gs.best_estimator_.predict(X_val)
    # Column 1 of predict_proba is the probability of the larger class label.
    proba = gs.best_estimator_.predict_proba(X_val)[:, 1]

    records.append(_metric_row(fold, "grid", y_val, pred, proba, gs_time))

    # ---------- Fibonacci Spiral ----------
    fib = FibonacciSpiralSearchCV(RandomForestClassifier(random_state=42, n_jobs=-1),
                                  param_bounds, n_iter=n_iter_fib,
                                  scoring="accuracy", cv=5)
    t0 = time.perf_counter()
    fib.fit(X_tr, y_tr)
    fib_time = time.perf_counter() - t0
    pred = fib.best_estimator_.predict(X_val)
    proba = fib.best_estimator_.predict_proba(X_val)[:, 1]

    records.append(_metric_row(fold, "fib", y_val, pred, proba, fib_time))

# One row per (fold, method). Note that this rebinds `df`, which until now
# referred to the preprocessed Titanic frame.
df = pd.DataFrame(records)





Count of Value :  1
Count of Value :  2
Count of Value :  3
Count of Value :  4
Count of Value :  5
Count of Value :  6
Count of Value :  7
Count of Value :  8
Count of Value :  9
Count of Value :  10
