# Takeaway:

Standard Adam most likely works best on CIFAR10 with the Cohen architecture.

In [None]:
# IPython magics: make Python automatically re-import modules when their code changes
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys

# Extend the module search path before importing the project helper below.
# NOTE(review): the appended path is ".utilities/" (leading dot, no slash after
# it) while the import below uses the package name "utilities" — possibly a
# typo for "./utilities/"; confirm the intended directory.
sys.path.append(".utilities/")

# Project-local helper; used later in this notebook to fetch a sweep to a CSV file.
from utilities.download import download_sweep

# set pandas dataframe display options: show up to 500 columns and allow
# 1000 characters of width when rendering a frame
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

In [None]:
# Fully qualified sweep identifier string.
# NOTE(review): `sweep_str` is not referenced by any other visible cell, and its
# sweep id differs from the one listed in `SWEEPS` — possibly a leftover; confirm
# before relying on it.
sweep_str = "ljroos-msc/knot-solver/odiafcc3"

In [None]:
import os

# Make the figure folder. `exist_ok=True` replaces the separate existence
# check and is a no-op when the folder is already present.
os.makedirs("figures", exist_ok=True)

SAVE_FOLDER = "csv-files"
# Create the CSV folder up front as well, so that writing and reading the
# per-sweep CSV below does not depend on the folder having been made by hand.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Maps a dataset label to the id of the sweep that holds its runs.
SWEEPS = {"cifar10_hparams_tune": "nsn5yc39"}

# One DataFrame per dataset label, read back from the CSV saved for its sweep.
full_dfs = {}
for dataset, sweep in SWEEPS.items():
    print(f"Downloading {dataset}")
    sweep_id = f"ljroos-msc/knot-solver/{sweep}"
    save_loc = os.path.join(SAVE_FOLDER, f"{dataset}_{sweep}.csv")
    # The return value is not needed; the CSV at `save_loc` is read back below.
    # NOTE(review): `override_existing=False` presumably skips the download when
    # the CSV already exists — confirm in utilities.download.
    _ = download_sweep(sweep_id, save_loc, override_existing=False)

    full_dfs[dataset] = pd.read_csv(save_loc)

In [None]:
# Provenance note: data was downloaded from
# https://wandb.ai/ljroos-msc/mosaic/sweeps/w705aehx/table?workspace=user-luro
# (not sure if the link will work for others). The API is very slow to download.
# NOTE(review): this URL names a different project/sweep than the
# "ljroos-msc/knot-solver/nsn5yc39" sweep fetched by the download cell in this
# notebook — confirm which one is current.

# Alias the CIFAR10 tuning sweep table as `df`
# (original note: code to link different notebooks).
df = full_dfs["cifar10_hparams_tune"]

In [None]:
# Hyperparameters explored by the sweep; reused as model inputs in later cells.
hparams = ["beta1", "beta2", "learning_rate", "weight_decay"]

# Restrict the sweep table to the hyperparameters plus the three evaluation
# metrics, and discard every run that has a missing value in any of them.
hparam_df = df.loc[:, [*hparams, "val_loss", "val_accuracy", "test_accuracy"]].dropna()
hparam_df.head(20)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error


def decision_tree(cols, target, df=None):
    """Fit a decision-tree regressor and print its error against a mean baseline.

    The rows of ``df`` are split 80/20 into train and test parts (fixed seed),
    a ``DecisionTreeRegressor`` with ``min_samples_leaf=10`` is fitted on the
    training part, and four numbers are printed for the test part: the tree's
    mean squared error, the MSE of always predicting the training mean, and
    the mean absolute percentage error of each of the two.

    Parameters
    ----------
    cols : list of str
        Column names of ``df`` used as predictors.
    target : str
        Column name of ``df`` used as the regression target.
    df : pandas.DataFrame, optional
        Table holding ``cols`` and ``target``. When omitted, the notebook-level
        ``hparam_df`` is used, looked up at call time.

    Returns
    -------
    DecisionTreeRegressor
        The fitted model.

    Notes
    -----
    The percentage errors divide by the test targets, so a target value of
    zero in the test part makes them infinite or NaN.
    """
    if df is None:
        # Resolve the default when the function is called rather than when it
        # is defined, so a re-built `hparam_df` is picked up without having to
        # re-run this cell.
        df = hparam_df

    X = df[cols]
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fix the tree's seed too: ties between equally good splits are otherwise
    # broken randomly, which makes the fitted tree vary between runs.
    model = DecisionTreeRegressor(min_samples_leaf=10, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f"Decision Tree Mean Squared Error: {mse}")

    # predict baseline error as mean of y_train
    baseline = np.mean(y_train)
    # Build the constant prediction as float explicitly; np.full_like would
    # inherit y_test's dtype and truncate the mean for an integer target.
    baseline_pred = np.full(len(y_test), baseline, dtype=float)
    baseline_mse = mean_squared_error(y_test, baseline_pred)
    print(f"Baseline Mean Squared Error: {baseline_mse}")

    # report mean (absolute) percentage errors
    # decision tree
    mpe = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    print(f"Mean Percentage Error: {mpe}")

    # baseline
    mpe_baseline = np.mean(np.abs((y_test - baseline) / y_test)) * 100
    print(f"Baseline Mean Percentage Error: {mpe_baseline}")

    return model


# Fit the tree on all four hyperparameters with validation loss as the target;
# the fitted model is inspected in the following cells.
tree_model = decision_tree(hparams, "val_loss", hparam_df)

In [None]:
# print nodes in tree_model
from sklearn.tree import export_text

# Render the fitted tree as plain text, labelling the features with the
# hyperparameter names, and print it.
r = export_text(tree_model, feature_names=hparams)
print(r)

In [None]:
# Locate the leaf of tree_model with the lowest predicted target value and
# summarise the runs that fall into that leaf.

# per-row prediction, and the node each row ends up in
preds = tree_model.predict(hparam_df[hparams])
appls = tree_model.apply(hparam_df[hparams])

# row position with the lowest prediction (reuses `preds` rather than
# running the model a second time)
min_pred_obs = np.argmin(preds)
print(f"min pred value: {preds[min_pred_obs]}")
min_apply_obs = appls[min_pred_obs]

print(min_pred_obs)
print(f"min node number: {min_apply_obs}")

# find indices with apply value equal to min_apply_obs
# (np.where returns a 1-tuple holding the array of matching positions)
min_indices = np.where(appls == min_apply_obs)

# number of indices in leaf
print(f"num indices in min leaf: {len(min_indices[0])}")

# select the rows of hparam_df at these positions
min_df = hparam_df.iloc[min_indices[0]]

# print the per-column median of the rows in this leaf
print(min_df.median())

In [None]:
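# CIFAR10 — recorded output of the min-leaf summary cell above.
# Note: the column names below (log_learning_rate, log_weight_decay, log_eps)
# differ from the current `hparams` list, so this was likely captured from an
# earlier version of the sweep: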

# min pred value: 0.6864680647850037
# 41
# min node number: 35
# num indices in min leaf: 15
# beta1                0.804736
# beta2                0.993898
# log_learning_rate   -5.205313
# log_weight_decay    -4.909954
# log_eps             -7.989710
# val_loss             0.690507
# dtype: float64

In [None]:
# Display the filtered hyperparameter/metric table for inspection.
hparam_df

In [None]:
import matplotlib.pyplot as plt

# One panel per hyperparameter, each showing all three metrics against it.
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# Separate hyperparameter columns from metric columns *before* pairing them
# with axes, so a metric column can never use up (and leave empty) a panel.
metric_cols = ["val_loss", "val_accuracy", "test_accuracy"]
hparam_cols = [col for col in hparam_df.columns if col not in metric_cols]

for a, hparam in zip(ax.flatten(), hparam_cols):
    for metric in metric_cols:
        a.scatter(
            x=hparam_df[hparam], y=hparam_df[metric], label=metric, s=15, alpha=0.5
        )
    # title and axis labels are per panel, so set them once outside the metric loop
    a.set_title(label=hparam)
    a.set_xlabel(hparam)
    a.set_ylabel("metric value")
    a.legend()
plt.show()

In [None]:
# Validation and test accuracy of every run, plotted against its learning rate.
fig, ax = plt.subplots(1, 1)
ax.scatter(
    x=hparam_df["learning_rate"],
    y=hparam_df["val_accuracy"],
    # this series is the validation accuracy, so label it as such (was "train")
    label="val",
    s=15,
    alpha=0.5,
)
ax.scatter(
    x=hparam_df["learning_rate"],
    y=hparam_df["test_accuracy"],
    label="test",
    s=15,
    alpha=0.5,
)
ax.legend()
# title now names the two series actually drawn (was "train / val accuracies")
ax.set_title("val / test accuracies")
ax.set_xlabel("learning rate")
ax.set_ylabel("accuracy")
plt.show()

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
import statsmodels.api as sm

# Quadratic fit: regress validation loss on all degree-2 polynomial terms of
# the four hyperparameters using ordinary least squares.
predictor_columns = ["beta1", "beta2", "learning_rate", "weight_decay"]
target_column = "val_loss"

# Pull out predictors and target with a fresh 0..n-1 index, so they line up
# row-for-row with the polynomial frame constructed below.
X = hparam_df[predictor_columns].reset_index(drop=True)
y = hparam_df[target_column].reset_index(drop=True)

# Expand to degree-2 terms; no bias column here because the intercept is
# added separately further down.
poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
X_poly = poly.fit_transform(X)
feature_names = poly.get_feature_names_out(predictor_columns)

# Wrap the expanded array in a labelled frame and add the intercept column.
X_poly_df = sm.add_constant(pd.DataFrame(X_poly, columns=feature_names))

# Fit the regression and print its summary table.
model = sm.OLS(endog=y, exog=X_poly_df).fit()
print(model.summary())