In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, root_mean_squared_error, make_scorer

In [2]:
# Read the top-40 cell-cycle table from disk
data_path = "/Users/mariahloehr/IICD/IICD/Data/top40_cell_cycle.csv"
df = pd.read_csv(data_path)

# Target is the 'age' column; predictors are every column except 'phase' and 'age'
y = df['age']
X = df.drop(columns=['phase', 'age'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=949,
)

In [3]:
# Base MLP regressor handed to the hyperparameter search.
# max_iter caps the number of training iterations; random_state fixes the seed.
mlp = MLPRegressor(
    random_state=949,
    max_iter=1000,
)

In [6]:
# Hyperparameter tuning: candidate architectures, one tuple entry per hidden layer.
# The order of the list determines the row index in grid_search.cv_results_.
param_grid = dict(
    hidden_layer_sizes=[
        (150,),           # 1 hidden layer
        (300,),           # 1 hidden layer
        (500,),           # 1 hidden layer
        (50,),            # 1 hidden layer
        (300, 150),       # 2 hidden layers
        (150, 75),        # 2 hidden layers
        (300, 150, 75),   # 3 hidden layers
        (300, 300, 300),  # 3 hidden layers
    ]
)

# 10-fold cross-validated grid search scored with negated RMSE
# (scores are <= 0; values closer to zero are better), using all CPU cores.
grid_search = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=10,
    n_jobs=-1,
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Report the best-ranked hyperparameters
print("Best parameters:", grid_search.best_params_)


Best parameters: {'hidden_layer_sizes': (500,)}


In [7]:
# Tabulate the cross-validation results, one row per candidate architecture
results_df = pd.DataFrame(grid_search.cv_results_)

# Columns worth showing: the architecture, its mean/std CV score, and its rank
summary_cols = [
    'param_hidden_layer_sizes',
    'mean_test_score',
    'std_test_score',
    'rank_test_score',
]

# Print the summary ordered from best rank to worst
ranked_summary = results_df[summary_cols].sort_values(by='rank_test_score')
print(ranked_summary)

  param_hidden_layer_sizes  mean_test_score  std_test_score  rank_test_score
2                   (500,)        -1.541698        0.122010                1
6           (300, 150, 75)        -1.573757        0.133069                2
7          (300, 300, 300)        -1.575272        0.163491                3
1                   (300,)        -1.580438        0.131399                4
3                    (50,)        -1.593197        0.141743                5
5                (150, 75)        -1.596964        0.117773                6
4               (300, 150)        -1.611681        0.136503                7
0                   (150,)        -1.618588        0.178456                8


In [8]:
# Take the best model found by the grid search.
# grid_search was built without a `refit` argument, and GridSearchCV's default
# (refit=True) already refits the best configuration on the full data passed to
# grid_search.fit, i.e. (X_train, y_train). Calling .fit again here would only
# repeat an expensive training run and, with the fixed random_state, reproduce
# the same model.
best_mlp = grid_search.best_estimator_

# Predictions on the training and held-out test splits
y_train_pred = best_mlp.predict(X_train)
y_test_pred = best_mlp.predict(X_test)

LOCO

In [3]:
import sys

# Directory that is added to the import search path so that `locomp`
# can be imported in the next cell.
locomp_dir = "/Users/mariahloehr/IICD/IICD/feature_importance"

# Guard the append so re-running this cell does not pile up duplicate entries
if locomp_dir not in sys.path:
    sys.path.append(locomp_dir)

In [4]:
import locomp
from locomp import *
from locomp.MLmodels import *
import itertools
import importlib
from sklearn.base import BaseEstimator, RegressorMixin, clone
import itertools
from functools import partial
import multiprocessing as mp
import re

In [None]:
# define fit_func
def MLPreg(X, Y, X1, hidden_layer_sizes=(500,)):
    """Fit an MLP regressor on (X, Y) and return its predictions for X1.

    Used as the `fit_func` passed to LOCOMPReg, which is why the first three
    parameters are positional training features, training targets, and the
    features to predict on.

    Parameters
    ----------
    X : training features.
    Y : training targets.
    X1 : features to predict for.
    hidden_layer_sizes : tuple of int, default (500,)
        Hidden-layer architecture. The default is the best setting reported by
        the grid search earlier in this notebook
        ("Best parameters: {'hidden_layer_sizes': (500,)}").

    Returns
    -------
    The output of `MLPRegressor.predict(X1)`.
    """
    # Same max_iter / random_state as the estimator tuned in the grid search
    model = MLPRegressor(
        max_iter=1000,
        random_state=949,
        hidden_layer_sizes=hidden_layer_sizes,
    )
    model.fit(X, Y)
    return model.predict(X1)

In [None]:
# Settings for the LOCO run below.
# NOTE(review): J1 and J2 are not referenced in the visible cells — confirm
# whether they are still needed.
J1, J2 = 0, 1

# Passed positionally to LOCOMPReg as (n_ratio, m_ratio, B).
# NOTE(review): presumably the observation/feature subsampling ratios and the
# number of resamples — confirm against the locomp package.
n_ratio = 0.2
m_ratio = 0.2
B = 5000

# Prediction routine handed to LOCOMPReg
fit_func = MLPreg

In [None]:
# Build the LOCO regression object on the training split and run it.
# Positional arguments are kept in their original order because the
# LOCOMPReg parameter names are not visible from this notebook.
x = LOCOMPReg(
    X_train,
    y_train,
    n_ratio,
    m_ratio,
    B,
    fit_func,
    selected_features=[],
    alpha=0.1,
    bonf=False,
)
x.run_loco()
# x.loco_ci  (left disabled, as in the original cell)