In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

warnings.filterwarnings("ignore")

# Load the data
df = pd.read_csv('../Hba1cData/fact_visits_final_rev01.csv')

# Select relevant columns
df = df[['patient_id', 'visited_date', 'sugar', 'hba1c']]

# Convert visited_date to datetime 
df['visited_date'] = pd.to_datetime(df['visited_date'])


# Extracting year, month, and day from visited_date
df['year'] = df['visited_date'].dt.year
df['month'] = df['visited_date'].dt.month
df['day'] = df['visited_date'].dt.day

# Dropping the original visited_date column
df = df.drop(columns=['visited_date'])

# Defining features and target
X = df.drop(columns=['hba1c'])
y = df['hba1c']

# Splitting the data
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.25, random_state=42)

# Defining the model
model = ElasticNet()

# Defining the parameters grid
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 5.0],
    'l1_ratio': [0.1, 0.5, 0.7, 0.9]
}

# Initializing GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fitting GridSearchCV
grid_search.fit(train_x, train_y)

# Getting the best parameters
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Training the model with the best parameters
best_model = grid_search.best_estimator_

# Predicting on the test set
predicted_values = best_model.predict(test_x)

# Evaluating the model
rmse = np.sqrt(mean_squared_error(test_y, predicted_values))
mae = mean_absolute_error(test_y, predicted_values)
r2 = r2_score(test_y, predicted_values)

print(f"Best ElasticNet model (alpha={best_params['alpha']}, l1_ratio={best_params['l1_ratio']}):")
print(f"  RMSE: {rmse}")
print(f"  MAE: {mae}")
print(f"  R2: {r2}")


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best parameters found:  {'alpha': 0.5, 'l1_ratio': 0.7}
Best ElasticNet model (alpha=0.5, l1_ratio=0.7):
  RMSE: 2.0109321397445847
  MAE: 1.6713187671935066
  R2: 0.4214615482514946
