In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LassoCV

In [None]:
# Load the cleaned medical dataset and report its dimensions and column summary.
md = pd.read_csv('medical_clean.csv')
print(md.shape)
# DataFrame.info() writes its report itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line to the output.
md.info()

In [None]:
# Preview the first five records, transposed so each column reads as a row.
md.iloc[:5].transpose()

In [None]:
# Data-quality check: count of fully duplicated rows, then total missing cells.
duplicate_rows = md.duplicated().sum()
missing_cells = md.isna().sum().sum()
print(duplicate_rows)
print(missing_cells)

In [None]:
# Recode 'Yes'/'No' values as 1/0 wherever they occur in the frame.
yes_no_codes = {'No': 0, 'Yes': 1}
md = md.replace(to_replace=yes_no_codes)

In [None]:
# Columns carried into the regression frame; Initial_days is used as the
# target further down, the rest are candidate predictors.
regression_columns = [
    'Initial_days',
    'Age',
    'Gender',
    'VitD_levels',
    'Initial_admin',
    'HighBlood',
    'Stroke',
    'Overweight',
    'Diabetes',
    'Hyperlipidemia',
    'BackPain',
    'Anxiety',
    'Asthma',
]
# Work on an independent copy so later edits never touch md.
mdreg = md.loc[:, regression_columns].copy()

In [None]:
# Print the column summary (dtypes and non-null counts) of the regression subset.
mdreg.info()

In [None]:
# Peek at the first three rows of the regression subset.
mdreg.iloc[:3]

In [None]:
# One-hot encode Gender. pop() removes the source column from mdreg while
# handing its values to get_dummies.
mdgender = pd.get_dummies(mdreg.pop('Gender'))

# New indicator name -> category label produced by get_dummies.
gender_labels = {
    'gender_male': 'Male',
    'gender_female': 'Female',
    'gender_nonbinary': 'Nonbinary',
}
# Every insert targets position 2, so the last indicator added ends up leftmost.
for indicator_name, category in gender_labels.items():
    mdreg.insert(2, indicator_name, mdgender[category].astype(int))

In [None]:
# One-hot encode the admission type. pop() removes Initial_admin from mdreg
# while handing its values to get_dummies.
mdinitial_admit = pd.get_dummies(mdreg.pop('Initial_admin'))

# New indicator name -> category label produced by get_dummies.
admit_labels = {
    'admit_emerg': 'Emergency Admission',
    'admit_observation': 'Observation Admission',
    'admit_elective': 'Elective Admission',
}
# Every insert targets position 5, so the last indicator added ends up leftmost.
for indicator_name, category in admit_labels.items():
    mdreg.insert(5, indicator_name, mdinitial_admit[category].astype(int))

In [None]:
# Persist the encoded regression frame, leaving the row index out of the file.
prepared_path = 'medical_209T2_data.csv'
mdreg.to_csv(prepared_path, index=False)

In [None]:
# Annotated correlation matrix of every column in mdreg on a 9x9 inch figure.
fig, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(mdreg.corr(), annot=True, ax=ax)

In [None]:
# Remove the male indicator in place (presumably to serve as the reference
# category for the remaining gender dummies — confirm intent).
mdreg.drop(labels='gender_male', axis='columns', inplace=True)

In [None]:
# Redraw the annotated correlation matrix for the current columns of mdreg.
fig, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(mdreg.corr(), annot=True, ax=ax)

In [None]:
# Separate the target from the predictors.
# No constant column is appended: StandardScaler maps a zero-variance column
# to all zeros, and Lasso fits its own intercept by default, so a `const`
# feature would only add a dead coefficient and a dead column to the exports.
feature_names = [
    'Age', 'gender_female', 'gender_nonbinary', 'VitD_levels',
    'admit_elective', 'admit_emerg', 'admit_observation',
    'HighBlood', 'Stroke', 'Overweight', 'Diabetes',
    'Hyperlipidemia', 'BackPain', 'Anxiety', 'Asthma',
]
y = mdreg['Initial_days']
X = mdreg[feature_names]

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=62)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Export the four partitions as comma-delimited files.
for filename, values in (
    ('X_train.csv', X_train),
    ('y_train.csv', y_train),
    ('X_test.csv', X_test),
    ('y_test.csv', y_test),
):
    np.savetxt(filename, values, delimiter=',')

In [None]:
# Baseline Lasso fit with a hand-picked penalty strength of 0.1.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# Score on the training rows; squared error on the held-out rows.
t_score = lasso_model.score(X_train, y_train)
y_pred = lasso_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(
    f'Lasso Training Score: {t_score}',
    f"Mean Squared Error: {mse}",
    sep='\n',
)

print(
    f"Learned Coefficients: {lasso_model.coef_}",
    f"Intercept: {lasso_model.intercept_}",
    sep='\n',
)

In [None]:
# Choose the penalty strength by 5-fold cross-validation on the training data.
lasso_cv_model = LassoCV(cv=5, random_state=62).fit(X_train, y_train)
best_alpha = lasso_cv_model.alpha_
print(f'Optimal alpha: {best_alpha}')

In [None]:
# Refit Lasso using the cross-validated penalty strength.
lasso_model = Lasso(alpha=best_alpha).fit(X_train, y_train)

# Score on the training rows; squared error on the held-out rows.
t_score = lasso_model.score(X_train, y_train)
y_pred = lasso_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(
    f'Lasso Training Score: {t_score}',
    f"Mean Squared Error: {mse}",
    f"Learned Coefficients: {lasso_model.coef_}",
    f"Intercept: {lasso_model.intercept_}",
    sep='\n',
)

In [None]:
# Held-out error metrics for the most recent predictions, shown to 4 decimals.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
print(
    f'Mean Squared Error: {mse:.4f}',
    f'Root Mean Squared Error: {rmse:.4f}',
    f'R-squared: {r2:.4f}',
    sep='\n',
)