In [61]:
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [40]:
hosp = pd.read_csv('LengthOfStay.csv', sep=',', low_memory=False)

In [41]:
hosp = pd.get_dummies(hosp, columns=['vdate', 'rcount', 'gender', 'dialysisrenalendstage', 'asthma', 'irondef', 'pneum', 'substancedependence', 'psychologicaldisordermajor', 'depress', 'psychother', 'fibrosisandother', 'malnutrition', 'hemo', 'discharged'])

In [49]:
hosp = pd.get_dummies(hosp, columns=['facid'])

In [42]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [43]:
hosp['eid'] = hosp['eid'].astype(float)
hosp['hematocrit'] = hosp['hematocrit'].str.replace(',', '')
hosp['hematocrit'] = hosp['hematocrit'].astype(float)
hosp[['eid', 'hematocrit']] = scaler.fit_transform(hosp[['eid', 'hematocrit']])

In [44]:
hosp['neutrophils'] = hosp['neutrophils'].astype(float)
hosp['sodium'] = hosp['sodium'].str.replace(',', '')
hosp['sodium'] = hosp['sodium'].astype(float)
hosp[['neutrophils', 'sodium']] = scaler.fit_transform(hosp[['neutrophils', 'sodium']])

In [45]:
hosp['bloodureanitro'] = hosp['bloodureanitro'].str.replace(',', '')
hosp['bloodureanitro'] = hosp['bloodureanitro'].astype(float)
hosp['glucose'] = hosp['glucose'].astype(float)
hosp[['glucose', 'bloodureanitro']] = scaler.fit_transform(hosp[['glucose', 'bloodureanitro']])

In [46]:
hosp[['creatinine', 'bmi']] = scaler.fit_transform(hosp[['creatinine', 'bmi']])

In [47]:
hosp[['pulse', 'respiration']] = scaler.fit_transform(hosp[['pulse', 'respiration']])

In [50]:
hosp[['secondarydiagnosisnonicd9','lengthofstay']] = scaler.fit_transform(hosp[['secondarydiagnosisnonicd9','lengthofstay']])

In [51]:
X_train, X_test, y_train, y_test = train_test_split(hosp.drop('lengthofstay', axis=1), hosp['lengthofstay'], test_size=0.2, random_state=42)

In [64]:
reg = HistGradientBoostingRegressor(learning_rate=0.1, max_iter=100, max_depth=3)
reg.fit(X_train, y_train)

In [65]:
y_pred = reg.predict(X_test)

In [66]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.06951247382873643


In [67]:
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)

print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')

MSE: 0.06951247382873643
RMSE: 0.2636521834325224
MAE: 0.1975993100043929


In [68]:
import pickle
with open('hospital_model.pkl', 'wb') as file:
    pickle.dump(reg, file)