# The purpose of this short notebook is to familiarize myself with importing models. I will:
1. Initialize new data (as could be collected from a user if I designed an interface)
2. Transform that data into the same normalized, regularized, one-hot-encoded format as the training data
3. Import the models
4. Make predictions on the new data

In [33]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import joblib

In [34]:
df = pd.read_csv("heart_failure_data.csv")
df = df.drop(["HeartDisease"], axis=1)

### Initialize new data

In [35]:
new_data = pd.DataFrame({
    "Age": [50],
    "Sex": ["F"],
    "ChestPainType": ["TA"],
    "RestingBP": [130],
    "Cholesterol": [240],
    "FastingBS": [1],
    "RestingECG": ["LVH"],
    "MaxHR": [170],
    "ExerciseAngina": ["N"],
    "Oldpeak": [1.5],
    "ST_Slope": ["Down"]
})

### Add new data to dataframe

In [36]:
# add new data to dataframe
display(df.tail())
df = pd.concat([new_data, df], ignore_index=True)
display(df.tail())

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat
917,38,M,NAP,138,175,0,Normal,173,N,0.0,Up


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
914,45,M,TA,110,264,0,Normal,132,N,1.2,Flat
915,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat
916,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat
917,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat
918,38,M,NAP,138,175,0,Normal,173,N,0.0,Up


### Augment features the same way I did on training data

In [37]:
numerical_features = df.select_dtypes(include=[np.number])
numerical_features = numerical_features.drop(["FastingBS"], axis=1)
continuous_feature_names = numerical_features.columns.tolist()

categorical_features = df.select_dtypes(include=[object])
categorical_feature_names = categorical_features.columns.to_list() + ["FastingBS"]

In [38]:
df2 = df.copy(deep=True)  # make a copy of the original data which we will modify

# Initialize the scalers
min_max_scaler = MinMaxScaler()
standard_scaler = StandardScaler()  # not clear this was required for 'Age', 'RestingBP', or, 'MaxHR' because those were already looking pretty close to Gaussian. Further normalization here is unlikely to hurt, however. A further investigation into normality with QQ-plots and the shapiro wilk test could be a future direction and dictate whether those features get StandardScaler applied to them

# Apply both scalers to each continuous variable
for feature in continuous_feature_names:
    # Perform MinMax scaling
    min_max_scaled_data = min_max_scaler.fit_transform(df2[[feature]])

    # Perform Standard scaling on the MinMax scaled data
    min_max_standard_scaled_data = standard_scaler.fit_transform(min_max_scaled_data)

    # Update the original DataFrame with the scaled data
    df2[feature] = min_max_standard_scaled_data.flatten()

display(df2.tail())

# one hot encoding of categorical variables
df2 = pd.get_dummies(df2, columns=categorical_feature_names, dtype=int)
display(df2.tail())

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
914,-0.902793,M,TA,-1.210863,0.596262,0,Normal,-0.190346,N,0.292764,Flat
915,1.538028,M,ASY,0.627554,-0.053484,1,Normal,0.163203,N,2.357328,Flat
916,0.370679,M,ASY,-0.129441,-0.620868,0,Normal,-0.858162,Y,0.292764,Flat
917,0.370679,F,ATA,-0.129441,0.340024,0,LVH,1.45955,N,-0.833362,Flat
918,-1.645652,M,NAP,0.303128,-0.218208,0,Normal,1.420267,N,-0.833362,Up


Unnamed: 0,Age,RestingBP,Cholesterol,MaxHR,Oldpeak,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,...,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,FastingBS_0,FastingBS_1
914,-0.902793,-1.210863,0.596262,-0.190346,0.292764,0,1,0,0,0,...,0,1,0,1,0,0,1,0,1,0
915,1.538028,0.627554,-0.053484,0.163203,2.357328,0,1,1,0,0,...,0,1,0,1,0,0,1,0,0,1
916,0.370679,-0.129441,-0.620868,-0.858162,0.292764,0,1,1,0,0,...,0,1,0,0,1,0,1,0,1,0
917,0.370679,-0.129441,0.340024,1.45955,-0.833362,1,0,0,1,0,...,1,0,0,1,0,0,1,0,1,0
918,-1.645652,0.303128,-0.218208,1.420267,-0.833362,0,1,0,0,1,...,0,1,0,1,0,0,0,1,1,0


### Extract row I added

In [39]:
to_predict = df2.tail(1)  # get last row, keep as dataframe structure
display(to_predict)

Unnamed: 0,Age,RestingBP,Cholesterol,MaxHR,Oldpeak,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,...,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,FastingBS_0,FastingBS_1
918,-1.645652,0.303128,-0.218208,1.420267,-0.833362,0,1,0,0,1,...,0,1,0,1,0,0,0,1,1,0


### Import models

In [40]:
# Load the models
gnb_model1 = joblib.load("saved models/gnb_model1.pkl")
logistic_regressor1 = joblib.load("saved models/logistic_regressor1.pkl")
# Repeat for each model

In [41]:
# Use the loaded models to make predictions
predictions1 = gnb_model1.predict(to_predict)
predictions2 = logistic_regressor1.predict(to_predict)
# Repeat for each model you want to use for predictions
print(predictions2, predictions1)

[0] [0]


# Conclusion:
- Great! this went well, now I can implement a separate interface to prompt a user for their data and have my model predict their state!