# The purpose of this short notebook is to familiarize myself with importing models. I will:
1. Initialize new data (as could be collected from a user if I designed an interface)
2. Transform that data into the same normalized, regularized, one-hot-encoded format as the training data
3. Import the models
4. Make predictions on the new data

In [38]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import joblib

In [39]:
df = pd.read_csv("heart_failure_data.csv")

# TEMPORARY TESTING STUFF
positive = df[df["HeartDisease"] == 1]
display(positive.loc[913])
new_data = positive.drop(["HeartDisease"], axis=1)
# display(new_data.loc[913])
# END TEMP TESTING

df = df.drop(["HeartDisease"], axis=1)

Age                   45
Sex                    M
ChestPainType         TA
RestingBP            110
Cholesterol          264
FastingBS              0
RestingECG        Normal
MaxHR                132
ExerciseAngina         N
Oldpeak              1.2
ST_Slope            Flat
HeartDisease           1
Name: 913, dtype: object

### Initialize new data

In [None]:

new_data = pd.Series({
    "Age": [45],
    "Sex": ["M"],
    "ChestPainType": ["TA"],
    "RestingBP": [110],
    "Cholesterol": [264],
    "FastingBS": [0],
    "RestingECG": ["Normal"],
    "MaxHR": [132],
    "ExerciseAngina": ["N"],
    "Oldpeak": [1.2],
    "ST_Slope": ["Flat"]
})
# wait a second I can't seem to come up with data that has it

### Add new data to dataframe

In [None]:
# add new data to dataframe
display(df.tail())
# df = pd.concat([new_data, df], ignore_index=True)
df.loc[len(df)] = new_data
display(df.tail())

### Augment features the same way I did on training data

In [None]:
numerical_features = df.select_dtypes(include=[np.number])
numerical_features = numerical_features.drop(["FastingBS"], axis=1)
continuous_feature_names = numerical_features.columns.tolist()

categorical_features = df.select_dtypes(include=[object])
categorical_feature_names = categorical_features.columns.to_list() + ["FastingBS"]

KeyError: "['FastingBS'] not found in axis"

In [None]:
df2 = df.copy(deep=True)  # make a copy of the original data which we will modify

# Initialize the scalers
min_max_scaler = MinMaxScaler()
standard_scaler = StandardScaler()  # not clear this was required for 'Age', 'RestingBP', or, 'MaxHR' because those were already looking pretty close to Gaussian. Further normalization here is unlikely to hurt, however. A further investigation into normality with QQ-plots and the shapiro wilk test could be a future direction and dictate whether those features get StandardScaler applied to them

# Apply both scalers to each continuous variable
for feature in continuous_feature_names:
    # Perform MinMax scaling
    min_max_scaled_data = min_max_scaler.fit_transform(df2[[feature]])

    # Perform Standard scaling on the MinMax scaled data
    min_max_standard_scaled_data = standard_scaler.fit_transform(min_max_scaled_data)

    # Update the original DataFrame with the scaled data
    df2[feature] = min_max_standard_scaled_data.flatten()

display(df2.tail())

# one hot encoding of categorical variables
df2 = pd.get_dummies(df2, columns=categorical_feature_names, dtype=int)
display(df2.tail())

### Extract row I added

In [None]:
to_predict = df2.tail(1)  # get last row, keep as dataframe structure
display(to_predict)

### Import models

In [None]:
# Load the models
gnb_model1 = joblib.load("saved models/gnb_model1.pkl")
logistic_regressor1 = joblib.load("saved models/logistic_regressor1.pkl")
# Repeat for each model

In [None]:
# Use the loaded models to make predictions
predictions1 = gnb_model1.predict(to_predict)
predictions2 = logistic_regressor1.predict(to_predict)
# Repeat for each model you want to use for predictions
print(predictions2, predictions1)

# Conclusion:
- Great! this went well, now I can implement a separate interface to prompt a user for their data and have my model predict their state!