In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, precision_recall_fscore_support, f1_score, precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import pickle

In [46]:
## Load the fitted preprocessing artifacts and the trained model from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
# only load artifacts that come from a trusted source.

def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


transformer = _load_pickle("pickle/robust_scaler.pkl")
label_encoder = _load_pickle("pickle/label_encoder.pkl")
onehot_encoder = _load_pickle("pickle/onehot_encoder.pkl")
model_1 = _load_pickle("models/model_1.pkl")


In [47]:
# Raw measurements for the single record to score. The keys become the
# DataFrame column names that the later cells refer to.
input_data = {
    "Pregnancies": 1,
    "Glucose": 85,
    "BloodPressure": 66,
    "SkinThickness": 29,
    "Insulin": 99,
    "BMI": 26.6,
    "DiabetesPedigreeFunction": 0.34,
    "Age": 31,
}


In [48]:
# Wrap the single record in a list so pandas builds a one-row DataFrame.
# The values are still raw numbers here; binning and encoding happen in later cells.
input_df = pd.DataFrame(data=[input_data])
input_df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,1,85,66,29,99,26.6,0.34,31


In [49]:
def categorize_bmi(bmi):
    """Map a numeric BMI value to its weight-category label.

    Values below 18.5 are "Underweight". Above that, each category's upper
    bound is inclusive (25, 30, 35, 40). Anything that matches none of the
    comparisons -- values above 40, and NaN -- is labelled "Obesity III".
    """
    if bmi < 18.5:
        return "Underweight"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    upper_bounds = (
        (25, "Normal"),
        (30, "Overweight"),
        (35, "Obesity I"),
        (40, "Obesity II"),
    )
    for limit, label in upper_bounds:
        if bmi <= limit:
            return label
    return "Obesity III"

def categorize_glucose(glucose):
    """Map a numeric glucose reading to its category label.

    Readings below 70 are "Low Glucose"; up to and including 99 are "Normal";
    up to and including 125 are "Prediabetic". Anything that matches none of
    the comparisons -- readings above 125, and NaN -- is "High Glucose".
    """
    if glucose < 70:
        return "Low Glucose"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    for limit, label in ((99, "Normal"), (125, "Prediabetic")):
        if glucose <= limit:
            return label
    return "High Glucose"

def insulin_score(insulin):
    """Label an insulin reading "Normal" when it lies in [16, 165], else "Abnormal".

    Both bounds are inclusive. A value for which the range check is false for
    any reason (including NaN) is labelled "Abnormal".
    """
    return "Normal" if 16 <= insulin <= 165 else "Abnormal"


In [50]:
# Replace the raw Insulin, Glucose and BMI measurements with their category labels.
# The binned frame is derived from `input_data` instead of overwriting the columns
# of the existing frame, so this cell can be re-run safely: applying the binning
# helpers to already-binned string labels would raise a TypeError on the numeric
# comparisons. `assign` keeps the existing column order because the three columns
# already exist.
input_df = pd.DataFrame([input_data]).assign(
    Insulin=lambda frame: frame["Insulin"].apply(insulin_score),
    Glucose=lambda frame: frame["Glucose"].apply(categorize_glucose),
    BMI=lambda frame: frame["BMI"].apply(categorize_bmi),
)
input_df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,1,Normal,66,29,Normal,Overweight,0.34,31


In [51]:
# Swap the "Normal"/"Abnormal" insulin labels for the codes produced by the
# pickled label encoder.
# NOTE(review): the column is overwritten in place, so this cell presumably
# cannot be re-run without first re-running the binning cell -- confirm.
insulin_codes = label_encoder.transform(input_df["Insulin"])
input_df["Insulin"] = insulin_codes

In [52]:
# One-hot encode the two binned categorical columns with the pre-fitted encoder.
categorical_columns = ["BMI", "Glucose"]
input_cat = onehot_encoder.transform(input_df[categorical_columns])


In [53]:
# Label the one-hot columns with the encoder's own output feature names.
input_cat_columns = onehot_encoder.get_feature_names_out(["BMI", "Glucose"])
# Convert the encoder output (densified via .toarray()) into a DataFrame.
# Reusing input_df's index keeps these rows aligned with the scaled numeric
# features when the two frames are concatenated column-wise; a default
# RangeIndex would misalign rows (and introduce NaNs) whenever input_df
# carries a non-default index.
input_cat_encoded = pd.DataFrame(
    input_cat.toarray(),
    columns=input_cat_columns,
    index=input_df.index,
)


In [54]:
# The binned Glucose and BMI columns have been one-hot encoded separately, so
# remove them from the frame before scaling.
# NOTE: input_df is rebound here, so re-running this cell raises KeyError
# because the columns are already gone.
input_df = input_df.drop(columns=["Glucose", "BMI"])



In [55]:
# Capture the column labels and index up front so they can be re-attached to the
# scaler output (presumably a bare array without labels -- confirm).
cols, input_df_index = input_df.columns, input_df.index

# Scale the remaining columns with the pre-fitted scaler and rebuild a labelled frame.
input_df_scaled = transformer.transform(input_df)
input_scaled = pd.DataFrame(data=input_df_scaled, index=input_df_index, columns=cols)

# Place the scaled features and the one-hot features side by side as the model input.
input_final = pd.concat(objs=[input_scaled, input_cat_encoded], axis="columns")



In [56]:
# Display the assembled model input: scaled columns followed by the one-hot columns.
input_final

Unnamed: 0,Pregnancies,BloodPressure,SkinThickness,Insulin,DiabetesPedigreeFunction,Age,BMI_Normal,BMI_Obesity I,BMI_Obesity II,BMI_Obesity III,BMI_Overweight,BMI_Underweight,Glucose_High Glucose,Glucose_Low Glucose,Glucose_Normal,Glucose_Prediabetic
0,-0.679675,-0.447389,-0.166667,0.0,-0.115465,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [57]:
# Predict the class label for the prepared single-row input with the loaded model,
# then display the resulting array.
prediction = model_1.predict(input_final)
prediction

array([0])

In [None]:
# Inspect the element dtype of the prediction array.
prediction.dtype

In [58]:
# Per-class probabilities for the same input row.
# NOTE(review): the column order presumably follows model_1.classes_ -- confirm
# before reading a particular column as the "positive" class.
prediction_proba = model_1.predict_proba(input_final)
prediction_proba

array([[0.94745822, 0.05254178]])