## Load Model and Prediction

Restart the kernel before running this section (Kernel -> Restart Kernel...) so that it starts from a clean state.

In [1]:
import pickle;
import pandas as pd 

# Unpickle the saved artifacts: the fitted model plus the three encoders,
# stored together as a single 4-item sequence.
# NOTE: pickle can execute arbitrary code on load -- only open trusted files.
with open('knn_isSmoker.pkl', 'rb') as pkl_file:
    artifacts = pickle.load(pkl_file)

model, smoker_encoder, region_encoder, sex_encoder = artifacts

### New data

In [2]:
# One sample to score, built in a single constructor call.
# Column order: age, sex, bmi, children, region, charges
x_new = pd.DataFrame({
    'age': [25],
    'sex': ['male'],
    'bmi': [22],
    'children': [2],
    'region': ['northwest'],
    'charges': [4440.46200],
})

### Categorical Data Encoding

In [3]:
# Replace the string categories with the codes produced by the loaded encoders.
# NOTE(review): this overwrites the raw 'sex' and 'region' values in x_new, so
# re-run the "New data" cell before running this cell a second time.
x_new = x_new.assign(
    sex=sex_encoder.transform(x_new['sex']),
    region=region_encoder.transform(x_new['region']),
)

### Predicting

In [4]:
# Step 1: the model returns the predicted class in encoded form.
y_pred_new = model.predict(x_new)
# Step 2: map the encoded class back to its original label.
result = smoker_encoder.inverse_transform(y_pred_new)
# Step 3: show the decoded label(s).
print('Predicted is smokers: ', result)

Predicted is smokers:  ['no']


## Create predict_knn_smoker.py to Run with Streamlit

**Modify Input widgets**

https://docs.streamlit.io/library/api-reference

Run the app from a terminal: `streamlit run predict_knn_smoker.py`

In [5]:
%%writefile predict_knn_smoker.py

import streamlit as st
# import numpy as np 
import pandas as pd 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import pickle

#Load model
with open('knn_isSmoker.pkl', 'rb') as file:
    # Load the data from the file
    model, smoker_encoder, region_encoder ,sex_encoder = pickle.load(file)

st.title("Smoker Prediction")

# ['female' 'male']
# ['southwest' 'southeast' 'northwest' 'northeast']
# ['yes' 'no']

# Get user input for each variable
sex_input = st.selectbox('Sex:', ['female', 'male'])
region_input = st.selectbox('Region:', ['southwest', 'southeast','northwest','northeast'])
age_input = st.number_input('Age (18 to 64):', min_value=18, max_value=64)
bmi_input = st.number_input('BMI (15 to 54):', min_value=15, max_value=54)
children_input = st.number_input('Children (0 to 6):', min_value=0, max_value=6)
charges_input = st.number_input('Charges (1000 to 70,000):', min_value=1000, max_value=70000)

# Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

# Create a DataFrame with user input
x_new = pd.DataFrame({
    'age': [age_input],
    'sex': [sex_input],
    'bmi': [bmi_input],
    'children': [children_input],
    'region': [region_input],
    'charges': [charges_input]
})

# Encoding
x_new['sex'] = sex_encoder.transform(x_new['sex'])
x_new['region'] = region_encoder.transform(x_new['region'])

# Prediction
y_pred_new = model.predict(x_new)
result = smoker_encoder.inverse_transform(y_pred_new)

# Display result
st.subheader('Prediction Result:')
st.write(f'Predicted Smoker: {result[0]}')

Overwriting predict_knn_smoker.py


## Deploy on Streamlit Sharing

https://github.com/study-in-sit/smoker-classification-predict

https://smoker-classification-predict-2zyvpn4smpord3cjbzm4uf.streamlit.app

In [6]:
%%writefile requirements.txt

# Dependencies for the deployed Streamlit app (predict_knn_smoker.py).
streamlit
# pandas is imported directly by the app, so list it explicitly rather than
# relying on it being pulled in as a dependency of another package.
pandas
numpy
scikit-learn
joblib

Overwriting requirements.txt


![Screenshot of the Streamlit smoker prediction app](app.png "App Prediction")