## Model Deployment using Streamlit on Heroku

### Want to build an end-to-end Machine Learning project? This notebook will help you do so!

[Link to GitHub Repo](https://github.com/Lokeshrathi/Deploying-a-Machine-Learning-Model)

[Link to My WebPage](https://heart-disease-ml-app.herokuapp.com/)

# Data Cleaning and Exploration

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score,confusion_matrix,recall_score
from sklearn.model_selection import cross_val_score, StratifiedKFold, GridSearchCV
from xgboost import XGBClassifier

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the heart-disease table from the Kaggle input directory, report its
# (rows, columns) shape, and preview the first rows as the cell output.
csv_path = '/kaggle/input/heart-disease-uci/heart.csv'
df = pd.read_csv(csv_path)
print(df.shape)
df.head()

- age- in years
- sex-(1 = male; 0 = female)
- cp- chest pain type
- trestbps- resting blood pressure (in mm Hg on admission to the hospital)
- chol- serum cholesterol in mg/dl
- fbs-(fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
- restecg-resting electrocardiographic results
- thalach-maximum heart rate achieved
- exang-exercise induced angina (1 = yes; 0 = no)
- oldpeak-ST depression induced by exercise relative to rest
- slope-the slope of the peak exercise ST segment
- ca-number of major vessels (0-3) colored by fluoroscopy
- thal- 3 = normal; 6 = fixed defect; 7 = reversible defect
- target- 1 or 0

In [None]:
# How many rows carry each distinct value of the `ca` column.
df['ca'].value_counts()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
# Author's observation: there is not much correlation between the features.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    df.corr(),
    annot=True,
    cmap='YlGnBu',
    fmt='.2f',
    linewidths=2,
    ax=ax,
)

## Exploratory Data Analysis

In [None]:
# Number of rows per class of the `target` label.
df['target'].value_counts()

In [None]:
# Age distribution: opaque red histogram with a thick black KDE curve on top.
# Author's observation: most patients are between 40 and 60 years old.
age_hist_style = {'alpha': 1, "linewidth": 2}
age_kde_style = {"color": "k", "lw": 3, "label": "KDE"}
sns.distplot(df['age'], color='Red', hist_kws=age_hist_style, kde_kws=age_kde_style)

In [None]:
# Three side-by-side count plots (age, cholesterol, maximum heart rate), each
# bucketed into intervals with pd.cut and split by `target`.
#
# The 1x3 grid is created up front and every plot is drawn into its own axes.
# The previous plt.subplots() + plt.subplot(1, 3, k) combination first created
# one full-size axes and then drew the panels over it; current Matplotlib no
# longer removes such an overlapped axes, so it stayed visible underneath.
fig, axes = plt.subplots(1, 3, figsize=(24, 12))

# NOTE(review): the three binned columns below are stored on df, so the later
# pd.get_dummies(df, ...) call one-hot encodes them and they end up in X —
# confirm they are really meant to be model features.

# NOTE(review): there is no edge at 60, so (50, 70] is one double-width bin —
# confirm this is intended.
age_bins = [10,20,30,40,50,70,80]
df['age_bins'] = pd.cut(df['age'], bins=age_bins)
g1 = sns.countplot(data=df, x='age_bins', hue='target', palette='plasma',
                   linewidth=3, ax=axes[0])
g1.set_title("Age vs Heart Disease")

cho_bins = [100,150,200,250,300,350,400,450]
df['bin_chol'] = pd.cut(df['chol'], bins=cho_bins)
g2 = sns.countplot(data=df, x='bin_chol', hue='target', palette='plasma',
                   linewidth=3, ax=axes[1])
g2.set_title("Cholestoral vs Heart Disease")

# This panel bins `thalach` (maximum heart rate achieved), not the separate
# `thal` column, so the title names the quantity that is actually plotted.
thal_bins = [60,80,100,120,140,160,180,200,220]
df['bin_thal'] = pd.cut(df['thalach'], bins=thal_bins)
g3 = sns.countplot(data=df, x='bin_thal', hue='target', palette='plasma',
                   linewidth=3, ax=axes[2])
g3.set_title("Max Heart Rate (thalach) vs Heart Disease")

In [None]:
# Plot the distribution of the label values, then show the per-class counts
# as the cell output.
target_values = df['target']
sns.distplot(target_values, kde=True)
target_values.value_counts()

## We can see that our dependent (target) variable is balanced.

In [None]:
# Column labels currently present on df (this includes the binned helper
# columns added by the count-plot cell).
df.columns

In [None]:
# Per-column dtype and non-null count summary.
df.info()

In [None]:
# Re-type the discrete-valued columns (and the label) as pandas categoricals,
# one column at a time on the same df, then display the resulting dtypes.
categorical_columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']
for column in categorical_columns:
    df[column] = df[column].astype('category')
df.dtypes


In [None]:
# Correlation heatmap of the columns that are still numeric.
# By this point df also holds category columns, including the interval-valued
# bins created by pd.cut. Calling df.corr() on the whole frame raises on
# pandas >= 2.0, where non-numeric columns are no longer dropped silently, so
# the numeric columns are selected explicitly. This works on older pandas too.
correlation = df.select_dtypes(include='number').corr()
plt.figure(figsize=(12,10))
sns.heatmap(correlation,annot=True,cmap = 'Blues')

In [None]:
# Label vector. Taken here, before the get_dummies cell replaces the `target`
# column with the encoded `target_1` column.
y = df['target']

In [None]:
# One-hot encode every categorical column, dropping the first level of each,
# so the model receives purely numeric inputs.
# NOTE(review): df still carries the helper columns age_bins / bin_chol /
# bin_thal added by the count-plot cell; they are categorical, so they are
# encoded here too and become part of X — confirm that is intended.
df = pd.get_dummies(df, drop_first = True)
df.head()

In [None]:
# Feature matrix: every encoded column except the label indicator.
X = df.drop(columns=['target_1'])
X.head()

# Model Training

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Baseline: logistic regression with default settings, fitted on the training
# split and scored on the held-out split.
lr = LogisticRegression()
lr.fit(x_train, y_train)
pred = lr.predict(x_test)
accuracy, classification, confusion_ = (
    accuracy_score(y_test, pred),
    classification_report(y_test, pred),
    confusion_matrix(y_test, pred),
)
print(accuracy, classification, confusion_)

In [None]:
# Grid-search the penalty type and regularisation strength with 6-fold CV.
# (GridSearchCV is already imported in the notebook's first code cell.)
#
# The search uses a liblinear-based estimator because it supports both 'l1'
# and 'l2'. With the default 'lbfgs' solver every 'l1' candidate fails to fit
# and is scored as NaN — invisible here because warnings are suppressed — so
# half of the grid was never actually evaluated.
penalty = ['l1','l2']
C = np.logspace(0, 4, 10)
hyperparameters = dict(C=C, penalty=penalty)
h_logmodel = GridSearchCV(LogisticRegression(solver='liblinear'), hyperparameters, cv=6, verbose=0)
best_logmodel = h_logmodel.fit(x_train, y_train)
# best_params_ holds the winning combination from the grid directly.
print('Best Penalty:', best_logmodel.best_params_['penalty'])
print('Best C:', best_logmodel.best_params_['C'])

In [None]:
# Final model: L2-penalised logistic regression with C = 1.0, fitted on the
# training split; accuracy, confusion matrix and per-class report are printed
# for the held-out split.
lr1 = LogisticRegression(C=1.0, penalty='l2')
lr1.fit(x_train, y_train)
pred = lr1.predict(x_test)
accuracy = accuracy_score(y_test, pred)
confusion_ = confusion_matrix(y_test, pred)
classification = classification_report(y_test, pred)
for report in (accuracy, confusion_, classification):
    print(report)

### Use Pickle to save your model!

In [None]:
import pickle

# Serialise the fitted model to heart.pkl. The context manager guarantees the
# file is flushed and closed even if pickling fails; the bare open() call used
# before never closed the handle.
with open('heart.pkl', 'wb') as model_file:
    pickle.dump(lr1, model_file)

## Prediction

In [None]:
def predict_price(age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldspeak,slope,ca,thal):
    """Predict the heart-disease label for one patient from raw feature values.

    The name is kept for compatibility; the function predicts the `target`
    class with the fitted ``lr`` model, not a price.

    The model was trained on the one-hot encoded frame, so the raw inputs are
    mapped onto the columns of ``x_train`` by name. Writing them to fixed
    positions 0..12 put most values into the wrong feature, because the
    encoded layout starts with the continuous columns and is followed by the
    dummy columns.

    * continuous inputs (age, trestbps, chol, thalach, oldpeak) are copied
      into the column of the same name;
    * categorical inputs set the dummy column ``<name>_<level>`` to 1 when it
      exists (the first level of each feature has no column because the
      encoding used ``drop_first=True``);
    * if the binned helper columns (``age_bins_(..]``, ``bin_chol_(..]``,
      ``bin_thal_(..]``) are part of the training columns, the interval that
      contains the corresponding raw value is set to 1.

    Parameters are the raw values of the dataset columns of the same name;
    ``oldspeak`` feeds the ``oldpeak`` column.

    Returns the predicted class for this single row.
    Raises KeyError if a continuous feature is not among the training columns.
    """
    columns = list(x_train.columns)
    row = dict.fromkeys(columns, 0.0)

    continuous = {
        'age': age,
        'trestbps': trestbps,
        'chol': chol,
        'thalach': thalach,
        'oldpeak': oldspeak,
    }
    for name, value in continuous.items():
        if name not in row:
            raise KeyError('feature {!r} is missing from the training columns'.format(name))
        row[name] = value

    categorical = {
        'sex': sex,
        'cp': cp,
        'fbs': fbs,
        'restecg': restecg,
        'exang': exang,
        'slope': slope,
        'ca': ca,
        'thal': thal,
    }
    for name, level in categorical.items():
        dummy = '{}_{}'.format(name, int(level))
        # A level without a dummy column is the dropped first level: all zeros.
        if dummy in row:
            row[dummy] = 1.0

    # Helper columns produced by pd.cut are named "<prefix>_(low, high]" and
    # are right-closed, so the matching interval satisfies low < value <= high.
    binned = {'age_bins': age, 'bin_chol': chol, 'bin_thal': thalach}
    for prefix, value in binned.items():
        marker = prefix + '_('
        for column in columns:
            label = str(column)
            if not label.startswith(marker):
                continue
            low, high = label[len(marker):-1].split(',')
            if float(low) < value <= float(high):
                row[column] = 1.0

    # Predict on a one-row frame with the same column order used for training.
    return lr.predict(pd.DataFrame([row], columns=columns))[0]

predict_price(54,0,2,108,267,0,0,167,0,0.0,2,0,2)


In [None]:
# Load the model back from heart.pkl. The context manager closes the file
# once loading is done; the handle was previously left open.
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code from an untrusted file.
with open('heart.pkl', 'rb') as pickle_in:
    clf = pickle.load(pickle_in)

In [None]:
# Sanity check: the model reloaded from heart.pkl can predict on the training
# features.
clf.predict(x_train)

# Visit this [GitHub Link](https://github.com/Lokeshrathi/Deploying-a-Machine-Learning-Model) for Deployment of the Model on Heroku

### Also, feel free to post any doubts or questions you have!