# Basic ML Model Deployment

## Import libraries

In [51]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import KNNImputer,SimpleImputer
from sklearn.preprocessing import StandardScaler
import pickle

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

## Fetch Data

In [52]:
data=pd.read_csv('https://raw.githubusercontent.com/tkseneee/Dataset/master/Loan_data_ver2.csv')
#data=pd.read_csv('Loan_data_ver2.csv')

## Explore Data

In [53]:
data.shape

(614, 6)

In [54]:
data.dtypes

Married             object
Education           object
ApplicantIncome      int64
LoanAmount         float64
Credit_History     float64
Loan_Status        float64
dtype: object

In [55]:
data.head(2)

Unnamed: 0,Married,Education,ApplicantIncome,LoanAmount,Credit_History,Loan_Status
0,No,Graduate,5849,,1.0,0.1
1,Yes,Graduate,4583,128.0,1.0,0.32


In [56]:
# fetch features with missing values
data.isnull().sum()

Married             3
Education           0
ApplicantIncome     0
LoanAmount         22
Credit_History     50
Loan_Status         0
dtype: int64

3 features namely - Married,LoanAmount,Credit_History has missing values

In [57]:
data['Married'].value_counts()

Yes    398
No     213
Name: Married, dtype: int64

In [58]:
data['Education'].value_counts()

Graduate        449
Not Graduate    127
HSC              38
Name: Education, dtype: int64

In [59]:
# segreegating target & feature
X=data.drop('Loan_Status', axis=1)
y=data['Loan_Status']

In [60]:
# spliting data into train & validation set
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=48)

In [61]:
# fetching numeric features list
feat_num=list(X.select_dtypes(include=np.number).columns)


In [62]:
# fetching categorical features  list
feat_cat=list(X.select_dtypes(exclude=np.number).columns)

In [63]:
feat_cat

['Married', 'Education']

## Defining Data processing & Modeling  Pipeline

In [64]:
#  pipeline for numeric atures -missing values replacement using k-Nearest Neighbors follwed by StandardScaler() 
num_pipe=Pipeline([('imputer',KNNImputer()),('std_scale',StandardScaler())])



In [65]:
# pipeline for categorical faetures - missing category replacement by new category i.e. missing followed by one hot encoding 
feat_pipe = Pipeline([('imputer',SimpleImputer(strategy='constant', fill_value='Missing')), 
                      ('one_hot',(OneHotEncoder()))]) 



In [66]:
#combine data processing pipeline
data_pipeline=ColumnTransformer([('numeric',num_pipe,feat_num),
                                 ('categorical',feat_pipe, feat_cat)],
                                remainder='passthrough')



In [67]:
data_pipeline

ColumnTransformer(remainder='passthrough',
                  transformers=[('numeric',
                                 Pipeline(steps=[('imputer', KNNImputer()),
                                                 ('std_scale',
                                                  StandardScaler())]),
                                 ['ApplicantIncome', 'LoanAmount',
                                  'Credit_History']),
                                ('categorical',
                                 Pipeline(steps=[('imputer',
                                                  SimpleImputer(fill_value='Missing',
                                                                strategy='constant')),
                                                 ('one_hot', OneHotEncoder())]),
                                 ['Married', 'Education'])])

In [68]:
# adding ml-model into pipeline 
full_pipe=Pipeline([('pre_process',data_pipeline),('model',RandomForestRegressor())])

In [69]:
# training
full_pipe.fit(X_train,y_train)

Pipeline(steps=[('pre_process',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   KNNImputer()),
                                                                  ('std_scale',
                                                                   StandardScaler())]),
                                                  ['ApplicantIncome',
                                                   'LoanAmount',
                                                   'Credit_History']),
                                                 ('categorical',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='Missing',
                                                                   

In [70]:
# prediction
full_pipe.predict(X_test)

array([0.1341, 0.214 , 0.5507, 0.311 , 0.1687, 0.1142, 0.9625, 0.2565,
       0.2472, 0.2655, 0.2126, 0.977 , 0.2706, 0.1569, 0.0731, 0.1204,
       0.0911, 0.0327, 0.0556, 0.2247, 0.3192, 0.0482, 0.2708, 0.4986,
       0.5876, 0.1423, 0.2672, 0.2303, 0.2351, 0.0192, 0.0971, 0.2294,
       0.3835, 0.9513, 0.1284, 0.2733, 0.2197, 0.5964, 0.9553, 0.1976,
       0.316 , 0.145 , 0.3243, 0.3419, 0.4469, 0.5375, 0.1106, 0.1745,
       0.2365, 0.1827, 0.1125, 0.3796, 0.0549, 0.0803, 0.9613, 0.0111,
       0.4537, 0.3533, 0.245 , 0.2095, 0.3301, 0.844 , 0.2716, 0.1409,
       0.4799, 0.2333, 0.0836, 0.2978, 0.166 , 0.2421, 0.3945, 0.1787,
       0.3264, 0.3351, 0.0769, 0.8407, 0.0829, 0.519 , 0.5793, 0.0388,
       0.293 , 0.1   , 0.3379, 0.7805, 0.4368, 0.1467, 0.1801, 0.1013,
       0.063 , 0.483 , 0.0392, 0.1889, 0.2732, 0.5398, 0.3955, 0.1925,
       0.9677, 0.1085, 0.0889, 0.3629, 0.5031, 0.0606, 0.008 , 0.0511,
       0.0716, 0.0511, 0.3168, 0.2179, 0.0482, 0.1415, 0.1736, 0.1034,
      

In [71]:
## can store numeric and categorical variables also as pickle file
pickle.dump(feat_num,open('feat_numv1','wb'))
pickle.dump(feat_cat,open('feat_catv1','wb'))

 

## Store the model as pickle file 

In [72]:
pickle.dump(full_pipe,open('full_pipeline','wb'))