# Basic ML Model Deployment

## Import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import KNNImputer,SimpleImputer
from sklearn.preprocessing import StandardScaler
import pickle

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

## Fetch Data

In [2]:
# Load the loan dataset directly from GitHub.
# For offline work, pass the path of a local copy (e.g. 'Loan_data_ver2.csv') instead.
data = pd.read_csv(
    'https://raw.githubusercontent.com/tkseneee/Dataset/master/Loan_data_ver2.csv'
)

## Explore Data

In [3]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(614, 6)

In [4]:
# Per-column dtypes; the numeric / non-numeric distinction is what later
# splits the features into the two preprocessing groups.
data.dtypes

Married             object
Education           object
ApplicantIncome      int64
LoanAmount         float64
Credit_History     float64
Loan_Status        float64
dtype: object

In [5]:
# Peek at the first two rows to sanity-check the parsed values.
data.head(2)

Unnamed: 0,Married,Education,ApplicantIncome,LoanAmount,Credit_History,Loan_Status
0,No,Graduate,5849,,1.0,0.1
1,Yes,Graduate,4583,128.0,1.0,0.32


In [6]:
# Count the missing values in every column.
data.isna().sum()

Married             3
Education           0
ApplicantIncome     0
LoanAmount         22
Credit_History     50
Loan_Status         0
dtype: int64

Three features (Married, LoanAmount, and Credit_History) have missing values.

In [7]:
# Category frequencies for Married. value_counts() drops NaN by default, so
# the rows with a missing value are not listed in the result.
data['Married'].value_counts()

Married
Yes    398
No     213
Name: count, dtype: int64

In [8]:
# Category frequencies for Education.
data['Education'].value_counts()

Education
Graduate        449
Not Graduate    127
HSC              38
Name: count, dtype: int64

In [9]:
# Separate the target column (y) from the predictor columns (X).
y = data['Loan_Status']
X = data.drop(columns=['Loan_Status'])

In [10]:
# Hold out 30% of the rows as a validation set; the fixed seed keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=48,
)

In [11]:
# Names of the numeric feature columns.
feat_num = X.select_dtypes(include=np.number).columns.tolist()


In [12]:
# Names of the categorical (non-numeric) feature columns.
feat_cat = X.select_dtypes(exclude=np.number).columns.tolist()

In [13]:
# Show which columns were classified as categorical.
feat_cat

['Married', 'Education']

## Defining Data processing & Modeling  Pipeline

In [14]:
# Pipeline for numeric features: fill missing values with k-nearest-neighbours
# imputation, then standardize with StandardScaler().
num_pipe = Pipeline(steps=[
    ('imputer', KNNImputer()),
    ('std_scale', StandardScaler()),
])



In [15]:
# Pipeline for categorical features:
#   1. replace a missing category with an explicit 'Missing' level,
#   2. one-hot encode the result.
# handle_unknown='ignore' makes the encoder emit an all-zero row for a category
# it did not see during fit instead of raising at transform/predict time. This
# matters for a deployed model receiving new levels, and for rare levels (such
# as the imputed 'Missing') that may end up only in the validation split.
feat_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
    ('one_hot', OneHotEncoder(handle_unknown='ignore')),
])



In [16]:
# Combine both preprocessing pipelines: each column group is routed through its
# own pipeline, and any column in neither list is passed through unchanged.
data_pipeline = ColumnTransformer(
    transformers=[
        ('numeric', num_pipe, feat_num),
        ('categorical', feat_pipe, feat_cat),
    ],
    remainder='passthrough',
)



In [17]:
# Display the assembled ColumnTransformer to inspect its structure.
data_pipeline

In [18]:
# Chain preprocessing and the regressor so one object handles raw input end to
# end (and can be pickled as a single artifact).
# random_state pins the forest's bootstrap sampling and feature sub-sampling so
# that re-running the notebook reproduces the same fitted model and
# predictions; 48 is the same seed value used for the train/validation split.
full_pipe = Pipeline([
    ('pre_process', data_pipeline),
    ('model', RandomForestRegressor(random_state=48)),
])

In [19]:
# Training: fit the preprocessing steps and the model on the training split.
full_pipe.fit(X_train,y_train)

In [20]:
# Prediction: run the validation features through the fitted pipeline
# (preprocessing + model) and display the predicted Loan_Status values.
full_pipe.predict(X_test)

array([0.1784, 0.1837, 0.567 , 0.383 , 0.1549, 0.0885, 0.9635, 0.258 ,
       0.2592, 0.2751, 0.2189, 0.9761, 0.2835, 0.1455, 0.0621, 0.0916,
       0.1028, 0.0405, 0.067 , 0.2746, 0.3354, 0.0706, 0.3022, 0.4887,
       0.5694, 0.1787, 0.2625, 0.2214, 0.2083, 0.0133, 0.1018, 0.1538,
       0.3747, 0.9478, 0.1064, 0.3049, 0.2147, 0.6208, 0.9531, 0.1852,
       0.2798, 0.1518, 0.2983, 0.3597, 0.369 , 0.4433, 0.1122, 0.1652,
       0.2983, 0.1684, 0.096 , 0.433 , 0.0677, 0.1001, 0.9642, 0.0067,
       0.4328, 0.3214, 0.2863, 0.1881, 0.3476, 0.8597, 0.266 , 0.1275,
       0.4636, 0.23  , 0.0839, 0.2868, 0.2033, 0.1978, 0.3761, 0.1414,
       0.3681, 0.2848, 0.0702, 0.8207, 0.109 , 0.4523, 0.588 , 0.0494,
       0.2481, 0.1295, 0.3058, 0.7005, 0.4203, 0.1375, 0.1503, 0.0923,
       0.0609, 0.5078, 0.06  , 0.2054, 0.2285, 0.5615, 0.412 , 0.1519,
       0.9741, 0.0835, 0.096 , 0.4088, 0.5199, 0.0656, 0.0058, 0.0414,
       0.0773, 0.0949, 0.3232, 0.2285, 0.048 , 0.1371, 0.1702, 0.0926,
      

In [21]:
# Also store the numeric and categorical feature-name lists as pickle files.
# Each `with` block closes its file (flushing the data to disk) as soon as the
# dump finishes, instead of leaving the handle open until garbage collection.
with open('feat_numv1', 'wb') as num_file:
    pickle.dump(feat_num, num_file)
with open('feat_catv1', 'wb') as cat_file:
    pickle.dump(feat_cat, cat_file)

 

## Store the model as pickle file 

In [22]:
# Serialize the fitted pipeline (preprocessing + model) to disk. The `with`
# block guarantees the file is flushed and closed once the dump completes.
# NOTE: pickle files can execute arbitrary code when loaded, so only unpickle
# this artifact from a trusted source.
with open('full_pipeline', 'wb') as model_file:
    pickle.dump(full_pipe, model_file)