PART - 1 : Model Building and hosting local API
1. Data Preparation
2. Machine Learning Modelling
3. Model Evaluation
4. Export Trained Model
5. LOCAL REST API with Flask web-server 
6. Create a website for predicting marriage age by calling the REST API

PART - 2 : Deploying Public API to AWS EC2 server and launch website service 
1. Spin up an EC2 server
2. Configure EC2 with security group and private key
3. Install libraries and dependencies on the EC2 server
4. Move trained model and app.py flask files to EC2 (winscp)
5. Configure flaskapp.wsgi file and Apache vhost file
6. Restart apache webserver and Check API status
7. Launch a website with domain name and host webpage. 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Load the raw marriage-age records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='age_of_marriage_data.csv')

# Report (rows, columns), then preview the first rows.
n_rows_cols = df.shape
print(n_rows_cols)
df.head(5)

(2567, 10)


Unnamed: 0,id,gender,height,religion,caste,mother_tongue,profession,location,country,age_of_marriage
0,1,female,"5'4""",,others,Telugu,,London,United Kingdom,21.0
1,2,male,"5'7""",Jain,Shwetamber,Gujarati,Doctor / Healthcare Professional,Fairfax- VA,USA,32.0
2,3,male,"5'7""",Hindu,Brahmin,Hindi,Entrepreneurs / Business,Begusarai,India,32.0
3,4,female,"5'0""",Hindu,Thakur,Hindi,Architect,Mumbai,India,30.0
4,5,male,"5'5""",Christian,Born Again,Malayalam,Sales Professional / Marketing,Sulthan Bathery,India,30.0


In [3]:
# List the column labels of the loaded frame.
df.columns

Index(['id', 'gender', 'height', 'religion', 'caste', 'mother_tongue',
       'profession', 'location', 'country', 'age_of_marriage'],
      dtype='object')

In [4]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

id                   0
gender              29
height             118
religion           635
caste              142
mother_tongue      164
profession         330
location           155
country             16
age_of_marriage     19
dtype: int64

In [5]:
# Alternative that was tried and disabled: df.fillna(df.mean(),inplace=True)

# Keep only rows with no missing value in any column, then confirm that no
# nulls remain.
# NOTE(review): this also discards rows whose only gaps are in columns that the
# feature selection further down does not use (e.g. profession, location) -
# confirm that losing those rows is intended.
df = df.dropna()
df.isna().sum()

id                 0
gender             0
height             0
religion           0
caste              0
mother_tongue      0
profession         0
location           0
country            0
age_of_marriage    0
dtype: int64

In [6]:
# Predictor columns (five categorical fields plus the raw height string) and
# the regression target.
feature_cols = ['gender', 'religion', 'caste', 'mother_tongue', 'country', 'height']
x = df.loc[:, feature_cols]
y = df.loc[:, 'age_of_marriage']

In [7]:
# Preview the first five rows of the selected features.
x.head(5)

Unnamed: 0,gender,religion,caste,mother_tongue,country,height
1,male,Jain,Shwetamber,Gujarati,USA,"5'7"""
2,male,Hindu,Brahmin,Hindi,India,"5'7"""
3,female,Hindu,Thakur,Hindi,India,"5'0"""
4,male,Christian,Born Again,Malayalam,India,"5'5"""
5,male,Hindu,Valmiki,Hindi,India,"5'5"""


In [8]:
#dummies 

# Gender=pd.get_dummies(x['gender'],drop_first=True)
# Religion=pd.get_dummies(x['religion'],drop_first=True)
# Cast=pd.get_dummies(x['caste'],drop_first=True)
# Mother_tounge=pd.get_dummies(x['mother_tongue'],drop_first=True)
# # Profession=pd.get_dummies(x['profession'],drop_first=True)
# # Location=pd.get_dummies(x['location'],drop_first=True)
# Country=pd.get_dummies(x['country'],drop_first=True)
# Height=pd.get_dummies(x['height'],drop_first=True)


In [9]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every categorical feature with its OWN LabelEncoder.
# Re-fitting one shared encoder for all columns keeps only the classes of the
# last column fitted, so the label -> code mappings of the other columns would
# be lost; they are needed to encode new inputs the same way at prediction time.
categorical_cols=['gender','religion','caste','mother_tongue','country']
encoders={}
for col in categorical_cols:
    enc=LabelEncoder()
    x[col]=enc.fit_transform(x[col])
    encoders[col]=enc

# `enc` stays bound to the encoder of the last column ('country'), which is what
# the single shared encoder ended up holding, so code reading enc.classes_
# still sees the country classes. Use encoders[<column>] for the other columns.

In [10]:
# Label -> integer code lookup built from the classes currently held by `enc`.
# NOTE(review): the name `map` shadows Python's builtin map() for the rest of
# the session; it is kept because later cells may refer to it - consider a
# rename.
map=dict(zip(enc.classes_,range(len(enc.classes_))))
map

{' Australia': 0,
 ' Bahrain': 1,
 ' Bangladesh': 2,
 ' Canada': 3,
 ' Germany': 4,
 ' India': 5,
 ' Ireland': 6,
 ' Kuwait': 7,
 ' Malaysia': 8,
 ' Myanmar': 9,
 ' Netherlands': 10,
 ' New Zealand': 11,
 ' Norway': 12,
 ' Oman': 13,
 ' Pakistan': 14,
 ' Philippines': 15,
 ' Qatar': 16,
 ' Singapore': 17,
 ' South Africa': 18,
 ' USA': 19,
 ' United Arab Emirates': 20,
 ' United Kingdom': 21}

In [11]:
# Inspect the raw height strings (feet'inches" notation).
x['height']

1        5'7"
2        5'7"
3        5'0"
4        5'5"
5        5'5"
        ...  
2561    5'11"
2562     5'3"
2563    5'11"
2564     5'3"
2566     5'2"
Name: height, Length: 1932, dtype: object

In [12]:
#### convert height into cm


# Worked example on the row labelled 1: split the feet'inches" string and
# convert each part to centimetres (1 ft = 30.48 cm, 1 in = 2.54 cm).
sample_parts=x.loc[1,'height'].split("'")
h_feet=int(sample_parts[0])*30.48
h_inch=int(sample_parts[1].split("\"")[0])*2.54

print("feet :",h_feet)
print("inches:",h_inch)

feet : 152.4
inches: 17.78


In [13]:
## define function to convert hgt into cms
def h_cm(h):
    """Convert a height written as feet'inches" (e.g. 5'7") to centimetres.

    Parameters
    ----------
    h : str
        Height text with an apostrophe after the feet. The inch part and its
        closing double quote are optional, so "6'" is read as 6 feet 0 inches.

    Returns
    -------
    float
        Height in centimetres (feet * 30.48 + inches * 2.54).

    Raises
    ------
    ValueError
        If the apostrophe is missing or a part is not an integer.
    """
    parts=h.split("'")
    if len(parts)<2:
        raise ValueError(f"height {h!r} is not in feet'inches\" format")
    # Text between the apostrophe and the closing double quote; empty when the
    # value only gives feet.
    inch_text=parts[1].split("\"")[0].strip()
    inches=int(inch_text) if inch_text else 0
    return int(parts[0])*30.48+inches*2.54

# Add a numeric height column by converting every height string with h_cm.
x['height_cm']=x['height'].map(h_cm)

In [20]:
# Remove the raw text column now that height_cm carries the numeric value.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because 'height' is already gone.
x=x.drop(columns=['height'],errors='ignore')
x.head()

Unnamed: 0,gender,religion,caste,mother_tongue,country,height_cm
1,1,2,34,6,19,170.18
2,1,1,14,8,5,170.18
3,0,1,36,8,5,152.4
4,1,0,13,13,5,165.1
5,1,1,38,8,5,165.1


In [15]:
# Preview the first five target values.
y.head(5)

1    32.0
2    32.0
3    30.0
4    30.0
5    29.0
Name: age_of_marriage, dtype: float64

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

# plot_confusion_matrix was removed from sklearn.metrics in scikit-learn 1.2.
# Guard the import so this cell still runs on current releases; the name stays
# defined (as None) when the function is unavailable.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix=None


# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=0)


In [27]:
# Fit a depth-limited random forest on the training split (fit returns the
# estimator itself), then predict the held-out rows.
rf=RandomForestRegressor(max_depth=10,random_state=0).fit(xtrain,ytrain)
rf_pred=rf.predict(xtest)

In [28]:
from sklearn.metrics import r2_score,mean_absolute_error

# R^2 of the forest's predictions against the held-out targets.
r2score=r2_score(y_true=ytest,y_pred=rf_pred)
print(r2score)

0.698057485847989


In [29]:
# Mean absolute error on the held-out rows, in the target's units.
mean_absolute_error(y_true=ytest,y_pred=rf_pred)

1.038753774669992

In [30]:
# R^2 on the training split, for comparison with the held-out score.
rf.score(X=xtrain,y=ytrain)

0.8072769098185266

In [31]:
# R^2 on the held-out split.
rf.score(X=xtest,y=ytest)

0.698057485847989