<h2 align="center"> Deploying Project on Cloud using Flask </h2>

### Importing Essential Modules:

In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn  as sns

import warnings
# NOTE(review): with no category argument this ignores every warning for the
# rest of the session, including pandas and scikit-learn diagnostics emitted by
# later cells; consider narrowing the filter to specific categories.
warnings.simplefilter('ignore')

In [4]:
# Read the insurance records from the CSV file that sits next to this notebook
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='insurance.csv')
data.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


### Inspecting Data

In [5]:
# Column labels of the loaded frame.
data.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

In [6]:
# Per-column dtypes; the object columns (sex, smoker, region) are the
# non-numeric ones.
data.dtypes

age           int64
sex          object
bmi         float64
children      int64
smoker       object
region       object
charges     float64
dtype: object

In [7]:
# Index range, non-null count and dtype per column, plus memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe() 

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [9]:
# Total number of values in the frame (rows x columns).
data.size

9366

In [10]:
# Count of missing values in each column.
data.isna().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [11]:
# Show one row of the frame as it looks before the encoding cells below run.
data.head(1)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924


### Data Preprocessing

In [12]:
from sklearn import preprocessing

# Replace the 'sex' labels with integer codes. The printed classes_ array lists
# the original labels in code order (index 0 -> code 0, index 1 -> code 1).
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(data['sex'])
data['sex'] = label_encoder.transform(data['sex'])
print(label_encoder.classes_)
data['sex'].head()

['female' 'male']


0    0
1    1
2    1
3    1
4    1
Name: sex, dtype: int32

In [13]:
# Re-fit the same encoder object on 'smoker'. After this call classes_ holds the
# smoker labels, so the mapping learned for the previous column is overwritten.
smoker_codes = label_encoder.fit_transform(data['smoker'])
data['smoker'] = smoker_codes
print(label_encoder.classes_)
data['smoker'].head()

['no' 'yes']


0    1
1    0
2    0
3    0
4    0
Name: smoker, dtype: int32

In [14]:
# Remove the 'region' column; it is not part of the feature set used below.
# Reassigning the result (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op rather than a KeyError for the already-missing column.
data = data.drop(columns=['region'], errors='ignore')
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,0,27.9,0,1,16884.924
1,18,1,33.77,1,0,1725.5523
2,28,1,33.0,3,0,4449.462
3,33,1,22.705,0,0,21984.47061
4,32,1,28.88,0,0,3866.8552


### Splitting Data

In [None]:
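# Feature scaling is left disabled: the random forest trained below splits on
# feature thresholds, so it does not need standardized inputs.
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()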

In [15]:
# Feature matrix: every column except the last one (the 'charges' target).
n_features = data.shape[1] - 1
x = data.iloc[:, :n_features]
x.head()

Unnamed: 0,age,sex,bmi,children,smoker
0,19,0,27.9,0,1
1,18,1,33.77,1,0
2,28,1,33.0,3,0
3,33,1,22.705,0,0
4,32,1,28.88,0,0


In [16]:
# Target: the last column ('charges'), kept as a one-column DataFrame.
target_position = data.shape[1] - 1
y = data.iloc[:, target_position:]
y.head()

Unnamed: 0,charges
0,16884.924
1,1725.5523
2,4449.462
3,21984.47061
4,3866.8552


In [30]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
split_parts = train_test_split(x, y, test_size=0.1, random_state=0)
x_train, x_test, y_train, y_test = split_parts
data.shape

(1338, 6)

### RandomForest

In [31]:
# Train
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import mean_squared_error,r2_score

# The target comes from a one-column DataFrame slice, but fit() expects a 1-D
# target. Flatten it explicitly instead of relying on scikit-learn's implicit
# conversion, which emits a DataConversionWarning that the global warning
# filter at the top of the notebook hides.
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(x_train, np.ravel(y_train))
y_pred = regressor.predict(x_test)

# Evaluate on the held-out split. The MSE is computed once and reused for the
# RMSE instead of being recalculated.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

# R^2 is reported as a percentage.
r_square = r2_score(y_test,y_pred)
print('The value of Rsquare is ...........',r_square*100)

Mean Absolute Error: 2603.8263766895516
Mean Squared Error: 20674973.701887038
Root Mean Squared Error: 4546.974125931116
The value of Rsquare is ........... 89.33749031234964


In [33]:
#Deployment model

import pickle

# Serialize the trained regressor to model.pkl. The context manager guarantees
# the file is flushed and closed before a later cell reads it back; the bare
# open() call used previously never closed the handle explicitly.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [61]:
# First four encoded rows, for comparison with the hand-typed inputs in the
# prediction cells below.
data.head(4)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,0,27.9,0,1,16884.924
1,18,1,33.77,1,0,1725.5523
2,28,1,33.0,3,0,4449.462
3,33,1,22.705,0,0,21984.47061


### Loading pickle file 

In [46]:
# Reload the pickled regressor; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code, so only load files that were
# produced by a trusted source (here: the dump cell of this notebook).
with open("model.pkl", "rb") as model_file:
    model2 = pickle.load(model_file)

# Feature order matches the training frame: age, sex, bmi, children, smoker.
model2.predict([[19,0,27.9,0,1]])

array([16966.1593405])

### Prediction 

In [47]:
# Predict for age=18, sex=1 (male), bmi=33, children=1, smoker=0 (no).
# NOTE(review): dataset row 1 has bmi 33.77; confirm whether 33 is intentional.
model2.predict([[18,1,33,1,0]])

array([2412.4169945])

In [36]:
# Predict for age=28, sex=1 (male), bmi=33.0, children=3, smoker=0 (no);
# these values match dataset row 2.
model2.predict([[28,1,33.00,3,0]])

array([5522.7768705])

In [41]:
# Predict for age=33, sex=1 (male), bmi=22.705, children=0, smoker=0 (no);
# these values match dataset row 3.
model2.predict([[33,1,22.705,0,0]])

array([15059.9413123])