# AdaBoost Regressor Algorithm - insurance_charge_prediction
## MODEL CREATION PHASE

## read the dataset

In [1]:
import pandas as pd

# Read the insurance CSV (path is relative to the notebook's working directory)
# into a DataFrame, then show it as the cell output.
dataset = pd.read_csv(filepath_or_buffer="insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


## convert categorical data into numerical data

In [2]:
# One-hot encode the categorical columns as 0/1 integers. drop_first=True keeps
# a single indicator per two-level column (sex -> sex_male, smoker -> smoker_yes).
dataset = pd.get_dummies(
    data=dataset,
    drop_first=True,
    dtype=int,
)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


## split Input and Output

In [3]:
# List the column names of the encoded frame; the feature and target
# selections in the following cells refer to these names.
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [4]:
# Feature matrix: every column except the target `charges`.
independent = dataset.loc[
    :,
    ['age', 'bmi', 'children', 'sex_male', 'smoker_yes'],
]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [5]:
# Target: the `charges` column, kept as a single-column DataFrame (2-D).
dependent = dataset.loc[:, ['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


## split train and test set 


In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
(x_train, x_test, y_train, y_test) = train_test_split(
    independent,
    dependent,
    test_size=0.20,
    random_state=0,
)

In [7]:
# Inspect the held-out feature rows produced by the split above.
x_test

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
578,52,30.200,1,1,0
610,47,29.370,1,0,0
569,48,40.565,2,1,1
1034,61,38.380,0,1,0
198,51,18.050,0,0,0
...,...,...,...,...,...
1084,62,30.495,2,0,0
726,41,28.405,1,1,0
1132,57,40.280,0,1,0
725,30,39.050,3,0,1


In [8]:
# Inspect the held-out target values; their index matches the rows of x_test.
y_test

Unnamed: 0,charges
578,9724.53000
610,8547.69130
569,45702.02235
1034,12950.07120
198,9644.25250
...,...
1084,15019.76005
726,6664.68595
1132,20709.02034
725,40932.42950


## model creation 

In [58]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

# AdaBoost ensemble whose weak learners are depth-3 regression trees.
# random_state=0 keeps the boosting run repeatable.
regressor = AdaBoostRegressor(
    estimator=DecisionTreeRegressor(max_depth=3),
    random_state=0,
)

# y_train is a single-column DataFrame (shape (n, 1)). Passing it as-is makes
# scikit-learn emit a DataConversionWarning ("column-vector y was passed when a
# 1d array was expected"), so flatten it to a 1-D array before fitting.
regressor.fit(x_train, y_train.to_numpy().ravel())

  y = column_or_1d(y, warn=True)


## test the model 

In [56]:
# Predict charges for the held-out feature rows and show the resulting array.
y_predict = regressor.predict(X=x_test)
y_predict

array([13985.37855442, 13985.37855442, 44892.23363368, 14012.65935331,
       13985.37855442,  7380.2950056 ,  5410.15212949, 13985.37855442,
        7727.40031849,  7727.40031849,  7727.40031849, 13985.37855442,
       11614.71010922,  7727.40031849, 18549.59569903, 13985.37855442,
       14429.04030403,  7727.40031849,  7727.40031849, 40007.44676135,
       24984.53200711, 16467.52439246, 13985.37855442, 24984.53200711,
        5410.15212949,  7727.40031849,  7380.2950056 ,  8782.96435407,
        7727.40031849, 13985.37855442,  8782.96435407, 48307.68410882,
       14429.04030403, 13985.37855442, 18549.59569903,  7727.40031849,
       13985.37855442, 40007.44676135, 43006.40533829,  5410.15212949,
        7380.2950056 ,  7727.40031849, 18549.59569903, 48307.68410882,
       40007.44676135,  5410.15212949, 13985.37855442,  8782.96435407,
        7727.40031849, 13985.37855442,  7727.40031849,  5410.15212949,
       24984.53200711, 48307.68410882, 13985.37855442,  7727.40031849,
      

## Evaluation metrics 

In [57]:
from sklearn.metrics import r2_score

# R^2 (coefficient of determination) of the predictions on the held-out set.
# This is a goodness-of-fit measure for regression, not a classification
# accuracy, so the printed label names it as R2.
r_score = r2_score(y_true=y_test, y_pred=y_predict)
print(f" THE R2 SCORE OF THE MODEL : {r_score}")

 THE ACCURACY OF THE MODEL : 0.8648762914305494


## save the model 

In [None]:
# Saving the fitted model is currently disabled (this cell is comment-only).
# NOTE(review): the original snippet used a blank file name (" ") and never
# closed the file handle; the file name below is a suggestion - choose a real
# path before enabling.
# import pickle
# with open("adaboost_insurance_model.pkl", "wb") as model_file:
#     pickle.dump(regressor, model_file)

In [226]:
# Predict the charge for one hand-written record. The model was fitted on a
# DataFrame with named columns, so pass a one-row DataFrame with the same
# column names (in training order) rather than a bare nested list; this keeps
# the feature order explicit and avoids scikit-learn's "X does not have valid
# feature names" warning.
sample = pd.DataFrame(
    [[52, 30.200, 1, 1, 0]],
    columns=['age', 'bmi', 'children', 'sex_male', 'smoker_yes'],
)
regressor.predict(sample)



array([9377.9047])