### Medical Cost Prediction

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet,RidgeCV,LassoCV,ElasticNetCV


In [None]:
# Load the insurance dataset from the relative input path into a DataFrame.
data=pd.read_csv("../input/insurance/insurance.csv")
# Bare expression: shown as the cell's output for a first look at the rows.
data

In [None]:
# Summary statistics; include="all" also covers the non-numeric columns.
data.describe(include="all")

In [None]:
# Print column dtypes and non-null counts.
data.info()

### Preprocessing Step

In [None]:
# Count missing cells per column, then add the per-column counts together
# to get one total for the whole frame.
print("Total Number of Null Value :", data.isna().sum().sum())

In [None]:
# Cast the child count to strings so it is handled as a categorical column
# (it is one-hot encoded in preprocessing) instead of as a numeric feature.
data["children"] = data["children"].astype(str)

In [None]:
# Show the columns whose dtype is "object" (the non-numeric ones).
print(data.select_dtypes("object"))

# Two spacer lines, each holding a single space, before the summary line.
print(" ", " ", sep="\n")

# Number of non-numeric columns = width of the object-dtype selection.
print(" 'Total Non-Numeric Column' :", data.select_dtypes("object").shape[1])

In [None]:
# Map every object-dtype column to the list of distinct values it contains;
# the resulting dict is displayed as the cell's output.
{
    col: list(data[col].unique())
    for col in data.select_dtypes("object").columns
}

In [None]:
def binary_encode(df, column, positive_value):
    """Return a copy of *df* with *column* turned into a 0/1 indicator.

    Args:
        df: Input frame; it is copied, never modified in place.
        column: Label of the column to encode.
        positive_value: Value that maps to 1; every other value maps to 0.

    Returns:
        A new DataFrame identical to *df* except for the encoded column.
    """

    def _to_flag(value):
        # Plain equality test, applied element by element.
        if value == positive_value:
            return 1
        return 0

    encoded = df.copy()
    encoded[column] = encoded[column].apply(_to_flag)
    return encoded
    

def onehot_encode(df, column, prefix):
    """Return a new frame with *column* replaced by one-hot indicator columns.

    Args:
        df: Input frame; it is left untouched.
        column: Label of the categorical column to expand.
        prefix: Prefix passed to ``pd.get_dummies`` for the new column names.

    Returns:
        A new DataFrame holding the remaining original columns followed by
        the indicator columns; *column* itself is dropped.
    """
    indicator_cols = pd.get_dummies(df[column], prefix=prefix)
    # Append the indicators on the right, then remove the source column.
    widened = pd.concat([df, indicator_cols], axis=1)
    return widened.drop(columns=column)

In [None]:
def preprocessing(df, scaler, train_size=0.7):
    """Encode, split and scale the insurance data for regression.

    Steps, in order:
      1. binary-encode ``sex`` ("male" -> 1) and ``smoker`` ("yes" -> 1);
      2. one-hot encode ``children`` (prefix "ch") and ``region`` (prefix "re");
      3. separate the ``charges`` target from the features;
      4. split into train and test sets (shuffled, ``random_state=123``);
      5. fit *scaler* on the training features only and apply it to both sets.

    Args:
        df: Raw frame containing at least the columns ``sex``, ``smoker``,
            ``children``, ``region`` and ``charges``. It is not modified.
        scaler: Object exposing ``fit_transform`` and ``transform``
            (e.g. ``StandardScaler()``). It is fitted as a side effect.
        train_size: Forwarded to ``train_test_split``; defaults to 0.7.

    Returns:
        ``(X_train, X_test, y_train, y_test)``: the scaled feature
        DataFrames and the matching target Series.
    """
    df = df.copy()

    # Binary encoding for the two-valued columns.
    df = binary_encode(df, "sex", "male")
    df = binary_encode(df, "smoker", "yes")

    # One-hot encoding for the multi-valued categorical columns.
    df = onehot_encode(df, "children", "ch")
    df = onehot_encode(df, "region", "re")

    # Separate target and features.
    y = df["charges"].copy()
    X = df.drop("charges", axis=1)

    # Split first, and honour the caller's train_size: it used to be
    # hard-coded to 0.7 here, which silently ignored the parameter.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=True, random_state=123
    )

    # Fit the scaler on the training rows only, so that statistics of the
    # held-out rows do not leak into the features the models are trained on.
    # The original index is kept so X and y stay aligned by label.
    X_train = pd.DataFrame(
        scaler.fit_transform(X_train), columns=X.columns, index=X_train.index
    )
    X_test = pd.DataFrame(
        scaler.transform(X_test), columns=X.columns, index=X_test.index
    )

    return X_train, X_test, y_train, y_test

In [None]:
# Build the train/test splits with standard scaling; train_size is left at
# the function's default.
X_train,X_test,y_train,y_test=preprocessing(data,StandardScaler())

In [None]:
# Bare tuple expression: displays all four splits as the cell's output.
X_train,X_test,y_train,y_test

In [None]:
### Training

In [None]:
# Candidate linear regressors, each constructed with its default arguments.
# The keys are display labels: they are padded with spaces and end in ":"
# so that they line up when printed next to a score.
models ={
    "LinearRegression :":LinearRegression(),
    "Ridge            :":Ridge(),
    "Lasso            :":Lasso(),
    "ElasticNet       :":ElasticNet(),
    "RidgeCV          :":RidgeCV(),
    "LassoCV          :":LassoCV(),
    "ElasticNetCV     :":ElasticNetCV()
}

# Fit every candidate in place on the training split.
for model in models.values():
    model.fit(X_train,y_train)

In [None]:
# Header followed by one line per model: its label and its score() on the
# held-out test split.
print ("Model R^2 Scores: \n------------------------")
for name, model in models.items():
    print(name,model.score(X_test,y_test))