In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [24]:
# Read the insurance dataset from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="medical_insurance.csv")
df.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [25]:
# Print column names, non-null counts and dtypes to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [26]:
# Distinct labels of each text column, in the order sex, smoker, region.
tuple(df[column].unique() for column in ("sex", "smoker", "region"))

(array(['female', 'male'], dtype=object),
 array(['yes', 'no'], dtype=object),
 array(['southwest', 'southeast', 'northwest', 'northeast'], dtype=object))

In [27]:
# Encode the two binary text columns as 0/1 integers.
# The result is assigned back to the column rather than calling
# `df.sex.replace(..., inplace=True)`: an in-place method on the Series
# returned by attribute access is chained assignment, which pandas warns
# about and which leaves `df` unchanged under Copy-on-Write.
# `replace` ignores values that are already encoded, so re-running is safe.
df["sex"] = df["sex"].replace({"female": 0, "male": 1}).astype(int)
df["smoker"] = df["smoker"].replace({"yes": 1, "no": 0}).astype(int)

In [28]:
# One-hot encode `region` into region_<name> indicator columns.
# NOTE: this rebinds `df` to a frame without `region`, so the cell is
# presumably not re-runnable without reloading the data first.
df = pd.get_dummies(data=df, columns=["region"])

In [29]:
# Separate the prediction target from the feature columns.
y = df["charges"]
X = df.drop(columns="charges")

In [30]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

In [31]:
# Fit a linear regression on the training split, then predict charges
# for the held-out rows.
model = LinearRegression().fit(X_train, y_train)
pre = model.predict(X_test)

In [32]:
# Score the held-out predictions with three regression metrics.
mae = mean_absolute_error(y_test, pre)
mse = mean_squared_error(y_test, pre)
r2 = r2_score(y_test, pre)
print(f"r2 will be {r2}, mean square error is {mse},mean absolute error {mae}")

r2 will be 0.781705116269015, mean square error is 29552150.007714216,mean absolute error 3828.8075105941816


In [102]:
# Show the feature columns in the order the model receives them.
# Only the last expression of a cell is displayed, so this cell
# contains just the one expression whose output is wanted.
X.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region_northeast',
       'region_northwest', 'region_southeast', 'region_southwest'],
      dtype='object')

In [34]:
# Refit the same estimator object on every row (features X, target y).
model.fit(X=X, y=y)

LinearRegression()

In [35]:
# Zero-filled feature vector with one slot per column of X.
test_array = np.zeros(X.shape[1])
test_array

array([0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [156]:
# Raw attributes of the example applicant whose charge will be predicted.
age = 19.0
sex = "male"
bmi = 99
children = 2
smoker = 'yes'
region = 'northwest'

# Encoding metadata needed to turn raw attributes into a model input row.
project = {
    # Same 0/1 codes that were applied to the training data.
    "sex": {"female": 0, "male": 1},
    "smoker": {"yes": 1, "no": 0},
    # Taken from X rather than typed by hand, so the column order can never
    # drift from the feature order the model was fitted on.
    "columns": X.columns.tolist(),
}


7

In [160]:
# Build the one-hot column name for the region and look up its position in
# the feature list. `region` is assigned here explicitly, replacing whatever
# value it held before this cell.
region = "region_" + 'southeast'
print(region)
index = project["columns"].index(region)
index

region_southeast


7

In [145]:
# Start from a fresh zero vector every time this cell runs. Filling a
# vector created in another cell leaves the region flag of a previous run
# set, so two region slots can end up equal to 1 at once.
test_array = np.zeros(len(project["columns"]))

# Positions 0-4 follow the column order age, sex, bmi, children, smoker.
test_array[0] = age
test_array[1] = project["sex"][sex]
test_array[2] = bmi
test_array[3] = children
test_array[4] = project["smoker"][smoker]
# One-hot flag for the selected region column.
test_array[index] = 1

In [146]:
# Display the encoded feature vector for a visual check.
test_array

array([19.,  1., 99.,  2.,  1.,  0.,  0.,  0.,  1.])

In [161]:
# Wrap the vector in a one-row DataFrame carrying the training column names.
# The model was fitted on a DataFrame, so this lets scikit-learn check that
# the features arrive under the expected names instead of trusting position.
features = pd.DataFrame([test_array], columns=project["columns"])
print("Predicted insurance Charge is :",round(model.predict(features)[0],2))

Predicted insurance Charge is : 50230.05


In [148]:
import pickle as pk
import json

In [149]:
# Persist the fitted estimator as a binary pickle and the encoding
# metadata as JSON, each through its own context-managed handle.
with open("linear_model.pkl", "wb") as model_file:
    pk.dump(model, model_file)
with open("project.json", "w+") as meta_file:
    json.dump(project, meta_file)

In [100]:
# Read the pickled estimator back from disk and display it.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("linear_model.pkl", "rb") as model_file:
    model = pk.load(model_file)
model

LinearRegression()

# Example of Pickle


In [32]:
# Python 3 program illustrating how to store data
# efficiently using the pickle module.
# The module translates an in-memory Python object
# into a serialized byte stream (a string of
# bytes) that can be written to any file-like object.

import pickle

def storeData():
    """Pickle a small sample "database" of two records to ``examplePickle``.

    The file is created in the current working directory and is overwritten
    on every call, so it always holds exactly one pickled dict.

    Returns:
        None
    """
    # initializing data to be stored in db
    Omkar = {'key' : 'Omkar', 'name' : 'Omkar Pathak',
             'age' : 21, 'pay' : 40000}
    Jagdish = {'key' : 'Jagdish', 'name' : 'Jagdish Pathak',
               'age' : 50, 'pay' : 50000}

    # database
    db = {'Omkar': Omkar, 'Jagdish': Jagdish}

    # It's important to use binary mode for pickle data.
    # 'wb' (truncate) is used instead of 'ab': appending would stack a new
    # pickle behind the old ones on each call, while a single pickle.load
    # only ever reads the first (oldest) one.
    with open('examplePickle', 'wb') as dbfile:
        # source, destination
        pickle.dump(db, dbfile)

def loadData():
    """Load the dict pickled in ``examplePickle`` and print each entry.

    Reads one pickled object from the file in the current working directory
    and prints every key with its value as ``key => value``.

    Returns:
        None

    Raises:
        FileNotFoundError: if ``examplePickle`` does not exist.
    """
    # Binary mode is required for reading pickle data as well. The context
    # manager closes the file even if unpickling or printing fails.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('examplePickle', 'rb') as dbfile:
        db = pickle.load(dbfile)
        for keys in db:
            print(keys, '=>', db[keys])

# Round-trip demo: write the sample data, then read it back and print it.
if __name__ == "__main__":
    storeData()
    loadData()


Omkar => {'key': 'Omkar', 'name': 'Omkar Pathak', 'age': 21, 'pay': 40000}
Jagdish => {'key': 'Jagdish', 'name': 'Jagdish Pathak', 'age': 50, 'pay': 50000}


In [33]:
# Two sample records that make up the in-memory "database".
Omkar = dict(key='Omkar', name='Omkar Pathak', age=21, pay=40000)
Jagdish = dict(key='Jagdish', name='Jagdish Pathak', age=50, pay=50000)

# The database maps each person's name to their record.
db = {'Omkar': Omkar, 'Jagdish': Jagdish}

# Serialize to an in-memory bytes object instead of a file...
b = pickle.dumps(db)

# ...and rebuild an equal, independent copy from those bytes.
myEntry = pickle.loads(b)
print(myEntry)


{'Omkar': {'key': 'Omkar', 'name': 'Omkar Pathak', 'age': 21, 'pay': 40000}, 'Jagdish': {'key': 'Jagdish', 'name': 'Jagdish Pathak', 'age': 50, 'pay': 50000}}


In [42]:
# Write a small dict of lists to name.json in the working directory.
name = {"A": [1, 4, 56, 64, 45], "B": [1, 5, 4, 5, "asd"]}
# The `with` block closes the file on exit, so no explicit close call is
# needed afterwards.
with open("name.json", "w") as file:
    json.dump(name, file)

<function TextIOWrapper.close()>

<_io.TextIOWrapper name='name.json' mode='w' encoding='cp1252'>
