# Auto Insurance Case Study

In [1]:
# Standard library
import warnings

# Third-party analysis and plotting stack
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide settings: ggplot look for every figure, and all warnings muted.
# NOTE(review): the blanket "ignore" filter also hides deprecation notices;
# consider narrowing it to specific warning categories.
plt.style.use('ggplot')
warnings.filterwarnings("ignore")

## Loading case study data:

### Loading 2017 auto policies data:

- This data set contains the personal auto insurance policies taken out in 2017: 60,392 policies (rows), of which 10,030 had at least one claim.

In [2]:
# Load the 2017 policy records; the relative path is resolved against the
# kernel's current working directory.
auto_policies_2017 = pd.read_csv(filepath_or_buffer="auto_policies_2017.csv")

- Checking dataset shape:

In [3]:
# Report the (rows, columns) tuple of the 2017 policies frame.
policies_2017_shape = auto_policies_2017.shape
print('Shape of auto_policies_2017 :', policies_2017_shape)

Shape of auto_policies_2017 : (60392, 15)


- Looking at the column names:

In [4]:
# Bare last expression: the notebook displays the frame's column Index as the cell output.
auto_policies_2017.columns

Index(['pol_number', 'pol_eff_dt', 'gender', 'agecat', 'date_of_birth',
       'credit_score', 'area', 'traffic_index', 'veh_age', 'veh_body',
       'veh_value', 'claim_office', 'numclaims', 'claimcst0',
       'annual_premium'],
      dtype='object')

- Renaming a few columns for clarity and later analysis, then previewing the first ten rows:

In [5]:
# Map the terse source headers to descriptive names. `rename` matches whole
# column labels only, so a column that merely contains one of these strings is
# left alone, and labels that are absent are ignored, so the cell can be
# re-run without changing the result.
auto_policies_2017 = auto_policies_2017.rename(
    columns={
        "agecat": "age_category",
        "numclaims": "num_claims",
        "claimcst0": "claim_amount",
        "veh_age": "vehicle_age",
        "veh_body": "vehicle_body",
        "veh_value": "vehicle_value",
    }
)

In [6]:
# Preview the first ten policies; the frame is the cell's displayed output.
auto_policies_2017.head(n=10)

Unnamed: 0,pol_number,pol_eff_dt,gender,age_category,date_of_birth,credit_score,area,traffic_index,vehicle_age,vehicle_body,vehicle_value,claim_office,num_claims,claim_amount,annual_premium
0,64080188,7/10/2017,M,2.0,1/4/1989,631.0,B,140.9,4,TRUCK,0.924,,0,0.0,716.53
1,18917133,7/31/2017,M,2.0,6/21/1985,531.0,C,136.5,3,HBACK,1.43,B,1,583.010876,716.53
2,82742606,2/1/2017,M,6.0,7/25/1942,838.0,D,88.8,3,SEDAN,1.1,D,1,159.375757,716.53
3,43601997,10/17/2017,M,5.0,6/8/1959,835.0,E,,2,SEDAN,2.09,,0,0.0,716.53
4,58746861,4/13/2017,F,4.0,5/16/1967,748.0,C,123.0,3,HBACK,0.803,C,1,143.555642,716.53
5,83346346,11/23/2017,M,5.0,11/2/1956,785.0,B,108.6,2,SEDAN,1.903,,0,0.0,716.53
6,92111059,11/29/2017,M,3.0,10/31/1976,759.0,E,75.0,4,STNWG,1.452,,0,0.0,716.53
7,69967688,4/24/2017,F,,11/27/1947,836.0,C,88.5,1,HBACK,1.397,,0,0.0,716.53
8,58856161,7/19/2017,M,,3/12/1967,688.0,A,50.0,1,SEDAN,2.838,,0,0.0,716.53
9,57786319,8/31/2017,M,,12/25/1984,503.0,B,,3,HBACK,1.936,B,1,1039.39835,716.53


### Loading auto policies potential customers 2018 data:

- This is a list of 7,464 potential customers for 2018.  

In [7]:
# Load the 2018 prospect list; the relative path is resolved against the
# kernel's current working directory.
auto_potential_2018 = pd.read_csv(filepath_or_buffer="auto_potential_customers_2018.csv")

- Checking dataset shape:

In [8]:
# Report the (rows, columns) tuple of the 2018 potential-customers frame.
potential_2018_shape = auto_potential_2018.shape
print('Shape of auto_potential_2018 :', potential_2018_shape)

Shape of auto_potential_2018 : (7464, 10)


- Looking at the column names:

In [9]:
# Bare last expression: the notebook displays the frame's column Index as the cell output.
auto_potential_2018.columns

Index(['quote_number', 'gender', 'agecat', 'date_of_birth', 'credit_score',
       'area', 'traffic_index', 'veh_age', 'veh_body', 'veh_value'],
      dtype='object')

- Renaming a few columns for clarity and later analysis, then previewing the first ten rows:

In [10]:
# Map the terse source headers to descriptive names. `rename` matches whole
# column labels only, so a column that merely contains one of these strings is
# left alone, and labels that are absent are ignored, so the cell can be
# re-run without changing the result.
auto_potential_2018 = auto_potential_2018.rename(
    columns={
        "agecat": "age_category",
        "veh_age": "vehicle_age",
        "veh_body": "vehicle_body",
        "veh_value": "vehicle_value",
    }
)

In [11]:
# Preview the first ten potential customers; the frame is the cell's displayed output.
auto_potential_2018.head(n=10)

Unnamed: 0,quote_number,gender,age_category,date_of_birth,credit_score,area,traffic_index,vehicle_age,vehicle_body,vehicle_value
0,29323463,F,4.0,4/14/1968,750.0,C,124.5,2,HBACK,1.1
1,16732441,M,2.0,10/4/1984,567.0,F,,3,STNWG,2.079
2,68744228,M,4.0,11/30/1962,378.0,D,111.4,3,HDTOP,2.354
3,30875047,F,3.0,5/17/1973,807.0,E,,4,UTE,1.969
4,37049210,M,6.0,10/2/1945,844.0,C,111.0,1,HBACK,1.969
5,90859532,M,1.0,1/19/1995,743.0,C,133.5,1,HBACK,2.09
6,58572612,M,5.0,3/27/1953,647.0,F,156.0,3,HDTOP,2.915
7,32679788,M,4.0,9/20/1964,785.0,A,,2,SEDAN,1.925
8,90845095,F,1.0,1/17/1999,601.0,B,105.2,2,HBACK,1.43
9,97453004,F,3.0,12/2/1979,672.0,C,145.5,2,STNWG,3.905
