In [1]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import linregress
import requests
import json
from pprint import pprint
from datetime import datetime
import numpy as np

# Location of the auto-insurance CSV, relative to the notebook's working directory.
auto_insurance_to_load = Path("Resources/AutoInsurance.csv")

# Parse the CSV into a DataFrame; later cells read from `auto_insurance_df`.
auto_insurance_df = pd.read_csv(filepath_or_buffer=auto_insurance_to_load)

# NHTSA Crash API "GetCaseList" endpoint. The five per-state URLs below differ
# only in the `states` query parameter, so they are built from one template
# instead of being copy-pasted. The vehicle-count bounds (1 to 6) and the JSON
# format are fixed for every request.
CASE_LIST_URL_TEMPLATE = (
    "https://crashviewer.nhtsa.dot.gov/CrashAPI/crashes/GetCaseList"
    "?states={state}&fromYear={year}&toYear={year}"
    "&minNumOfVehicles=1&maxNumOfVehicles=6&format=json"
)

# Single case year queried for every state (used as both fromYear and toYear).
CRASH_YEAR = 2011

# Numeric state codes expected by the API (these match FIPS state codes):
# 6 = California, 4 = Arizona, 53 = Washington, 41 = Oregon, 32 = Nevada.
url_cali = CASE_LIST_URL_TEMPLATE.format(state=6, year=CRASH_YEAR)
url_ariz = CASE_LIST_URL_TEMPLATE.format(state=4, year=CRASH_YEAR)
url_wash = CASE_LIST_URL_TEMPLATE.format(state=53, year=CRASH_YEAR)
url_ore = CASE_LIST_URL_TEMPLATE.format(state=41, year=CRASH_YEAR)
url_nev = CASE_LIST_URL_TEMPLATE.format(state=32, year=CRASH_YEAR)

In [2]:
def fetch_json(url, timeout=30):
    """Download `url` and return its decoded JSON body.

    Parameters
    ----------
    url : str
        Address to request with HTTP GET.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        `requests.get` can block forever and stall a full notebook re-run.

    Returns
    -------
    The parsed JSON payload (whatever structure the server sent).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so an error page is
        reported here rather than surfacing later as a confusing parse or
        key error.
    requests.Timeout
        If the server does not answer within `timeout` seconds.
    """
    reply = requests.get(url, timeout=timeout)
    reply.raise_for_status()
    return reply.json()


# One request per state; the variable names are consumed by the following cells.
response = fetch_json(url_cali)
response_one = fetch_json(url_ariz)
response_two = fetch_json(url_nev)
response_three = fetch_json(url_wash)
response_four = fetch_json(url_ore)

In [3]:
# Each payload keeps its records in the first element of the "Results" list;
# pull that element out of every state's response in one pass.
data, data_one, data_two, data_three, data_four = (
    payload["Results"][0]
    for payload in (response, response_one, response_two, response_three, response_four)
)

In [4]:
# Turn each state's record collection into its own DataFrame.
cali_df, ariz_df, nev_df, wash_df, ore_df = map(
    pd.DataFrame,
    (data, data_one, data_two, data_three, data_four),
)

In [5]:
# Stack the per-state frames into one table; ignore_index=True renumbers the
# rows 0..n-1 so the index labels stay unique across states.
combined_df = pd.concat([cali_df, ariz_df, nev_df, wash_df, ore_df], ignore_index=True)

# Preview 20 random rows. A fixed random_state keeps the preview identical on
# every Restart & Run All instead of showing different rows each execution.
combined_df.sample(20, random_state=42)

Unnamed: 0,CountyName,CrashDate,Fatals,Peds,Persons,St_Case,State,StateName,TotalVehicles
855,FRESNO (19),/Date(1306768200000-0400)/,1,0,2,60858,6,California,2
252,LOS ANGELES (37),/Date(1296606660000-0500)/,1,1,1,60254,6,California,1
1277,LOS ANGELES (37),/Date(1312625400000-0400)/,1,1,1,61281,6,California,1
3575,ESMERALDA (9),/Date(1324930020000-0500)/,3,0,4,320211,32,Nevada,2
326,SAN BERNARDINO (71),/Date(1298698140000-0500)/,1,1,1,60328,6,California,1
3130,YUMA (27),/Date(1317639540000-0400)/,1,1,1,40527,4,Arizona,1
3045,SANTA CRUZ (23),/Date(1314428520000-0400)/,1,0,1,40442,4,Arizona,1
2342,SANTA CLARA (85),/Date(1322436000000-0500)/,1,0,2,62349,6,California,1
3532,CLARK (3),/Date(1317374820000-0400)/,1,0,2,320168,32,Nevada,2
1539,SOLANO (95),/Date(1315304880000-0400)/,1,0,3,61545,6,California,2


In [6]:
# Print a structural summary of the insurance data: row count, column names,
# non-null counts and dtypes.
auto_insurance_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9135 entries, 0 to 9134
Data columns (total 24 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Customer                       9135 non-null   object 
 1   State                          9135 non-null   object 
 2   Customer Lifetime Value        9135 non-null   float64
 3   Response                       9135 non-null   object 
 4   Coverage                       9135 non-null   object 
 5   Education                      9135 non-null   object 
 6   Effective To Date              9135 non-null   object 
 7   EmploymentStatus               9135 non-null   object 
 8   Gender                         9135 non-null   object 
 9   Income                         9135 non-null   int64  
 10  Location Code                  9135 non-null   object 
 11  Marital Status                 9135 non-null   object 
 12  Monthly Premium Auto           9135 non-null   i

In [7]:
# Columns retained from the raw insurance data for this analysis.
selected_columns = [
    "State",
    "Coverage",
    "Education",
    "Income",
    "Months Since Policy Inception",
    "Months Since Last Claim",
    "Number of Policies",
    "Marital Status",
    "EmploymentStatus",
    "Policy Type",
    "Monthly Premium Auto",
    "Total Claim Amount",
    "Vehicle Class",
    "Vehicle Size",
]
unclean_auto_df = auto_insurance_df[selected_columns]

# NOTE(review): this removes the row labelled 0; the reason is not visible in
# this notebook, so confirm that discarding the first record is intentional.
auto_df = unclean_auto_df.drop(index=0)

# Keep only the rows whose Income is not zero.
income_df = auto_df.loc[auto_df["Income"].ne(0)]

# Pairwise correlation matrix of the numeric measures (displayed as the cell output).
correlation_columns = [
    "Income",
    "Monthly Premium Auto",
    "Total Claim Amount",
    "Number of Policies",
    "Months Since Policy Inception",
    "Months Since Last Claim",
]
income_df[correlation_columns].corr()

Unnamed: 0,Income,Monthly Premium Auto,Total Claim Amount,Number of Policies,Months Since Policy Inception,Months Since Last Claim
Income,1.0,-0.012372,-0.221879,-0.001324,0.015799,-0.034453
Monthly Premium Auto,-0.012372,1.0,0.636084,-0.026795,0.020112,0.005887
Total Claim Amount,-0.221879,0.636084,1.0,-0.012253,-0.003733,0.00146
Number of Policies,-0.001324,-0.026795,-0.012253,1.0,-0.020548,0.017022
Months Since Policy Inception,0.015799,0.020112,-0.003733,-0.020548,1.0,-0.039993
Months Since Last Claim,-0.034453,0.005887,0.00146,0.017022,-0.039993,1.0
