# GLM from Emblem

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import sklearn

from sklearn.preprocessing import OrdinalEncoder

In [None]:
# Load the training and test extracts of the Saga new-business cancellation
# data (Nov19-Nov21) from the shared pricing drive.
Saga_NB_df_train = pd.read_csv(
    filepath_or_buffer=r"R:\Pricing & Actuarial\Pricing\Personal Lines Pricing - Motor\Technical\8. Optimisation\6. CDL\1. Jan22\2. Modelling\Python\Cancellation_NB_Saga_Nov19-Nov21_train.csv",
)
Saga_NB_df_test = pd.read_csv(
    filepath_or_buffer=r"R:\Pricing & Actuarial\Pricing\Personal Lines Pricing - Motor\Technical\8. Optimisation\6. CDL\1. Jan22\2. Modelling\Python\Cancellation_NB_Saga_Nov19-Nov21_test.csv",
)

In [None]:
# Quick load check: show the (rows, columns) dimensions of both extracts.
print(
    'Train shape: ', Saga_NB_df_train.shape,
    'Test shape: ', Saga_NB_df_test.shape,
)

In [None]:
# Print the column names, non-null counts and dtypes of the training extract.
Saga_NB_df_train.info()

In [None]:
# Display the first rows of the training extract for a visual sanity check.
Saga_NB_df_train.head()

In [None]:
# Replace spaces in the column headers with underscores; the rest of the
# notebook refers to columns by their underscored names.
columns = Saga_NB_df_train.columns
new_column_headers = [header.replace(" ", "_") for header in columns]
Saga_NB_df_train.columns = new_column_headers

# Apply the same renaming to the test extract. Later cells index the test frame
# with the underscored names too, so leaving its headers untouched would make
# those lookups fail with a KeyError whenever the raw headers contain spaces.
# (If the test headers have no spaces this is a no-op.)
Saga_NB_df_test.columns = [
    header.replace(" ", "_") for header in Saga_NB_df_test.columns
]

print(Saga_NB_df_train.columns)

In [None]:
# Remove factors that must not enter this stage of the modelling: ones that are
# not GIPP compliant, ones that are not allowed at this stage (they may be added
# later in overlays etc.), and ones that only describe the dataset setup.
# The same columns are removed from the training and the test frame.
excluded_columns = ['Weight', 'set', 'Add_Driver_Sex', 'Main_Driver_Sex', 'Main_Driver_Age_Months']
Saga_NB_dropped_unnecessary_df = Saga_NB_df_train.drop(excluded_columns, axis="columns")
Saga_NB_dropped_unnecessary_df_test = Saga_NB_df_test.drop(excluded_columns, axis="columns")

## Banding features with Ordinal Encoder 

In [None]:
## Encoding Training Data

# Ordered mapping of every banded feature to the complete list of levels it may
# take. The position of a level in its list is the integer code the encoder
# assigns to it (first level -> 0.0, second -> 1.0, ...). Keeping each column
# next to its levels prevents the two from drifting out of step, which is easy
# when they are maintained as two separate positional lists.
# With the encoder's default settings a value that is not listed raises an
# error at fit/transform time.
ordinal_category_map = {
    'AgePassedTest': [-1, *range(17, 88)],
    'Cover': ['Comp', 'TPFT'],
    'Duration': list(range(7)),
    # NOTE(review): 'Unlocked Compund' spelling kept as-is; presumably it matches the data - confirm.
    'Garaged': ['Car Park', 'Carport', 'Garaged', 'Locked Building', 'Locked Compound', 'On Drive',
                'Private Property', 'Public Road', 'Third Party Premises', 'Unlocked Building',
                'Unlocked Compund', 'Business Address', 'N/A'],
    # TODO: no level for missing values - ints and strings cannot be mixed in one list.
    'NCDAllowed': list(range(10)),
    'NCDDifference': list(range(10)),
    'RegistrationYear': list(range(1997, 2022)),
    'VehicleAgeatPurchase': list(range(21)),
    'YearsOwned': list(range(16)),
    'Class_of_Use': ['SDP', 'SDP & C', 'Class 1', 'Class 2', 'Class 3',
                     'Class 1 (Policyholder Only)', 'Class 1 (Spouse Only)'],
    # '1 - 1000', '1001 - 2000', ..., '29001 - 30000', then the open-ended band.
    'Annual_Mileage': [f'{low + 1} - {low + 1000}' for low in range(0, 30000, 1000)] + ['Over 30000'],
    'Age_Difference': ['IOD', '-15+', '-10 to -14', '-5 to -9', '0 to -4', '1 to 5', '6+'],
    'Add_Driving_Experience': ['IOD', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
    'Add_Other_Vehs_Owned': ['IOD', '0', '1', '2', '3', '4'],
    'Add_Driver_Age': ['IOD', '17 to 24', '25 to 29', '30-34', '35-39', '40-44', '45-49',
                       '50-54', '55-59', '60-64', '65+'],
    'Add_Access_to_Other_Vehs': ['IOD', '0', '1', '2', '3', '4'],
    'Access_to_Other_Vehs': list(range(6)),
    'PDR_Code': ['Any', 'I&1', 'I&2', 'I&3', 'I&F', 'I&P', 'I&S', 'IOD'],
    'Other_Vehs_Owned': list(range(6)),
    'NCD_Protected': ['N', 'Y'],
    'NCD_Earned': list(range(10)),
    'Most_Severe_Conviction': ['No Conviction', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    # TODO: no level for missing values - ints and strings cannot be mixed in one list.
    'Main_UK_Residency': list(range(4)),
    'Main_Occupation': ['Not in Employment', 'Armed Services', 'Bar/Restaurant', 'Building & Construction',
                        'Driving/Motor Trade', 'Education/Research', 'Farming/Animals',
                        'Government/Public Services', 'Healthcare', 'Leisure/Beauty', 'Manual/Unskilled',
                        'Office/Professional', 'Other', 'Security Cash & Courier', 'Skilled',
                        'Sports/Entertainment'],
    'Main_Marital_Status': ['Common Law', 'Divorced', 'Married', 'Partnered', 'Separated', 'Single', 'N/A'],
    'Main_Licence_Type': ['Full (UK)', 'International', 'Provisional', 'Foreign',
                          'Restricted < 3 Years', 'Restricted >= 3 Years'],
    'Main_Full_Time_Employed': ['N/A', 'N', 'Y'],
    'Main_Employment_Type': ['Employed', 'Employed (Temporary)', 'Financially Assisted', 'Household Duties',
                             'In Full Or Part Time Education', 'Independent Means', 'Unemployed',
                             'Own Company', 'Retired', 'Self Employed', 'Voluntary Work', 'Contractor',
                             'Missing'],
    'Main_Driving_Experience_Months': list(range(120)),
    # NOTE(review): 'Education and Training ' carries a trailing space; kept as-is, presumably to match the data.
    'Main_Employer_Business': ['Agriculture & Animals', 'Antique / Arts Trade', 'Armed Services', 'Aviation',
                               'Blue Collar', 'Business Professional', 'Cash Collector', 'Child Care',
                               'Construction', 'Courier', 'Craftsman', 'Design', 'Education and Training ',
                               'Electrical', 'Emergency Services', 'Engineering', 'Entertainment & Media',
                               'Finance & Insurance', 'Food', 'Government and Agencies', 'Healthcare',
                               'Holiday', 'Hotel Pub & Restaurant', 'Installation', 'IT', 'Leisure & Beauty',
                               'Maintenance & Repair', 'Manufacturing', 'Marketing', 'Mining and Fuel',
                               'Not in Employment', 'Office Administration', 'Other', 'Parks & Gardens',
                               'Photography', 'Property', 'Public Service', 'Publishing', 'Retail Food',
                               'Retail Sales', 'Science', 'Security', 'Social Services',
                               'Telecommunications', 'Tradesman', 'Transport', 'Missing'],
    'Main_Driving_Experience': list(range(10)),
    'Latest_Conviction': ['0 to 1', '1 to 2', '2 to 3', '3 to 4', '4 to 5', 'No Conviction'],
    # NOTE(review): in the two accident-claim factors '0 to 1' is ranked after '4 to 5',
    # unlike Latest_Conviction and Latest_Windscreen_Claim. Order kept unchanged - confirm it is intended.
    'Latest_Fault_Accident_Claim': ['1 to 2', '2 to 3', '3 to 4', '4 to 5', '0 to 1', 'No Claim'],
    'Latest_Non_Fault_Accident_Claim': ['1 to 2', '2 to 3', '3 to 4', '4 to 5', '0 to 1', 'No Claim'],
    'Latest_Windscreen_Claim': ['0 to 1', '1 to 2', '2 to 3', '3 to 4', '4 to 5', 'No Claim'],
    'Main_Driver_Age': list(range(17, 101)),
    'Transaction_Type': ['AJ-NB', 'AJ_RC', 'NB', 'RB', 'RC'],
    'Tot_Windscreen_Claims': ['No Claim', '1', '2', '3', '4', '5'],
    'Tot_Vandalism_Claims': ['No Claim', '1', '2', '3', '4', '5'],
    'Tot_Theft_Claims': ['No Claim', '1', '2', '3', '4', '5'],
    'Tot_Non_Fault_Accident_Claims': ['No Claim', '1', '2', '3', '4', '5'],
    'Tot_Fire_Claims': ['No Claim', '1', '2', '3', '4', '5'],
    'Tot_Fault_Accident_Claims': ['No Claim', '1', '2', '3', '4', '5+'],
    'Vehicle_Age': list(range(21)),
    'Vehicle_Keeper': ['Proposer', 'Spouse', 'Common-Law Partner', 'Family', 'Company', 'Leased Vehicle', 'Other'],
    'Vehicle_Owner': ['Proposer', 'Spouse', 'Common-Law Partner', 'Family', 'Company', 'Leased Vehicle', 'Other'],
    # Band labels below 20000 are written 'low - high' (with spaces); from 20000
    # upwards they are written 'low-high' (no spaces). Both spellings are
    # reproduced exactly because the labels must match the data values.
    'Vehicle_Value': (['Under 1000']
                      + [f'{low} - {low + 999}' for low in range(1000, 20000, 1000)]
                      + [f'{low}-{low + 999}' for low in range(20000, 50000, 1000)]
                      + ['50000+']),
    'Saga_Years_with_Broker': ['0', '1', '2', '3', '4', '5', '6+', 'Default'],
    'SagaCais': ['0 to 99', '100 to 199', '200+', 'Default'],
    'SagaFactor': ['0-74', '75-84', '85-94', '95-104', '105-114', '115-124', '125-134', '135-144',
                   '145+', 'Default'],
    'SagaQuoteLag': [str(days) for days in range(31)] + ['Default'],
}
ordinal_columns = list(ordinal_category_map)

# Take an explicit copy of the banded columns: this frame has values assigned
# into it afterwards, and assigning into a plain column slice of another frame
# triggers pandas' SettingWithCopyWarning.
Saga_NB_transformed_df = Saga_NB_dropped_unnecessary_df[ordinal_columns].copy()

# One category list per column, in the same order as the columns of the frame.
ordinal_encoder = OrdinalEncoder(categories=list(ordinal_category_map.values()))
vehicle_value_encoded = ordinal_encoder.fit_transform(Saga_NB_transformed_df)

In [None]:
## Encoding Testing Data

# Banded feature columns, in the order the encoder was fitted on.
ordinal_columns_test = [
    'AgePassedTest', 'Cover', 'Duration', 'Garaged', 'NCDAllowed',
    'NCDDifference', 'RegistrationYear', 'VehicleAgeatPurchase',
    'YearsOwned', 'Class_of_Use', 'Annual_Mileage', 'Age_Difference',
    'Add_Driving_Experience', 'Add_Other_Vehs_Owned', 'Add_Driver_Age',
    'Add_Access_to_Other_Vehs', 'Access_to_Other_Vehs', 'PDR_Code',
    'Other_Vehs_Owned', 'NCD_Protected', 'NCD_Earned', 'Most_Severe_Conviction',
    'Main_UK_Residency', 'Main_Occupation', 'Main_Marital_Status',
    'Main_Licence_Type', 'Main_Full_Time_Employed', 'Main_Employment_Type',
    'Main_Driving_Experience_Months', 'Main_Employer_Business', 'Main_Driving_Experience',
    'Latest_Conviction', 'Latest_Fault_Accident_Claim', 'Latest_Non_Fault_Accident_Claim',
    'Latest_Windscreen_Claim', 'Main_Driver_Age', 'Transaction_Type', 'Tot_Windscreen_Claims',
    'Tot_Vandalism_Claims', 'Tot_Theft_Claims', 'Tot_Non_Fault_Accident_Claims',
    'Tot_Fire_Claims', 'Tot_Fault_Accident_Claims', 'Vehicle_Age', 'Vehicle_Keeper',
    'Vehicle_Owner', 'Vehicle_Value', 'Saga_Years_with_Broker', 'SagaCais', 'SagaFactor', 'SagaQuoteLag',
]

# Take an explicit copy of the banded columns: this frame has values assigned
# into it afterwards, and assigning into a plain column slice of another frame
# triggers pandas' SettingWithCopyWarning.
Saga_NB_transformed_df_test = Saga_NB_dropped_unnecessary_df_test[ordinal_columns_test].copy()

# Reuse `ordinal_encoder`, which the training-data cell has already fitted,
# instead of building and fitting a second encoder on the test data. The
# category lists are fixed, so the resulting codes are the same, but the level
# specification now exists in one place only and the hold-out set is never used
# for fitting. As with fitting, a value outside the specified levels raises.
vehicle_value_encoded_test = ordinal_encoder.transform(Saga_NB_transformed_df_test)

In [None]:
# Display the encoded training array (one column per banded feature).
vehicle_value_encoded

In [None]:
# Display the encoded test array (one column per banded feature).
vehicle_value_encoded_test

In [None]:
## Training Data

# Saga_NB_transformed_df was built from exactly the banded feature columns, in
# the same order as the columns of the encoded array, so its own column index
# is the single source for that list (instead of repeating 51 names three times).
encoded_columns = list(Saga_NB_transformed_df.columns)

# Overwrite the banded columns with their ordinal codes ...
Saga_NB_transformed_df[encoded_columns] = vehicle_value_encoded
# ... and copy the encoded columns back into the full modelling frame; all
# other columns of that frame are left untouched.
Saga_NB_dropped_unnecessary_df[encoded_columns] = Saga_NB_transformed_df[encoded_columns]

In [None]:
## Testing Data

# Saga_NB_transformed_df_test was built from exactly the banded feature columns,
# in the same order as the columns of the encoded test array, so its own column
# index is used rather than repeating the 51 names.
encoded_columns_test = list(Saga_NB_transformed_df_test.columns)

# Overwrite the banded test columns with their ordinal codes.
Saga_NB_transformed_df_test[encoded_columns_test] = vehicle_value_encoded_test
Saga_NB_dropped_unnecessary_df_test[[
                                'AgePassedTest', 'Cover', 'Duration', 'Garaged', 'NCDAllowed',
                                'NCDDifference', 'RegistrationYear', 'VehicleAgeatPurchase', 
                                'YearsOwned', 'Class_of_Use', 'Annual_Mileage', 'Age_Difference',
                                'Add_Driving_Experience', 'Add_Other_Vehs_Owned', 'Add_Driver_Age',
                                'Add_Access_to_Other_Vehs', 'Access_to_Other_Vehs', 'PDR_Code',
                                'Other_Vehs_Owned', 'NCD_Protected', 'NCD_Earned', 'Most_Severe_Conviction',
                                'Main_UK_Residency', 'Main_Occupation', 'Main_Marital_Status', 
                                'Main_Licence_Type', 'Main_Full_Time_Employed', 'Main_Employment_Type',
                                'Main_Driving_Experience_Months', 'Main_Employer_Business', 'Main_Driving_Experience',
                                'Latest_Conviction', 'Latest_Fault_Accident_Claim','Latest_Non_Fault_Accident_Claim',
                                'Latest_Windscreen_Claim','Main_Driver_Age', 'Transaction_Type', 'Tot_Windscreen_Claims', 
                                'Tot_Vandalism_Claims', 'Tot_Theft_Claims', 'Tot_Non_Fault_Accident_Claims', 
                                'Tot_Fire_Claims', 'Tot_Fault_Accident_Claims', 'Vehicle_Age', 'Vehicle_Keeper', 
                                'Vehicle_Owner', 'Vehicle_Value', 'Saga_Years_with_Broker', 'SagaCais', 'SagaFactor', 'SagaQuoteLag']] = Saga_NB_transformed_df_test[[
                                                                                                                                                                'AgePassedTest', 'Cover', 'Duration', 'Garaged', 'NCDAllowed',
                                                                                                                                                                'NCDDifference', 'RegistrationYear', 'VehicleAgeatPurchase', 
                                                                                                                                                                'YearsOwned', 'Class_of_Use', 'Annual_Mileage', 'Age_Difference',
                                                                                                                                                                'Add_Driving_Experience', 'Add_Other_Vehs_Owned', 'Add_Driver_Age',
                                                                                                                                                                'Add_Access_to_Other_Vehs', 'Access_to_Other_Vehs', 'PDR_Code',
                                                                                                                                                                'Other_Vehs_Owned', 'NCD_Protected', 'NCD_Earned', 'Most_Severe_Conviction',
                                                                                                                                                                'Main_UK_Residency', 'Main_Occupation', 'Main_Marital_Status', 
                                                                                                                                                                'Main_Licence_Type', 'Main_Full_Time_Employed', 'Main_Employment_Type',
                                                                                                                                                                'Main_Driving_Experience_Months', 'Main_Employer_Business', 'Main_Driving_Experience',
                                                                                                                                                                'Latest_Conviction', 'Latest_Fault_Accident_Claim','Latest_Non_Fault_Accident_Claim',
                                                                                                                                                                'Latest_Windscreen_Claim','Main_Driver_Age', 'Transaction_Type', 'Tot_Windscreen_Claims', 
                                                                                                                                                                'Tot_Vandalism_Claims', 'Tot_Theft_Claims', 'Tot_Non_Fault_Accident_Claims', 
                                                                                                                                                                'Tot_Fire_Claims', 'Tot_Fault_Accident_Claims', 'Vehicle_Age', 'Vehicle_Keeper', 
                                                                                                                                                                'Vehicle_Owner', 'Vehicle_Value', 'Saga_Years_with_Broker', 'SagaCais', 'SagaFactor', 'SagaQuoteLag']]

In [None]:
## Encoded and Banded Training Data 
# Bare expression: the notebook renders this frame as the cell output so the
# training data can be inspected before it is scaled.
Saga_NB_dropped_unnecessary_df

In [None]:
## Encoded and Banded Testing Data 
# Bare expression: the notebook renders this frame as the cell output so the
# test data (with the encoded columns written back above) can be inspected.
Saga_NB_dropped_unnecessary_df_test

## Scaling the Data 

In [None]:
## Scaling Training Data 
from sklearn.preprocessing import MinMaxScaler

# Fit a min-max scaler on the training frame and rebuild a DataFrame around the
# scaled array, reusing the original row index and column labels.
trans = MinMaxScaler()
Saga_NB_scaled_train = pd.DataFrame(
    trans.fit_transform(Saga_NB_dropped_unnecessary_df),
    columns=Saga_NB_dropped_unnecessary_df.columns,
    index=Saga_NB_dropped_unnecessary_df.index,
)
Saga_NB_scaled_train.head()

In [None]:
## Scaling Testing Data 
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the TRAINING frame only and then applied to the test
# frame. Fitting a second scaler on the test data would map the same raw band
# to different scaled values in train and test; the scaled values are used as
# categorical levels in the GLM formulas further down, so the two sets must
# share one mapping. Test values outside the training range simply fall
# outside [0, 1].
# NOTE(review): transform() expects the test frame to have the same columns,
# in the same order, as the training frame - confirm for these two extracts.
trans = MinMaxScaler().fit(Saga_NB_dropped_unnecessary_df)
Saga_NB_scaled_test = pd.DataFrame(
    trans.transform(Saga_NB_dropped_unnecessary_df_test),
    index=Saga_NB_dropped_unnecessary_df_test.index,
    columns=Saga_NB_dropped_unnecessary_df_test.columns,
)
Saga_NB_scaled_test.head()

## Splitting Training and Testing Data 

In [None]:
# Split each scaled frame into predictors (everything except 'cancelled')
# and the 'cancelled' response column.
X_train = Saga_NB_scaled_train.drop(columns=['cancelled'])
X_test = Saga_NB_scaled_test.drop(columns=['cancelled'])
y_train = Saga_NB_scaled_train['cancelled'].copy()
y_test = Saga_NB_scaled_test['cancelled'].copy()

# Predictor names, in training-frame column order.
feature_names = list(X_train.columns)

In [None]:
# Bare expression: lists the column labels of the scaled training frame as the
# cell output.
Saga_NB_scaled_train.columns

In [None]:
# Quoted, comma-separated factor names as copied from the Emblem model.
factors_emblem_model = "'AgePassedTest', 'YearsOwned', 'Class_of_Use', 'Annual_Mileage',  'Age_Difference', 'Add_Driver_Age', 'Access_to_Other_Vehs', 'PDR_Code', 'NCD_Protected', 'Main_Full_Time_Employed', 'Latest_Fault_Accident_Claim',  'Main_Driver_Age', 'Vehicle_Age',  'Vehicle_Value', 'Saga_Years_with_Broker',  'SagaCais',  'SagaFactor'"

# Turn the list into formula-style text in one pass: every comma becomes " +"
# and every quote is removed. The result is only displayed, not stored.
_formula_separators = str.maketrans({",": " +", "'": None})
factors_emblem_model.translate(_formula_separators)

In [None]:
##'C(AgePassedTest) + C(YearsOwned) + C(Class_of_Use) + C(Annual_Mileage) +  C(Age_Difference) + C(Add_Driver_Age) + C(Access_to_Other_Vehs) + C(PDR_Code) + C(NCD_Protected) + C(Main_Full_Time_Employed) + C(Latest_Fault_Accident_Claim) +  C(Main_Driver_Age) + C(Vehicle_Age) +  C(Vehicle_Value) + C(Saga_Years_with_Broker) +  C(SagaCais) +  C(SagaFactor) + C(SagaCais*AgePassedTest) + C(Vehicle_Age*YearsOwned) + C(Vehicle_Age*Age_Difference) + C(Add_Driver_Age*Main_Driver_Age) + C(Main_Full_Time_Employed*Main_Driver_Age)'

In [None]:
## Defining all possible levels for features 
def _union_levels(column):
    """Return the sorted distinct values of `column` seen in the scaled train or test frame."""
    train_values = set(Saga_NB_scaled_train[column])
    test_values = set(Saga_NB_scaled_test[column])
    return sorted(train_values | test_values)


# One module-level list per factor; the GLM formulas refer to these names via
# C(..., levels=all_<factor>).
all_AgePassedTest = _union_levels("AgePassedTest")
all_YearsOwned = _union_levels("YearsOwned")
all_Class_of_Use = _union_levels("Class_of_Use")
all_Annual_Mileage = _union_levels("Annual_Mileage")
all_Age_Difference = _union_levels("Age_Difference")
all_Add_Driver_Age = _union_levels("Add_Driver_Age")
all_Access_to_Other_Vehs = _union_levels("Access_to_Other_Vehs")
all_PDR_Code = _union_levels("PDR_Code")
all_NCD_Protected = _union_levels("NCD_Protected")
all_Main_Full_Time_Employed = _union_levels("Main_Full_Time_Employed")
all_Latest_Fault_Accident_Claim = _union_levels("Latest_Fault_Accident_Claim")
all_Main_Driver_Age = _union_levels("Main_Driver_Age")
all_Vehicle_Age = _union_levels("Vehicle_Age")
all_Vehicle_Value = _union_levels("Vehicle_Value")
all_Saga_Years_with_Broker = _union_levels("Saga_Years_with_Broker")
all_SagaCais = _union_levels("SagaCais")
all_SagaFactor = _union_levels("SagaFactor")

In [None]:
# Binomial GLM on the scaled training data with all 17 Emblem factors entered
# as categorical main effects (no interaction terms).
formula_no_interactions = "cancelled ~ (C(AgePassedTest) + C(YearsOwned) + C(Class_of_Use) + C(Annual_Mileage) +  C(Age_Difference) + C(Add_Driver_Age) + C(Access_to_Other_Vehs) + C(PDR_Code) + C(NCD_Protected) + C(Main_Full_Time_Employed) + C(Latest_Fault_Accident_Claim) +  C(Main_Driver_Age) + C(Vehicle_Age) +  C(Vehicle_Value) + C(Saga_Years_with_Broker) +  C(SagaCais) +  C(SagaFactor))"
glm_no_interactions = smf.glm(
    formula=formula_no_interactions,
    data=Saga_NB_scaled_train,
    family=sm.families.Binomial(),
)
mod_banded_emblem_no_interactions = glm_no_interactions.fit()
mod_banded_emblem_no_interactions.summary()

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Design matrix for the VIF check: the 17 model factors plus a constant column.
# .assign() returns a new frame, so the constant is not written onto a
# selection of Saga_NB_scaled_train (which raises SettingWithCopyWarning).
X = Saga_NB_scaled_train[['AgePassedTest', 'YearsOwned', 'Class_of_Use', 'Annual_Mileage',  'Age_Difference', 'Add_Driver_Age', 'Access_to_Other_Vehs', 'PDR_Code', 'NCD_Protected', 'Main_Full_Time_Employed', 'Latest_Fault_Accident_Claim',  'Main_Driver_Age', 'Vehicle_Age',  'Vehicle_Value', 'Saga_Years_with_Broker',  'SagaCais',  'SagaFactor']].assign(Intercept=1)

# Convert to an array once instead of once per column.
design_matrix = X.values

# One VIF per design-matrix column (including the constant), largest first.
vif = pd.DataFrame()
vif['Features'] = X.columns
vif['VIF'] = [variance_inflation_factor(design_matrix, i) for i in range(design_matrix.shape[1])]
vif.sort_values(by="VIF", ascending=False) 

In [None]:
## Removing Add_Driver_Age
# Same binomial GLM without Add_Driver_Age. Each factor is given an explicit
# level list (the all_<factor> variables defined above in the notebook).
formula_no_interactions_r1 = "cancelled ~ (C(AgePassedTest, levels=all_AgePassedTest) + C(YearsOwned, levels=all_YearsOwned) + C(Class_of_Use, levels=all_Class_of_Use) + C(Annual_Mileage, levels=all_Annual_Mileage) +  C(Age_Difference, levels=all_Age_Difference) + C(Access_to_Other_Vehs, levels=all_Access_to_Other_Vehs) + C(PDR_Code, levels=all_PDR_Code) + C(NCD_Protected, levels=all_NCD_Protected) + C(Main_Full_Time_Employed, levels=all_Main_Full_Time_Employed) + C(Latest_Fault_Accident_Claim, levels=all_Latest_Fault_Accident_Claim) +  C(Main_Driver_Age, levels=all_Main_Driver_Age) + C(Vehicle_Age, levels=all_Vehicle_Age) +  C(Vehicle_Value, levels=all_Vehicle_Value) + C(Saga_Years_with_Broker, levels=all_Saga_Years_with_Broker) +  C(SagaCais, levels=all_SagaCais) +  C(SagaFactor, levels=all_SagaFactor))"
glm_no_interactions_r1 = smf.glm(
    formula=formula_no_interactions_r1,
    data=Saga_NB_scaled_train,
    family=sm.families.Binomial(),
)
mod_banded_emblem_no_interactions_r1 = glm_no_interactions_r1.fit()
mod_banded_emblem_no_interactions_r1.summary()

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Design matrix for the VIF check after dropping Add_Driver_Age: the 16
# remaining factors plus a constant column. .assign() returns a new frame, so
# the constant is not written onto a selection of Saga_NB_scaled_train (which
# raises SettingWithCopyWarning).
X = Saga_NB_scaled_train[['AgePassedTest', 'YearsOwned', 'Class_of_Use', 'Annual_Mileage',  'Age_Difference', 'Access_to_Other_Vehs', 'PDR_Code', 'NCD_Protected', 'Main_Full_Time_Employed', 'Latest_Fault_Accident_Claim',  'Main_Driver_Age', 'Vehicle_Age',  'Vehicle_Value', 'Saga_Years_with_Broker',  'SagaCais',  'SagaFactor']].assign(Intercept=1)

# Convert to an array once instead of once per column.
design_matrix = X.values

# One VIF per design-matrix column (including the constant), largest first.
vif = pd.DataFrame()
vif['Features'] = X.columns
vif['VIF'] = [variance_inflation_factor(design_matrix, i) for i in range(design_matrix.shape[1])]
vif.sort_values(by="VIF", ascending=False) 

In [None]:
from sklearn import metrics

# Score the test predictors with the reduced model, then report Gini = 2*AUC - 1.
y_predict = mod_banded_emblem_no_interactions_r1.predict(exog=X_test)
roc_auc = metrics.roc_auc_score(y_true=y_test, y_score=y_predict)
Gini = (2 * roc_auc) - 1
print(Gini)

In [None]:
from sklearn import metrics

# Predictions of the reduced model on the test predictors; y_predict is reused
# by the Gini functions defined later in this cell.
y_predict = mod_banded_emblem_no_interactions_r1.predict(exog=X_test)
roc_auc = metrics.roc_auc_score(y_true=y_test, y_score=y_predict)
Gini = (2 * roc_auc) - 1
print(Gini)


import matplotlib.pyplot as plt
import scipy.interpolate
import scipy.integrate

def gini(actual, pred):
    """Return the (un-normalised) Gini coefficient of `pred` against `actual`.

    Observations are ranked by descending prediction, with ties broken by
    original position. The cumulative share of `actual` along that ranking is
    summed, the value expected from a random ordering is subtracted, and the
    result is divided by the number of observations.

    Parameters
    ----------
    actual : sequence of numbers
        Observed outcomes.
    pred : sequence of numbers
        Model scores, same length as `actual`.

    Returns
    -------
    float
        The Gini coefficient. It is NaN when `actual` sums to zero, because
        the cumulative share is then undefined.

    Raises
    ------
    AssertionError
        If `actual` and `pred` differ in length.
    """
    assert (len(actual) == len(pred))
    n_obs = len(actual)
    # Columns: outcome, score, original position. The builtin float is used as
    # dtype because the np.float alias was removed in NumPy 1.24.
    table = np.asarray(np.c_[actual, pred, np.arange(n_obs)], dtype=float)
    # lexsort treats the LAST key as primary: descending score first, then
    # original position to keep tied scores in a stable order.
    table = table[np.lexsort((table[:, 2], -1 * table[:, 1]))]
    total_losses = table[:, 0].sum()
    gini_sum = table[:, 0].cumsum().sum() / total_losses

    # Remove the value a random ordering would produce on average.
    gini_sum -= (n_obs + 1) / 2.
    return gini_sum / n_obs


def gini_normalized(actual, pred):
    """Return gini(actual, pred) divided by the Gini of a perfect ranking, gini(actual, actual)."""
    model_gini = gini(actual, pred)
    perfect_gini = gini(actual, actual)
    return model_gini / perfect_gini

# Gini of the model scores, Gini of a perfect ranking, and their ratio,
# all on the test set.
gini_predictions = gini(y_test, y_predict)
gini_max = gini(y_test, y_test)
ngini = gini_normalized(y_test, y_predict)

gini_report = 'Gini: %.3f, Max. Gini: %.3f, Normalized Gini: %.3f' % (gini_predictions, gini_max, ngini)
print(gini_report)

In [None]:
# Test-set AUC of the current predictions, converted to Gini = 2*AUC - 1.
auc_Saga_NB = metrics.roc_auc_score(y_true=y_test, y_score=y_predict)
Gini = 2 * auc_Saga_NB - 1
print(Gini)

## Adding Interactions from Fitted Emblem Model

- SagaCais and AgePassedTest
- Vehicle_Age and YearsOwned
- Vehicle_Age and Age_Difference
- Add_Driver_Age and Main_Driver_Age
- Main_Full_Time_Employed and Main_Driver_Age