# Life Insurance Customer Churn Analysis


In [48]:
import pandas as pd

In [49]:
# Read the raw customer churn CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='customer_churn_dataset.csv')

In [50]:
# Preview the first five rows to confirm the file parsed as expected.
# NOTE(review): the preview shows customer names and addresses -- confirm the
# data is synthetic before sharing this notebook with outputs included.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Customer Name,Customer_Address,Company Name,Claim Reason,Data confidentiality,Claim Amount,Category Premium,Premium/Amount Ratio,Claim Request output,BMI,Churn
0,0,Christine Payne,"7627 Anderson Rest Apt. 265,Lake Heather, DC 3...","Williams, Henderson and Perez",Travel,Low,377,4794,0.07864,No,21,Yes
1,1,Tony Fernandez,"3953 Cindy Brook Apt. 147,East Lindatown, TN 4...",Moore-Goodwin,Medical,High,1440,14390,0.100069,No,24,Yes
2,2,Christopher Kim,"8693 Walters Mountains,South Tony, TX 88407",Smith-Holmes,Phone,Medium,256,1875,0.136533,No,18,Yes
3,3,Nicole Allen,"56926 Webster Coves,Shawnmouth, NV 04853",Harrell-Perez,Phone,Medium,233,1875,0.124267,No,24,Yes
4,4,Linda Cruz,"489 Thomas Forges Apt. 305,Jesseton, GA 36765","Simpson, Kramer and Hughes",Phone,Medium,239,1875,0.127467,No,21,Yes


In [51]:
# Inspect each column's dtype and non-null count; a non-null count lower than
# the number of entries would indicate missing (NaN) values in that column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 12 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Unnamed: 0            200000 non-null  int64  
 1   Customer Name         200000 non-null  object 
 2   Customer_Address      200000 non-null  object 
 3   Company Name          200000 non-null  object 
 4   Claim Reason          200000 non-null  object 
 5   Data confidentiality  200000 non-null  object 
 6   Claim Amount          200000 non-null  int64  
 7   Category Premium      200000 non-null  int64  
 8   Premium/Amount Ratio  200000 non-null  float64
 9   Claim Request output  200000 non-null  object 
 10  BMI                   200000 non-null  int64  
 11  Churn                 200000 non-null  object 
dtypes: float64(1), int64(4), object(7)
memory usage: 18.3+ MB


In [52]:
# Checking for placeholder strings that stand in for missing values.
# pd.read_csv already parses '' and 'N/A' as NaN by default (those show up in
# df.info()), so an exact-match test on those tokens cannot find anything.
# Compare case- and whitespace-insensitively against a broader token list instead.
MISSING_PLACEHOLDERS = ['', 'unknown', 'n/a', 'na', 'none', 'null', 'missing', '?', '-']
# Cast every column to text so numeric and categorical columns can be scanned the same way
normalized = df.astype(str).apply(lambda col: col.str.strip().str.lower())
print(normalized.isin(MISSING_PLACEHOLDERS).sum())

Unnamed: 0              0
Customer Name           0
Customer_Address        0
Company Name            0
Claim Reason            0
Data confidentiality    0
Claim Amount            0
Category Premium        0
Premium/Amount Ratio    0
Claim Request output    0
BMI                     0
Churn                   0
dtype: int64


In [53]:
# Dropping the Unnamed column (a leftover index column from the CSV export).
# Rebinding df instead of inplace=True, with errors='ignore', keeps this cell
# idempotent: re-running it after the column is gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [54]:
# Re-typing variables
# Low-cardinality text columns become pandas categoricals.
for cat_col in ['Claim Reason', 'Data confidentiality']:
    df[cat_col] = df[cat_col].astype('category')

# Yes/No flags become 0/1 integers. The identity entries (1 -> 1, 0 -> 0) keep
# the cell idempotent: Series.map turns unmapped values into NaN, so re-running
# the original mapping on already-encoded data would wipe both columns.
BINARY_CODES = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
for flag_col in ['Claim Request output', 'Churn']:
    encoded = df[flag_col].map(BINARY_CODES)
    # Fail loudly on any label outside the mapping instead of silently storing NaN
    unexpected = df.loc[encoded.isna(), flag_col].unique()
    assert len(unexpected) == 0, f"Unexpected values in {flag_col!r}: {list(unexpected[:5])}"
    df[flag_col] = encoded.astype('int64')

In [55]:
# Re-inspect dtypes and non-null counts after the drop and re-typing cells
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 11 columns):
 #   Column                Non-Null Count   Dtype   
---  ------                --------------   -----   
 0   Customer Name         200000 non-null  object  
 1   Customer_Address      200000 non-null  object  
 2   Company Name          200000 non-null  object  
 3   Claim Reason          200000 non-null  category
 4   Data confidentiality  200000 non-null  category
 5   Claim Amount          200000 non-null  int64   
 6   Category Premium      200000 non-null  int64   
 7   Premium/Amount Ratio  200000 non-null  float64 
 8   Claim Request output  200000 non-null  int64   
 9   BMI                   200000 non-null  int64   
 10  Churn                 200000 non-null  int64   
dtypes: category(2), float64(1), int64(5), object(3)
memory usage: 14.1+ MB


In [56]:
# Reviewing high-level overview of the quantitative data
# (count, mean, std, min, quartiles, max for each numeric column).
# 'Claim Request output' and 'Churn' were mapped to 0/1 earlier in the notebook,
# so their means are the share of rows that were 'Yes'.
df.describe()

Unnamed: 0,Claim Amount,Category Premium,Premium/Amount Ratio,Claim Request output,BMI,Churn
count,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0
mean,1120.47884,8963.783895,0.125024,0.03503,23.007205,0.63636
std,796.660796,6114.737202,0.034742,0.183856,3.164976,0.481048
min,1.0,399.0,0.002506,0.0,18.0,0.0
25%,245.0,1875.0,0.106741,0.0,20.0,0.0
50%,1390.0,14390.0,0.125122,0.0,23.0,1.0
75%,1844.0,14390.0,0.143155,0.0,26.0,1.0
max,2299.0,14390.0,0.24812,1.0,28.0,1.0
