# Travel Insurance Prediction

In [None]:
#import library
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Data Extraction

In [None]:
# Load the raw travel-insurance table from the relative input directory.
travel = pd.read_csv(
    filepath_or_buffer = '../input/travel-insurance-prediction-data/TravelInsurancePrediction.csv',
)

In [None]:
# Display the (rows, columns) dimensions of the loaded frame.
travel.shape

In [None]:
# Preview the first five rows of the raw data.
travel.head(n = 5)

In [None]:
# Display pandas' default descriptive statistics for the raw data.
travel.describe()

In [None]:
# Print the column names, dtypes and non-null counts of the raw data.
travel.info()

In [None]:
# Count the missing values in each column (nothing is imputed or dropped here).
travel.isna().sum()

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# numeric_only = True is passed explicitly: since pandas 2.0, corr() no longer
# drops non-numeric columns silently and raises on them instead. The frame has
# at least one categorical column ('Employment Type'), presumably stored as
# text. NOTE(review): the numeric_only keyword needs pandas >= 1.5.
plt.figure(figsize = (10,6))
sns.heatmap(travel.corr(numeric_only = True), annot = True, cmap = 'OrRd')

## Data Selection

In [None]:
# Keep only the two predictors and the target used by the model.
# .copy() gives df its own data, so later edits to df neither raise
# SettingWithCopyWarning nor depend on whether the selection is a view of `travel`.
df = travel[['FamilyMembers', 'AnnualIncome', 'TravelInsurance']].copy()
df.head()

## Data Transformation

In [None]:
# Class balance of the target column before it is relabelled.
df.TravelInsurance.value_counts()

In [None]:
# Recode the numeric target (0 / 1) into readable class labels.
# The previous form, df.col[mask] = value, is chained assignment: pandas flags
# it with SettingWithCopyWarning and it stops updating df once copy-on-write is
# enabled. A single replace() over the whole column does both substitutions at
# once, and assign() builds a new frame instead of writing into a selection.
# Values other than 0 and 1 are left untouched, so re-running the cell is safe.
df = df.assign(TravelInsurance = df['TravelInsurance'].replace({0: "Not Bought", 1: "Bought"}))
df.head()

## Decision Tree Model

In [None]:
#split data
# Predictors are every column except the target; the target is the label column.
X = df.drop('TravelInsurance', axis = 1)
y = df['TravelInsurance']
# Names of the model inputs, taken from X itself so the list matches the fitted
# features one-to-one (the hard-coded list also contained the target column).
feature = list(X.columns)
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Fit a shallow (depth-2) decision tree on the training split.
# random_state fixes the feature permutation scikit-learn applies at each split,
# so ties between equally good splits resolve the same way on every run; the
# seed matches the one used for the train/test split.
dtc = DecisionTreeClassifier(max_depth = 2, random_state = 1)
dtc.fit(X_train, y_train)
print(dtc)

In [None]:
# Predict a label for every row of the held-out test features.
y_pred = dtc.predict(X = X_test)
print(y_pred)

In [None]:
# Share of test rows whose predicted label equals the actual label.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy Score : ', accuracy)
# round() rather than int(): int() truncates, so float artefacts such as
# 0.58 * 100 == 57.99999999999999 were reported one point too low.
print('Percentage : ', round(accuracy * 100), '%')

In [None]:
# Cross-tabulate actual against predicted labels (rows = actual, columns = predicted).
matrix = pd.crosstab(
    index = y_test,
    columns = y_pred,
    rownames = ['Actual'],
    colnames = ['Predicted'],
)
print(matrix)

In [None]:
# Text summary of the per-class metrics computed on the test set.
report = classification_report(y_true = y_test, y_pred = y_pred)
print(report)

In [None]:
#plotting model
fig = plt.figure(figsize = (20,10))
# class_names is looked up by class position, so it must list the distinct
# labels in the order the fitted classifier stores them (dtc.classes_).
# Passing the whole target column y labelled the nodes with y[0] / y[1],
# i.e. whatever labels two individual rows happened to carry.
_= tree.plot_tree(dtc, feature_names = feature, class_names = [str(label) for label in dtc.classes_], filled = True, rounded = True)

In [None]:
# Number of rows per employment type.
travel.loc[:, 'Employment Type'].value_counts()

In [None]:
# Number of rows for each (employment type, insurance outcome) pair, as a flat table.
employment_insurance = (
    travel
    .groupby(['Employment Type', 'TravelInsurance'])
    .size()
    .to_frame(name = 'Count')
    .reset_index()
)
print(employment_insurance)

In [None]:
# Grouped bar chart of the counts: one bar per employment type and insurance outcome.
plt.figure(figsize = (10, 6))
sns.barplot(
    data = employment_insurance,
    x = 'Employment Type',
    y = 'Count',
    hue = 'TravelInsurance',
)
plt.title("Employment Type ~ Travel Insurance")
plt.show()

In [None]:
# Pie chart of the share of rows carrying each insurance label.
plt.figure(figsize = (5, 6))
activities = ['Not Bought', 'Bought']
# One count per label, in the same order as the wedge labels.
insurance = [np.count_nonzero(df['TravelInsurance'] == label) for label in activities]
plt.pie(
    insurance,
    labels = activities,
    radius = 1.3,
    startangle = 50,
    autopct = '%.1f%%',
    shadow = True,
)
plt.title("Travel Insurance")
plt.show()

In [None]:
# Kernel density estimate of family size, one curve per insurance label.
sns.displot(df, x = 'FamilyMembers', hue = 'TravelInsurance', kind = 'kde')
plt.title(label = "Family Members ~ Travel Insurance")

In [None]:
# Kernel density estimate of annual income, one curve per insurance label.
sns.displot(df, x = 'AnnualIncome', hue = 'TravelInsurance', kind = 'kde')
plt.title(label = "Annual Income ~ Travel Insurance")