## EDA and stats on Insurance History Data

In [None]:
import sys
import os


# Make the project's ``src`` directory importable from this notebook.
# The current working directory is assumed to be the notebooks folder, so
# ``src`` is resolved as its sibling: one level up, then into ``src``.
notebook_dir = os.getcwd()
project_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
src_dir = os.path.join(project_dir, 'src')
# Notebook cells get re-executed; only register the path once so repeated
# runs do not pile up duplicate entries in sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [None]:
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from load_data import DataLoader
from eda_data import DataCleaner
from visualize_data import Visualizer

## Importing Data

In [None]:
# Load the raw dataset through the project's DataLoader (from load_data).
# The path is relative, so it resolves against the current working directory.
file_path = "../data/raw/MachineLearningRating_v3.txt"
# NOTE: `data` holds the loader only temporarily; a later cell rebinds the
# same name to a Visualizer instance.
data = DataLoader()
# `df` is whatever load_data returns — presumably a pandas DataFrame, since
# later cells index it by column name; confirm in load_data.
df = data.load_data(file_path)

## Data cleaning

In [None]:
# Clean the loaded data with the project's DataCleaner (from eda_data).
cleaner = DataCleaner()
# NOTE(review): whether clean_data copies `df` or modifies it in place is not
# visible here — confirm in eda_data before relying on `df` staying raw.
cleaned_df = cleaner.clean_data(df)

## Data Visualization

In [None]:
# Build the Visualizer (from visualize_data) on the cleaned data.
# NOTE: this rebinds `data`, which previously referred to the DataLoader;
# every `data.<method>()` call in the following cells targets this Visualizer.
data = Visualizer(cleaned_df)

## Univariate Analysis

In [None]:
# Run the Visualizer's univariate analysis on the cleaned data.
# (Presumably renders per-column plots inline — confirm in visualize_data.)
data.univariate_analysis()

## Bivariate and Multivariate Analysis

In [None]:
# Run the Visualizer's bivariate/multivariate analysis on the cleaned data.
# (Which column pairs are compared is decided inside visualize_data — confirm there.)
data.bivariate_multivariate_analysis()

## Outlier Detection

In [None]:
# Run the Visualizer's outlier-detection step on the cleaned data.
# NOTE(review): unclear from here whether this only reports/plots outliers or
# also alters the underlying data — confirm in visualize_data.
data.outlier_detection()

## Loss Ratio Visualization

In [None]:
# Run the Visualizer's loss-ratio step; any return value is discarded here.
# (Presumably computes and plots the loss ratio — confirm in visualize_data.)
data.calc_loss_ratio()

## Plot Correlations

In [None]:
# Run the Visualizer's correlation plotting on the cleaned data.
# (Which columns are included is decided inside visualize_data — confirm there.)
data.plot_correlations()

## Hypothesis Testing

In [None]:
# Derive the per-row columns used for hypothesis testing.
# NOTE(review): these columns are added to the raw `df`, not to `cleaned_df`
# — confirm that is intended.
# NOTE(review): 'claim_amount' is snake_case while 'TotalPremium' and
# 'PolicyID' are PascalCase — verify all three columns exist in the loaded data.

# True for rows whose claim amount is strictly positive.
df['has_claim'] = df['claim_amount'] > 0
# Per-policy maximum of `has_claim`, broadcast back to every row: True when
# any row sharing the same PolicyID has a claim.
df['claim_frequency'] = df.groupby('PolicyID')['has_claim'].transform('max')
# Claim amount on rows with a claim, missing elsewhere. The default fill
# (NaN) is used rather than pd.NA so the column stays a plain float column
# regardless of the input dtype, and the mask computed above is reused.
df['claim_severity'] = df['claim_amount'].where(df['has_claim'])
# Premium minus claim amount for each row.
df['margin'] = df['TotalPremium'] - df['claim_amount']
