<a href="https://colab.research.google.com/github/kalvinithi/Working-Problems/blob/main/Premium_Pricing_using_Poisson_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install dplyr only when it is not already available, so re-running this
# cell does not download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# Attach dplyr for the %>% pipe, group_by() and summarize() used later on.
library(dplyr)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [4]:
# Fix the random number generator state so that the simulated data
# come out identical on every run
set.seed(seed = 123)

In [5]:
# Simulate a synthetic portfolio of policyholders
n <- 1000  # portfolio size (number of customers)
# Customer attributes, each drawn uniformly at random with replacement
age <- sample(18L:70L, size = n, replace = TRUE)
gender <- sample(c("Male", "Female"), size = n, replace = TRUE)
vehicle_type <- sample(c("Sedan", "SUV", "Truck"), size = n, replace = TRUE)
# Claim counts are Poisson distributed: the mean rises linearly with age
# and is 0.1 higher for trucks (gender does not enter the mean)
claims <- rpois(
  n,
  lambda = 0.2 + 0.01 * age + 0.1 * (vehicle_type == "Truck")
)

In [6]:
# Gather the simulated vectors into one table with a row per customer,
# then give the columns their presentation names
insurance_data <- data.frame(age, gender, vehicle_type, claims)
names(insurance_data) <- c("Age", "Gender", "Vehicle_Type", "Claims")

In [7]:
# Store the two categorical columns (gender and vehicle type) as factors
insurance_data[c("Gender", "Vehicle_Type")] <- lapply(
  insurance_data[c("Gender", "Vehicle_Type")],
  as.factor
)

In [8]:
# Preview the first six rows of the simulated data set
head(insurance_data, n = 6L)

Unnamed: 0_level_0,Age,Gender,Vehicle_Type,Claims
Unnamed: 0_level_1,<int>,<fct>,<fct>,<int>
1,48,Male,SUV,1
2,32,Male,Sedan,0
3,68,Male,Sedan,2
4,31,Female,Truck,0
5,20,Male,Truck,0
6,59,Male,Sedan,0


In [9]:
# Model the claim counts with a Poisson GLM (log link), using age,
# gender and vehicle type as predictors
poisson_model <- glm(
  formula = Claims ~ Age + Gender + Vehicle_Type,
  family = poisson(link = "log"),
  data = insurance_data
)

In [10]:
# Show the coefficient table, deviances and AIC of the fitted model
summary(object = poisson_model)


Call:
glm(formula = Claims ~ Age + Gender + Vehicle_Type, family = poisson(link = "log"), 
    data = insurance_data)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)       -1.270422   0.148767  -8.540  < 2e-16 ***
Age                0.016278   0.002599   6.263 3.77e-10 ***
GenderMale         0.053664   0.077766   0.690   0.4902    
Vehicle_TypeSUV    0.080727   0.099271   0.813   0.4161    
Vehicle_TypeTruck  0.184078   0.093246   1.974   0.0484 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1118.8  on 999  degrees of freedom
Residual deviance: 1074.6  on 995  degrees of freedom
AIC: 2142.3

Number of Fisher Scoring iterations: 5


In [11]:
# Attach each customer's fitted expected claim count; type = "response"
# returns it on the count scale rather than on the log (link) scale
insurance_data[["Predicted_Claims"]] <- predict(
  object = poisson_model,
  type = "response"
)

In [12]:
# Turn expected claim counts into premiums: every expected claim is
# charged at a flat base rate
base_rate <- 100
insurance_data[["Premium"]] <- insurance_data[["Predicted_Claims"]] * base_rate

In [13]:
# Preview the first six rows, now including the predicted claim counts
# and the resulting premiums
head(insurance_data, n = 6L)

Unnamed: 0_level_0,Age,Gender,Vehicle_Type,Claims,Predicted_Claims,Premium
Unnamed: 0_level_1,<int>,<fct>,<fct>,<int>,<dbl>,<dbl>
1,48,Male,SUV,1,0.7013897,70.13897
2,32,Male,Sedan,0,0.4986427,49.86427
3,68,Male,Sedan,2,0.8959588,89.59588
4,31,Female,Truck,0,0.5589312,55.89312
5,20,Male,Truck,0,0.4930611,49.30611
6,59,Male,Sedan,0,0.7738618,77.38618


In [14]:
# Mean premium within each vehicle type (one output row per type)
summarise(
  group_by(insurance_data, Vehicle_Type),
  Average_Premium = mean(Premium)
)

Vehicle_Type,Average_Premium
<fct>,<dbl>
Sedan,61.42433
SUV,64.72492
Truck,73.16384
