<a href="https://colab.research.google.com/github/projjal1/Neural_Networks_Projects/blob/master/Condition_and_Insurance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code for Competition on Hackerrank about prediction of car condition while claiming insurance and the amount of insurance to be given. 

Challenge Hosted at https://www.hackerearth.com/challenges/competitive/hackerearth-machine-learning-challenge-vehicle-insurance-claim/

In [1]:
import pandas as pd
import tensorflow as tf
import os
import cv2

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!unzip '/content/drive/MyDrive/Colab Notebooks/dataset.zip'

In [3]:
#Importing the train dataset csv
dataset_csv=pd.read_csv("train.csv")

In [4]:
dataset_csv.head()

Unnamed: 0,Image_path,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition,Amount
0,img_4513976.jpg,BQ,41500.0,1037.5,2026-12-03,36142.68,0,0.0
1,img_7764995.jpg,BQ,50700.0,1267.5,2025-07-10,12753.0,1,6194.0
2,img_451308.jpg,A,49500.0,1237.5,2022-08-11,43102.68,0,0.0
3,img_7768372.jpg,A,33500.0,837.5,2022-08-02,8453.0,1,7699.0
4,img_7765274.jpg,AC,27600.0,690.0,2026-05-01,6978.0,1,8849.0


In [5]:
#Filling missing values
for col in ["Cost_of_vehicle","Min_coverage","Max_coverage","Amount"]:
    dataset_csv[col].fillna(dataset_csv[col].mean(),inplace=True)

In [6]:
#Replace 0.0 in amount with average of all amounts
dataset_csv["Amount"].replace(0.0,dataset_csv["Amount"].mean(),inplace=True)

In [7]:
#Convert expiry date to datetime
dataset_csv["Expiry_date"]=pd.to_datetime(dataset_csv["Expiry_date"])
dataset_csv["Expiry_date"]=dataset_csv["Expiry_date"].values.astype(int)

In [8]:
#Convert Insurance_company to Label Encoding 
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
dataset_csv["Insurance_company"]=le.fit_transform(dataset_csv["Insurance_company"])

In [9]:
#Dataset after preprocessing 
dataset_csv.head()

Unnamed: 0,Image_path,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition,Amount
0,img_4513976.jpg,6,41500.0,1037.5,1796256000000000000,36142.68,0,4117.144092
1,img_7764995.jpg,6,50700.0,1267.5,1752105600000000000,12753.0,1,6194.0
2,img_451308.jpg,0,49500.0,1237.5,1660176000000000000,43102.68,0,4117.144092
3,img_7768372.jpg,0,33500.0,837.5,1659398400000000000,8453.0,1,7699.0
4,img_7765274.jpg,2,27600.0,690.0,1777593600000000000,6978.0,1,8849.0


In [10]:
#Lets extract Image_path and drop the column
target_img_path=dataset_csv["Image_path"]
dataset_csv.drop(["Image_path"],axis=1,inplace=True)

In [11]:
#Lets extract target condition 
target_img_condition=dataset_csv["Condition"]

In [12]:
#Lets extract Image_path and drop the column
target_amt=dataset_csv["Amount"]
dataset_csv.drop(["Amount"],axis=1,inplace=True)

In [13]:
#dataset after extraction and final for training before scaling
dataset_csv.head()

Unnamed: 0,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition
0,6,41500.0,1037.5,1796256000000000000,36142.68,0
1,6,50700.0,1267.5,1752105600000000000,12753.0,1
2,0,49500.0,1237.5,1660176000000000000,43102.68,0
3,0,33500.0,837.5,1659398400000000000,8453.0,1
4,2,27600.0,690.0,1777593600000000000,6978.0,1


In [14]:
#Now let's scale the data
from sklearn.preprocessing import RobustScaler 
scale=RobustScaler()
scale.fit(dataset_csv)
#Scale and transform
dataset_csv=scale.transform(dataset_csv)

In [49]:
#Model training 
from sklearn.ensemble import RandomForestRegressor
model_amt=RandomForestRegressor(n_estimators=200,max_depth=30)
model_amt.fit(dataset_csv,target_amt)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=30, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=200, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [None]:
img_dataset=[]
ctr=1

#Now create dataset for image data 
for x in target_img_path.values:
    print("Operating image:",ctr)
    ctr+=1
    img=cv2.imread("trainImages/"+x)
    img=cv2.resize(img,(150,150))
    #Scale the image
    img=img/255.0
    img_dataset.append(img)

In [32]:
#Now we create model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#Model
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(150,150,3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(1024, activation='relu'),
    Dense(2, activation='sigmoid')
])

In [33]:
#Compiling the model
import tensorflow as tf

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [34]:
#Getting the shape of our image dataset
import numpy as np
img_dataset=np.array(img_dataset)
img_dataset.shape

(1399, 150, 150, 3)

In [35]:
target_cond=np.array(target_img_condition)
target_cond.shape

(1399,)

In [36]:
#Training our model with 10 epochs
model.fit(img_dataset,target_cond,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f69fbefb210>

In [56]:
#Read the test dataset 
dataset_test=pd.read_csv("test.csv")
dataset_test.head()

Unnamed: 0,Image_path,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage
0,img_4538519.jpg,B,23600,590.0,2025-04-12,5978.0
1,img_7766002.jpg,C,28300,707.5,2028-08-24,7153.0
2,img_4637390.jpg,AC,43700,1092.5,2023-11-28,11003.0
3,img_4516108.jpg,BB,46100,1152.5,2028-02-04,11603.0
4,img_4517008.jpg,BB,40700,1017.5,2022-01-03,10253.0


In [38]:
#Creating dataset for test data on images
img_test_dataset=[]
ctr=1

#Now create dataset for image data 
for x in dataset_test["Image_path"].values:
    ctr+=1
    img=cv2.imread("testImages/"+x)
    img=cv2.resize(img,(150,150))
    #Scale the image
    img=img/255.0
    img_test_dataset.append(img)

img_test_dataset=np.array(img_dataset)
print("Shape of test image data:",img_test_dataset.shape)

Shape of test image data: (1399, 150, 150, 3)


In [39]:
#Lets make predictions for our image data 
predictions=model.predict(img_test_dataset)

In [43]:
conditions=[]
for each in predictions:
  conditions.append(np.argmax(each))

In [57]:
path=dataset_test["Image_path"]
dataset_test.drop(["Image_path"],axis=1,inplace=True)

In [62]:
dataset_test["Condition"]=pd.Series(conditions)

In [63]:
dataset_test

Unnamed: 0,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition
0,B,23600,590.0,2025-04-12,5978.00,0
1,C,28300,707.5,2028-08-24,7153.00,1
2,AC,43700,1092.5,2023-11-28,11003.00,0
3,BB,46100,1152.5,2028-02-04,11603.00,1
4,BB,40700,1017.5,2022-01-03,10253.00,1
...,...,...,...,...,...,...
595,B,30900,772.5,2024-10-23,7803.00,1
596,O,51300,1282.5,2025-02-21,12903.00,1
597,BQ,27000,675.0,2023-07-13,23527.68,1
598,AA,42600,1065.0,2024-05-05,10728.00,1


In [64]:
#Convert expiry date to datetime
dataset_test["Expiry_date"]=pd.to_datetime(dataset_test["Expiry_date"])
dataset_test["Expiry_date"]=dataset_test["Expiry_date"].values.astype(int)

In [65]:
#Convert Insurance_company to Label Encoding 
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
dataset_test["Insurance_company"]=le.fit_transform(dataset_test["Insurance_company"])

In [66]:
#Scaling dataset using scaler previuosly used for scaling train dataset
dataset_test=scale.transform(dataset_test)

In [None]:
#Predicitions on insurance amount
insurance=model_amt.predict(dataset_test)
insurance

In [75]:
#Result 
df=pd.DataFrame()
df["Image_path"]=path
df["Condition"]=pd.Series(conditions)
df["Amount"]=pd.Series(insurance)

df.to_csv("2submision.csv",index=False)