In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Loading required packages

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from sklearn import preprocessing
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from keras.models import Model
from keras.optimizers import Adam
import cv2
from keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
from keras.optimizers import Adam

## Loading training data

In [None]:
# Read the training table and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer='../input/fast-furious-and-insured/Fast_Furious_Insured/train.csv'
)
df.head(5)

## Loading testing data

In [None]:
# Read the test table. Only a preview is displayed (matching the training cell)
# instead of dumping the whole frame into the notebook output.
df_test = pd.read_csv('../input/fast-furious-and-insured/Fast_Furious_Insured/test.csv')
df_test.head()

## Pre-processing

In [None]:
# Count the missing entries in each column of the training frame
missing_values_count_train = df.isna().sum(axis=0)
print(missing_values_count_train)

In [None]:
# Count the missing entries in each column of the test frame
missing_values_count_test = df_test.isna().sum(axis=0)
print(missing_values_count_test)

In [None]:
# Fill missing values in the training frame: take the next valid value below
# in the same column (backward fill); anything still missing afterwards
# (e.g. trailing rows with nothing below them) becomes 0.
# `DataFrame.bfill()` replaces the deprecated `fillna(method='bfill')` spelling.
df = df.bfill(axis=0).fillna(0)

In [None]:
# Frequency of each Insurance_company value in the training set
df.loc[:, 'Insurance_company'].value_counts()

In [None]:
# Frequency of each Insurance_company value in the testing set
df_test.loc[:, 'Insurance_company'].value_counts()

## Label encoding and scaling

In [None]:
# Build the tabular feature matrices for the Amount regressor.
# Numeric columns are standardised; the categorical column is label-encoded
# and must be passed through the transformer untouched.
features_num = ['Cost_of_vehicle', 'Min_coverage', 'Max_coverage']
features_cat = ['Insurance_company']
feature_columns = features_num + features_cat

# Fit the encoder on the training labels only, then reuse it for the test set.
le = LabelEncoder()
df['Insurance_company'] = le.fit_transform(df['Insurance_company'])
df_test['Insurance_company'] = le.transform(df_test['Insurance_company'])

# remainder='passthrough' keeps the label-encoded Insurance_company column.
# With the default (remainder='drop') every column not listed in a transformer
# is discarded, so the encoded feature would never reach the model.
preprocessor = make_column_transformer(
    (StandardScaler(), features_num),
    remainder='passthrough',
)

y = df['Amount']
train_imputed = df.loc[:, feature_columns]
X = preprocessor.fit_transform(train_imputed)

# The test set is only transformed, using the scaler statistics fitted on train.
test_imputed = df_test.loc[:, feature_columns]
test_X = preprocessor.transform(test_imputed)

train_imputed.columns

In [None]:
# Hold out 20% of the tabular rows for validation (fixed seed for repeatability)
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## Train a random forest regressor

In [None]:
# Shallow random forest (1000 trees, depth 3) fitted on the training split;
# `fit` returns the estimator itself, so construction and fitting are chained.
rf_model = RandomForestRegressor(
    n_estimators=1000,
    max_depth=3,
    random_state=1,
).fit(train_X, train_y)

# Score the model on the held-out validation rows with mean absolute error.
val_preds = rf_model.predict(val_X)
rf_val_mae = mean_absolute_error(val_y, val_preds)

print("Validation MAE for Random Forest Model: {}".format(rf_val_mae))

## Get the predictions for amount

In [None]:
# Predicted Amount for every row of the preprocessed test matrix
amount_predictions = rf_model.predict(X=test_X)

## Prepare the images

In [None]:
# Image paths and Condition labels for the image classifier.
# NOTE: this rebinds X and y, which previously held the tabular features/target.
X = df.loc[:, ['Image_path']]
y = df.loc[:, ['Condition']]
# The test paths must come from df_test; the original read them from df, so the
# "test set shape" printed below was actually the training shape.
X_test = df_test.loc[:, ['Image_path']]
print('train set shape:', X.shape)
print('test set shape:', X_test.shape)

In [None]:
# Load every training image, resize it to 224x224 and collect it together
# with its integer Condition label.
train_image_dir = '/kaggle/input/fast-furious-and-insured/Fast_Furious_Insured/trainImages/'

data = []
labels = []
# Columns are looked up by name rather than by position, so a change in the
# CSV column order cannot silently pick up the wrong field.
for img_path, condition in zip(df['Image_path'], df['Condition']):
    originalImage = cv2.imread(train_image_dir + img_path)
    if originalImage is None:
        # cv2.imread returns None (it does not raise) when the file is missing
        # or unreadable; fail here with a clear message instead of inside resize.
        raise FileNotFoundError('Could not read training image: ' + train_image_dir + img_path)
    image = cv2.resize(originalImage, (224, 224))
    data.append(img_to_array(image))
    labels.append(int(condition))

## Transfer learning with MobileNet

In [None]:
# ImageNet-pretrained MobileNet without its classification head, followed by
# global average pooling, a 256-unit ReLU layer and a 2-way softmax output.
base_model = MobileNet(weights='imagenet', include_top=False)
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
preds = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=preds)

# Freeze the first 80 layers of the network; every later layer stays trainable.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 80

In [None]:
from tensorflow.keras import optimizers

# Convert the collected images and labels to arrays.
print(len(data), len(labels))
data = np.array(data, dtype="float")
labels = np.array(labels)

# Partition the data: 80% for training, 20% held out for validation.
(train_images, test_images, train_labels, test_labels) = train_test_split(
    data, labels, test_size=0.2, random_state=42)

# Scale pixel values from [0, 255] to [-0.5, 0.5].
train_images, test_images = (train_images / 255.0) - 0.5, (test_images / 255.0) - 0.5

# One-hot encode the two Condition classes for categorical cross-entropy.
train_labels = to_categorical(train_labels, 2)
test_labels = to_categorical(test_labels, 2)

# Adam with an initial learning rate of 0.002 that decays as
# lr / (1 + 0.0001 * step). InverseTimeDecay with decay_steps=1 reproduces the
# legacy `decay=0.0001` argument; `learning_rate` replaces the deprecated `lr`.
# The default epsilon is used in place of the former `epsilon=None`.
adam = optimizers.Adam(
    learning_rate=optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.002,
        decay_steps=1,
        decay_rate=0.0001,
    ),
    beta_1=0.9,
    beta_2=0.999,
    amsgrad=False,
)

# Pass the configured optimizer object. The original passed the string 'adam',
# which builds a default optimizer and ignores the settings above.
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_images,
    train_labels,
    batch_size=32,
    epochs=5,
    shuffle=True,
    validation_data=(test_images, test_labels),
)

## Getting the test prediction

In [None]:
# Predict the Condition class for every test image.
test_image_dir = '/kaggle/input/fast-furious-and-insured/Fast_Furious_Insured/testImages/'

condition_predictions = []
submission_images = []
# The image path is looked up by column name rather than by position.
for img_path in df_test['Image_path']:
    originalImage = cv2.imread(test_image_dir + img_path)
    if originalImage is None:
        # cv2.imread returns None (it does not raise) when the file is missing
        # or unreadable; fail here with a clear message instead of inside resize.
        raise FileNotFoundError('Could not read test image: ' + test_image_dir + img_path)
    image = cv2.resize(originalImage, (224, 224))
    submission_images.append(img_to_array(image))

if submission_images:
    # Same scaling as the training images: [0, 255] -> [-0.5, 0.5].
    submission_batch = np.array(submission_images, dtype="float") / 255.0 - 0.5
    # One batched predict call instead of one call per image.
    class_probabilities = model.predict(submission_batch)
    # The predicted class is the index of the highest softmax probability.
    condition_predictions = list(np.argmax(class_probabilities, axis=1))
       

## Preparing the submission

In [None]:
from IPython.display import FileLink

# Assemble one row per test image: its path, predicted Condition and predicted Amount.
submission_columns = {
    'Image_path': df_test['Image_path'],
    'Condition': condition_predictions,
    'Amount': amount_predictions,
}
submission = pd.DataFrame(submission_columns)

# Write the file without the index column and show a download link for it.
submission.to_csv('submission.csv', index=False)
FileLink('submission.csv')
