In [None]:
from fastai.vision.all import *
import torchvision.models as models
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor

In [None]:
# Training table for the competition; read from the Kaggle input directory.
train_csv = "../input/fast-furious-and-insured/Fast_Furious_Insured/train.csv"
train_df = pd.read_csv(train_csv)

In [None]:
# Preview the first rows of the training table that was just loaded.
train_df.head()

In [None]:
# Class balance of the classification target.
train_df["Condition"].value_counts()

In [None]:
# Image DataLoaders for the Condition classifier.
# - file names come from train_df and are resolved under <path>/trainImages
# - 20% of the rows are held out for validation; `seed` pins that random split
#   so the validation set (and the reported accuracy) is comparable across runs
# - every image is resized to 224 px, then fastai's default batch augmentations
#   are applied
dls = ImageDataLoaders.from_df(
    train_df,
    path="../input/fast-furious-and-insured/Fast_Furious_Insured",
    folder="trainImages",
    label_col="Condition",
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize(224),
    batch_tfms=aug_transforms(),
    bs=32,
)

In [None]:
# Show the label vocabulary built from the Condition column.
# NOTE(review): the argmax taken on the test predictions later yields positions
# in this vocabulary -- confirm they coincide with the raw Condition values.
dls.vocab

In [None]:
# Visual sanity check: display a batch of images with their labels.
dls.show_batch()

In [None]:
# Classifier on a pretrained torchvision DenseNet-121 backbone, tracking accuracy.
learn = cnn_learner(
    dls,
    models.densenet121,
    pretrained=True,
    metrics=accuracy,
)

In [None]:
# Fine-tune the pretrained learner; 5 is the epoch count passed to fine_tune.
learn.fine_tune(5)

In [None]:
# Inspect the 5 samples with the highest loss, laid out in a single wide row.
interp = ClassificationInterpretation.from_learner(learn)
top_losses_layout = {"nrows": 1, "figsize": (25, 10)}
interp.plot_top_losses(5, **top_losses_layout)

In [None]:
# Run the trained classifier over every file in the test image folder.
# The resulting predictions follow the order of `test_image_ids`
# (i.e. the os.listdir order), not the row order of test.csv.
test_path = "../input/fast-furious-and-insured/Fast_Furious_Insured/testImages/"
test_image_ids = os.listdir(test_path)
test_images = [f"{test_path}{image_name}" for image_name in test_image_ids]
test_loader = dls.test_dl(test_images, shuffle=False, drop_last=False)
preds = learn.get_preds(dl=test_loader)

In [None]:
# Keep only the first element returned by get_preds and move it to a NumPy array.
# NOTE: this rebinds `preds`, so the cell is not safe to run twice in a row.
class_scores = preds[0]
preds = class_scores.cpu().numpy()

In [None]:
# Index of the highest-scoring class for each test image.
predictions = preds.argmax(axis=1)

In [None]:
# Display the predicted class index of every test image.
predictions

In [None]:
# (Re)load the raw tables so the tabular regression stage starts from the CSVs.
data_dir = "../input/fast-furious-and-insured/Fast_Furious_Insured"
train_df = pd.read_csv(f"{data_dir}/train.csv")
test_df = pd.read_csv(f"{data_dir}/test.csv")

In [None]:
# Preview the first rows of the raw test table.
test_df.head()

In [None]:
# Attach each predicted class to its test.csv row by file name, not by position:
# `predictions` follows the os.listdir() order stored in `test_image_ids`, which
# is arbitrary and need not match the row order of test.csv.
# Assumes Image_path holds the image file name (optionally with a directory
# prefix) -- TODO confirm against test.csv.
pred_by_image = pd.Series(predictions, index=test_image_ids)
test_df['Condition'] = test_df['Image_path'].map(os.path.basename).map(pred_by_image)

# Fail loudly if a row found no matching image instead of silently carrying
# NaNs into the regression features.
assert test_df['Condition'].notna().all(), "some test.csv rows have no predicted Condition"
test_df['Condition'] = test_df['Condition'].astype(int)

In [None]:
# Preview the test table again, now including the predicted Condition column.
test_df.head()

In [None]:
# Distribution of the predicted classes on the test set.
test_df["Condition"].value_counts()

In [None]:
def _expand_expiry_date(df):
    """Return a copy of ``df`` with ``Expiry_date`` replaced by date-part columns.

    The column is parsed with ``pd.to_datetime`` and split into ``month``,
    ``day`` and ``year`` (appended in that order); the original
    ``Expiry_date`` column is dropped.
    """
    expiry = pd.to_datetime(df["Expiry_date"])
    return df.assign(
        month=expiry.dt.month,
        day=expiry.dt.day,
        year=expiry.dt.year,
    ).drop(columns=["Expiry_date"])  # keyword form: positional `axis` is rejected by pandas 2.x


# Same transformation for both tables so their feature columns stay aligned.
train_df = _expand_expiry_date(train_df)
test_df = _expand_expiry_date(test_df)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit ONE encoder on the companies seen in either table and reuse it for both.
# Re-fitting on the test set could map the same company to a different integer
# than in training, which would silently corrupt the feature at predict time.
le = LabelEncoder()
le.fit(pd.concat([train_df['Insurance_company'], test_df['Insurance_company']]))

train_df['Insurance_company'] = le.transform(train_df['Insurance_company'])
test_df['Insurance_company'] = le.transform(test_df['Insurance_company'])

In [None]:
# The image file name is an identifier, not a feature for the regressor.
train_df = train_df.drop(columns=["Image_path"])
test_df = test_df.drop(columns=["Image_path"])

In [None]:
# Cast every remaining column of both tables to floating point.
train_df = train_df.astype("float64")
test_df = test_df.astype("float64")

In [None]:
# Missing values per column in the test table.
test_df.isna().sum()

In [None]:
# Missing values per column in the training table.
train_df.isna().sum()

In [None]:
# Discard every training row that has at least one missing value.
train_df = train_df.dropna()

In [None]:
# Confirm that no missing values remain in the training table.
train_df.isna().sum()

In [None]:
import seaborn as sns

# Annotated heatmap of the pairwise correlations between the training columns.
corr_matrix = train_df.corr()
sns.heatmap(data=corr_matrix, annot=True)

In [None]:
def correlation(dataset, threshold):
    """Return the names of columns strongly correlated with an earlier column.

    A column is reported when the absolute value of its correlation
    (``dataset.corr()``) with any column that precedes it in the correlation
    matrix is strictly greater than ``threshold``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table whose columns are compared.
    threshold : float
        Absolute-correlation cut-off (exclusive).

    Returns
    -------
    set
        Names of the flagged columns.
    """
    abs_corr = dataset.corr().abs()
    n_cols = len(abs_corr.columns)
    # Strictly-lower triangle: position (i, j) with j < i pairs column i with
    # an earlier column j; the diagonal and upper half are ignored.
    earlier_pair = np.tril(np.ones((n_cols, n_cols), dtype=bool), k=-1)
    exceeds = (abs_corr.to_numpy() > threshold) & earlier_pair
    return set(abs_corr.columns[exceeds.any(axis=1)])
# Columns whose absolute correlation with an earlier column exceeds 0.8.
correlation(train_df, threshold=0.8)

In [None]:
# Distribution of the raw regression target.
# `sns` is already imported by the heatmap cell earlier in the notebook, so the
# duplicate import is dropped here. `sns.distplot` is deprecated in seaborn;
# `histplot` with a KDE overlay on a density scale is its documented replacement.
sns.histplot(train_df["Amount"], kde=True, stat="density")

In [None]:
# Replace the target by log(1 + Amount); predictions are mapped back with expm1.
# NOTE(review): log1p is undefined for values <= -1 -- confirm Amount has none.
# NOTE: this overwrites Amount in place, so re-running the cell applies the
# transform a second time.
train_df["Amount"] = np.log1p(train_df["Amount"])

In [None]:
# Distribution of the target after the log1p transform.
# `sns.distplot` is deprecated in seaborn; `histplot` with a KDE overlay on a
# density scale is its documented replacement.
sns.histplot(train_df["Amount"], kde=True, stat="density")

In [None]:
# Split the training table into the feature matrix and the (log-scaled) target.
target_col = "Amount"
X = train_df.drop(columns=[target_col])
y = train_df[target_col]

In [None]:
# Hold out 20% of the labelled rows for validation; the seed fixes the split.
# Note: X_test / y_test are this hold-out part, not the competition test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest on the training part of the split. The seed makes the fitted
# forest (and therefore the submission) reproducible across runs.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
pred_new = model.predict(X_test)

# R^2 on the hold-out rows, measured on the log1p-transformed target, so the
# split is actually used to judge the model.
model.score(X_test, y_test)

In [None]:
# Predict on the competition test table and undo the log1p target transform.
# Selecting X_train.columns feeds the model exactly the features it was fitted
# on, in the same order, instead of relying on test_df happening to share the
# training column layout.
test_pred = np.expm1(model.predict(test_df[X_train.columns]))


In [None]:
# Image_path was dropped from test_df earlier, so re-read it from the raw CSV.
testdf = pd.read_csv("../input/fast-furious-and-insured/Fast_Furious_Insured/test.csv")

# Condition was cast to float together with the other features; convert it back
# to integer class labels so the file contains 0/1 rather than 0.0/1.0.
submission = pd.DataFrame({
    'Image_path': testdf.Image_path,
    'Condition': test_df.Condition.astype(int),
    'Amount': test_pred,
})

# Any file name works; the existing output name is kept unchanged.
submission.to_csv('submission_xg.csv', index=False)