## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import *
from xgboost import XGBRegressor

## Loading Data

In [None]:
# Parse the "Prod. year" column into timestamps while reading; both files share the converter.
date_converter = {'Prod. year': pd.to_datetime}

train, test = (
    pd.read_csv(csv_path, converters=date_converter)
    for csv_path in (
        "../input/mathcohack/train.csv",
        "../input/mathcohack/test.csv",
    )
)

### Viewing the first 5 rows of train

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

## Preprocessing

In [None]:
# Work on a copy of the training frame: derive the vehicle age relative to 2021,
# then drop the target, the identifier, the levy and the raw production date.
reference_year = pd.to_datetime('2021').year
X = train.copy()
X['Age'] = reference_year - X['Prod. year'].dt.year
X = X.drop(columns=["Price", "Levy", "ID", "Prod. year"])

In [None]:
# Replace the label "სხვა" with "Other" in every column.
X = X.replace("სხვა", "Other")

In [None]:
# Whitespace-split "Engine volume": the first token stays in "Engine volume",
# the second token is stored in the new "Turbo" column.
engine_parts = X["Engine volume"].str.split(expand=True)
X[["Engine volume", "Turbo"]] = engine_parts

In [None]:
# Binary encoding of the leather-interior flag.
leather_codes = {"Yes": 1, "No": 0}
X["Leather interior"] = X["Leather interior"].map(leather_codes)

In [None]:
# 1 when the token split out of "Engine volume" is "Turbo", 0 otherwise.
# Comparing with `eq` does not depend on whether the missing marker produced by
# the split is None or NaN, unlike a dict lookup keyed on None.
X["Turbo"] = X["Turbo"].eq("Turbo").astype(int)

In [None]:
# Integer codes for each categorical column. Any value missing from a mapping
# becomes NaN after `.map`.
category_codes = {
    "Wheel": {"Left wheel": 0, "Right-hand drive": 1},
    "Doors": {"04-May": 0, "02-Mar": 1, ">5": 2},
    # Each key appears once. "Rear" is presumably the third category in the data
    # (verify against the CSV); it gets code 1, the value the later fillna(1)
    # would otherwise assign to unmapped rows.
    "Drive wheels": {"4x4": 0, "Rear": 1, "Front": 2},
    "Gear box type": {"Automatic": 0, "Tiptronic": 1, "Variator": 2, "Manual": 3},
    "Fuel type": {
        "Hybrid": 0, "Petrol": 1, "Diesel": 2, "CNG": 3,
        "Plug-in Hybrid": 4, "LPG": 5, "Hydrogen": 6,
    },
    "Category": {
        "Jeep": 0, "Hatchback": 1, "Sedan": 2, "Microbus": 3, "Goods wagon": 4,
        "Universal": 5, "Coupe": 6, "Minivan": 7, "Cabriolet": 8,
        "Limousine": 9, "Pickup": 10,
    },
}
for column, codes in category_codes.items():
    X[column] = X[column].map(codes)


In [None]:
# Fill unmapped drive-wheel values with code 1. The result is assigned back
# because an in-place fillna on a column selection is chained assignment and
# does not write through to X under pandas Copy-on-Write.
X["Drive wheels"] = X["Drive wheels"].fillna(1)

In [None]:
# Keep the leading whitespace-delimited token of "Mileage" and store it as an integer.
X["Mileage"] = X["Mileage"].str.split().str[0].astype("int")

In [None]:
# Cast the two count-like columns to integers.
X = X.astype({"Drive wheels": "int", "Cylinders": "int"})

In [None]:
# The numeric token left in "Engine volume" is still text; cast it to float.
X = X.astype({"Engine volume": "float"})

In [None]:
# Feature matrix for the model: X without the three text columns that were not encoded.
h = X.drop(columns=["Manufacturer", "Model", "Color"])

In [None]:
# Regression target, taken from the untouched training frame.
y = train["Price"]

### Splitting train data


In [None]:
# 80/20 hold-out split. The fixed random_state makes the split, and therefore
# the validation score, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    h, y, train_size=0.8, random_state=0
)

### Training XGB Model

In [None]:
# Gradient-boosted tree regressor fitted on the training split.
# The deprecated `silent`, `nthread` and `seed` arguments (all previously passed
# as None) are omitted; `verbosity`, `n_jobs` and `random_state` cover them.
xgb = XGBRegressor(
    max_depth=100,
    learning_rate=0.1,
    n_estimators=1000,
    verbosity=0,
    # 'reg:squarederror' is the current name of the objective formerly called 'reg:linear'.
    objective='reg:squarederror',
    booster='gbtree',
    n_jobs=-1,
    gamma=0,
    min_child_weight=1,
    max_delta_step=0,
    subsample=0.8,
    colsample_bytree=1,
    colsample_bylevel=1,
    colsample_bynode=1,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    base_score=0.5,
    random_state=0,
)
xgb.fit(x_train, y_train)

### Predicting

In [None]:
# Predict prices for the held-out split with the fitted model.
p=xgb.predict(x_test)

### Evaluation using RMSLE metric

In [None]:
# RMSLE on the held-out split. The metric is imported explicitly rather than
# reached through `sklearn.metrics`, which is never imported as a module here.
# Absolute values are used because mean_squared_log_error rejects negative inputs.
np.sqrt(mean_squared_log_error(y_test, np.abs(p)))

## Test


### Preprocessing the test data

In [None]:
# Work on a copy of the test frame: derive the vehicle age relative to 2021,
# then drop the price, the identifier, the levy and the raw production date.
reference_year = pd.to_datetime('2021').year
R = test.copy()
R['Age'] = reference_year - R['Prod. year'].dt.year
R = R.drop(columns=["Price", "Levy", "ID", "Prod. year"])

In [None]:
# Replace the label "სხვა" with "Other" in every column.
R = R.replace("სხვა", "Other")

In [None]:
# Whitespace-split "Engine volume": the first token stays, the second becomes "Turbo".
engine_parts = R["Engine volume"].str.split(expand=True)
R[["Engine volume", "Turbo"]] = engine_parts

# Binary encoding of the leather-interior flag.
R["Leather interior"] = R["Leather interior"].map({"Yes": 1, "No": 0})

# 1 when the split-out token is "Turbo", 0 otherwise. `eq` gives the same result
# whether the missing marker is None or NaN, unlike a dict lookup keyed on None.
R["Turbo"] = R["Turbo"].eq("Turbo").astype(int)

In [None]:
# The numeric token left in "Engine volume" is still text; cast it to float.
R = R.astype({"Engine volume": "float"})

In [None]:
# Integer codes for the body-style column; labels not listed here become NaN.
body_style_codes = {
    "Jeep": 0, "Hatchback": 1, "Sedan": 2, "Microbus": 3, "Goods wagon": 4,
    "Universal": 5, "Coupe": 6, "Minivan": 7, "Cabriolet": 8,
    "Limousine": 9, "Pickup": 10,
}
R["Category"] = R["Category"].map(body_style_codes)

In [None]:
# Integer codes for the fuel type; labels not listed here become NaN.
fuel_codes = {
    "Hybrid": 0, "Petrol": 1, "Diesel": 2, "CNG": 3,
    "Plug-in Hybrid": 4, "LPG": 5, "Hydrogen": 6,
}
R["Fuel type"] = R["Fuel type"].map(fuel_codes)


In [None]:
# Integer codes for the gearbox type; labels not listed here become NaN.
gearbox_codes = {"Automatic": 0, "Tiptronic": 1, "Variator": 2, "Manual": 3}
R["Gear box type"] = R["Gear box type"].map(gearbox_codes)


In [None]:
# Integer codes for the drive type, with each key listed once. "Rear" is
# presumably the third category in the data (verify against the CSV); it gets
# code 1, the value the later fillna(1) would otherwise assign to unmapped rows.
drive_codes = {"4x4": 0, "Rear": 1, "Front": 2}
R["Drive wheels"] = R["Drive wheels"].map(drive_codes)


In [None]:
# Encode "Doors" from the working copy R (not the raw `test` frame), consistent
# with how every other column of R is encoded.
door_codes = {"04-May": 0, "02-Mar": 1, ">5": 2}
R["Doors"] = R["Doors"].map(door_codes)


In [None]:
# Binary encoding of the steering-wheel side.
wheel_codes = {"Left wheel": 0, "Right-hand drive": 1}
R["Wheel"] = R["Wheel"].map(wheel_codes)


In [None]:
# Keep the leading whitespace-delimited token of "Mileage" and store it as an integer.
R["Mileage"] = R["Mileage"].str.split().str[0].astype("int")

In [None]:
# Preview the first five rows of the partially encoded test frame.
R.head(n=5)

In [None]:
# Fill unmapped drive-wheel values with code 1. The result is assigned back
# because an in-place fillna on a column selection is chained assignment and
# does not write through to R under pandas Copy-on-Write.
R["Drive wheels"] = R["Drive wheels"].fillna(1)

In [None]:
# Cast "Engine volume" to float (an earlier cell performs the same cast on R).
R = R.astype({"Engine volume": "float"})

In [None]:
# Remove the three text columns that were not encoded.
R = R.drop(columns=["Manufacturer", "Model", "Color"])

### Predicting on test data using the xgb model

In [None]:
# Predict prices for the preprocessed test frame with the fitted model.
l=xgb.predict(R)

In [None]:
# Single-column frame of predicted prices, using absolute values of the raw predictions.
prediction = pd.DataFrame({'Price': np.abs(l)})

In [None]:
# Write the predictions to disk; the row index is written too (to_csv default).
prediction.to_csv(path_or_buf="solh.csv")