## Importing Libraries

In [None]:
import numpy as np
import pandas as pd

## Importing Dataset

In [None]:
# Load the Audi listings CSV into a DataFrame and preview its first rows.
df = pd.read_csv('../input/used-car-dataset-ford-and-mercedes/audi.csv')
df.head()

In [None]:
# Print the column names, dtypes and non-null counts.
df.info()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

## Split into Features and Target Variable

In [None]:
# Feature matrix as a NumPy array. The column order matters: later cells
# address columns by position (0 = model, 2 = transmission, 4 = fuelType).
X = df[[
    'model',
    'year',
    'transmission',
    'mileage',
    'fuelType',
    'tax',
    'mpg',
    'engineSize',
]].values
X

In [None]:
# Target vector: the price column as a NumPy array.
y = df['price'].values
y

## Encoding Categorical Data

In [None]:
# Preview the three columns that are encoded in the cells below.
df[['model', 'transmission', 'fuelType']].head()

In [None]:
# List the distinct values in the model column.
df['model'].unique()

In [None]:
# List the distinct values in the transmission column.
df['transmission'].unique()

In [None]:
# List the distinct values in the fuelType column.
df['fuelType'].unique()

### Encode Column model

In [None]:
from sklearn.preprocessing import LabelEncoder

# Column 0 of X holds the model name; overwrite it in place with the
# integer code LabelEncoder assigns to each distinct value.
le = LabelEncoder()
le.fit(X[:, 0])
X[:, 0] = le.transform(X[:, 0])
X

### Encode Column transmission

In [None]:
# Column 2 of X holds the transmission type; overwrite it in place with
# integer codes. A fresh encoder is used, so `le` is rebound here.
le = LabelEncoder()
le.fit(X[:, 2])
X[:, 2] = le.transform(X[:, 2])
X

### Encode Column fuelType

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 4 of X (fuelType) and pass every other column
# through unchanged.
# NOTE: per the ColumnTransformer docs, the encoded columns come first in
# the output and the passthrough columns follow, so positional indices
# into X change after this cell.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(), [4]),
    ],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))
X

In [None]:
# df_X = pd.DataFrame(X)
# df_X.head()

## Split into Training Set and Test Set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## Training Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 11 trees; the fixed random_state makes the fit
# reproducible.
regressor = RandomForestRegressor(
    n_estimators=11,
    random_state=0,
)
regressor.fit(X_train, y_train)

## Predict

In [None]:
# Predict prices for the held-out rows.
y_pred = regressor.predict(X_test)

# Show floats with two decimals (this changes NumPy's print options for
# the rest of the session).
np.set_printoptions(precision=2)

# Print predictions and actual prices side by side: column 0 is the
# prediction, column 1 the true value.
print(np.column_stack((y_pred, y_test)))

## Evaluate Model

In [None]:
from sklearn.metrics import r2_score, mean_squared_error

# Root-mean-squared error on the test set (same units as the target).
print(np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred)))

# Coefficient of determination (R^2) on the test set.
print(r2_score(y_true=y_test, y_pred=y_pred))