## Importing Libraries

In [None]:
import numpy as np
import pandas as pd

## Importing Dataset

In [None]:
# Read the training CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../input/house-price-prediction-challenge/train.csv')
df.head(n=5)

In [None]:
# Print a summary of the training frame: columns, dtypes and non-null counts.
df.info()

In [None]:
# Count the missing values in every column of the training frame.
df.isna().sum()

## Add Column CITY

In [None]:
# Preview the text that follows the last comma of each address.
df['ADDRESS'].str.rsplit(',', n=1).str[-1]

In [None]:
# Store the last comma-separated component of ADDRESS as a new CITY column.
address_tail = df['ADDRESS'].str.rsplit(',', n=1).str[-1]
df['CITY'] = address_tail
df.head()

## Split into Features and Target Variable

In [None]:
# Collect the model inputs as a NumPy array. Column order matters for the
# encoding cells that index into it: POSTED_BY is column 0, BHK_OR_RK is
# column 4 and CITY is column 10.
feature_columns = [
    'POSTED_BY',
    'UNDER_CONSTRUCTION',
    'RERA',
    'BHK_NO.',
    'BHK_OR_RK',
    'SQUARE_FT',
    'READY_TO_MOVE',
    'RESALE',
    'LONGITUDE',
    'LATITUDE',
    'CITY',
]
X = df[feature_columns].to_numpy()
X

In [None]:
# Select the target column by name. A positional index such as -2 silently
# points at a different column as soon as a column is added to or removed
# from df (CITY has already been appended above).
y = df['TARGET(PRICE_IN_LACS)'].values
y

## Encoding Categorical Columns

In [None]:
# Peek at the three columns that are encoded in the cells below.
df[['POSTED_BY', 'BHK_OR_RK', 'CITY']].head()

In [None]:
# List the distinct values that occur in POSTED_BY.
df['POSTED_BY'].unique()

In [None]:
# List the distinct values that occur in BHK_OR_RK.
df['BHK_OR_RK'].unique()

In [None]:
# List the distinct values that occur in the derived CITY column.
df['CITY'].unique()

### Encode Column CITY

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the CITY values (feature column 10) with integer labels, in place.
le = LabelEncoder()
le.fit(X[:, 10])
X[:, 10] = le.transform(X[:, 10])
X

### Encode Column BHK_OR_RK

In [None]:
# Replace the BHK_OR_RK values (feature column 4) with integer labels, in place.
# `le` is rebound here, so the CITY encoder from the previous cell is no longer reachable.
le = LabelEncoder().fit(X[:, 4])
X[:, 4] = le.transform(X[:, 4])
X

### Encode Column POSTED_BY

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode POSTED_BY (column 0); every other column passes through unchanged.
# NOTE: X is overwritten, so re-running this cell would encode column 0 of the
# already-encoded array.
ct = ColumnTransformer(
    transformers=[('encode', OneHotEncoder(), [0])],
    remainder='passthrough',
)
encoded_features = ct.fit_transform(X)
X = np.array(encoded_features)
X

## Split into Training Set and Test Set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
X_train, X_test, y_train, y_test = split_parts

## Training Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest with 11 trees on the training split; random_state fixes the seed.
regressor = RandomForestRegressor(random_state=0, n_estimators=11)
regressor.fit(X=X_train, y=y_train)

## Predict

In [None]:
# Predict the held-out rows and print predictions (left column) beside the
# actual targets (right column), rounded to two decimals for display.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

## Evaluate Model

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_squared_log_error

# Print three hold-out metrics, one per line: root mean squared log error,
# root mean squared error, then R^2.
rmsle = np.sqrt(mean_squared_log_error(y_test, y_pred))
print(rmsle)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)
r2 = r2_score(y_test, y_pred)
print(r2)

## Predicting test.csv

### Importing test.csv

In [None]:
# Read the submission CSV into a DataFrame and preview its first five rows.
df_test = pd.read_csv(filepath_or_buffer='../input/house-price-prediction-challenge/test.csv')
df_test.head(n=5)

In [None]:
# Print a summary of the test frame: columns, dtypes and non-null counts.
df_test.info()

In [None]:
# Count the missing values in every column of the test frame.
df_test.isna().sum()

### Add Column CITY

In [None]:
# Derive CITY for the test frame the same way as for training:
# the last comma-separated component of ADDRESS.
test_address_tail = df_test['ADDRESS'].str.rsplit(',', n=1).str[-1]
df_test['CITY'] = test_address_tail
df_test.head()

### Build the Features Variable

In [None]:
# Collect the submission inputs as a NumPy array, using the same columns in the
# same order as the training features (POSTED_BY at 0, BHK_OR_RK at 4, CITY at 10).
test_feature_columns = [
    'POSTED_BY',
    'UNDER_CONSTRUCTION',
    'RERA',
    'BHK_NO.',
    'BHK_OR_RK',
    'SQUARE_FT',
    'READY_TO_MOVE',
    'RESALE',
    'LONGITUDE',
    'LATITUDE',
    'CITY',
]
test = df_test[test_feature_columns].to_numpy()
test

### Encoding Categorical Columns

#### Encode Column CITY

In [None]:
# Encode the test cities with the TRAINING label mapping. LabelEncoder numbers
# the sorted unique labels it is fitted on, so fitting it on df['CITY'] yields
# the same city -> code mapping the regressor was trained with. Fitting a fresh
# encoder on the test cities (as before) gives the same city a different code
# whenever the two files contain different sets of cities.
le = LabelEncoder()
le.fit(df['CITY'])
city_codes = {city: code for code, city in enumerate(le.classes_)}

# Cities that never occur in the training data have no code; mark them with -1.
# Reading from df_test['CITY'] (not test[:, 10]) keeps this cell safe to re-run.
test[:, 10] = df_test['CITY'].map(city_codes).fillna(-1).astype(int).to_numpy()
test

#### Encode Column BHK_OR_RK

In [None]:
# Encode BHK_OR_RK with the TRAINING label mapping instead of re-fitting on the
# test data, so each label keeps the integer code the regressor was trained with.
le = LabelEncoder()
le.fit(df['BHK_OR_RK'])
bhk_codes = {label: code for code, label in enumerate(le.classes_)}

# Labels absent from the training data get -1. Reading from
# df_test['BHK_OR_RK'] (not test[:, 4]) keeps this cell safe to re-run.
test[:, 4] = df_test['BHK_OR_RK'].map(bhk_codes).fillna(-1).astype(int).to_numpy()
test

#### Encode Column POSTED_BY

In [None]:
# Apply the ColumnTransformer `ct` that was fitted on the training features
# rather than fitting a new one on the test data. This guarantees the one-hot
# columns for POSTED_BY appear in the same number and order as the columns the
# regressor was trained on, even if the test file lacks one of the categories.
# NOTE: `test` is overwritten, so re-running this cell would transform the
# already-encoded array.
test = np.array(ct.transform(test))
test

### Predict

In [None]:
# Predict the target for every row of the encoded submission features.
y_result = regressor.predict(test)
y_result

### Output Predictions File

In [None]:
# Write the predictions to submission.csv as a single column, without the row index.
submission = pd.DataFrame({'TARGET(PRICE_IN_LACS)': y_result})
submission.to_csv('submission.csv', index=False)