# Importing the Library

In [57]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the Dataset

In [58]:
train = pd.read_csv('Dataset/train.csv')
test = pd.read_csv('Dataset/test.csv')

x_train = train.drop(columns=['SalePrice'])
y_train = train['SalePrice']

# Cleaning the Dataset

In [59]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
def clean_data(df):
    df.drop(columns=['Id'], inplace=True)
    cols_to_drop = ['Alley','Neighborhood', 'PoolQC', 'Fence', 'MiscFeature']
    df.drop(columns=cols_to_drop, inplace=True, errors='ignore')

    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = encoder.fit_transform(df[col])
    return df

In [60]:
x_train = clean_data(x_train)
x_test = clean_data(test)

# Viewing the Cleaned Data

In [61]:
x_train

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,60,3,65.0,8450,1,3,3,0,4,0,...,61,0,0,0,0,0,2,2008,8,4
1,20,3,80.0,9600,1,3,3,0,2,0,...,0,0,0,0,0,0,5,2007,8,4
2,60,3,68.0,11250,1,0,3,0,4,0,...,42,0,0,0,0,0,9,2008,8,4
3,70,3,60.0,9550,1,0,3,0,0,0,...,35,272,0,0,0,0,2,2006,8,0
4,60,3,84.0,14260,1,0,3,0,2,0,...,84,0,0,0,0,0,12,2008,8,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,60,3,62.0,7917,1,3,3,0,4,0,...,40,0,0,0,0,0,8,2007,8,4
1456,20,3,85.0,13175,1,3,3,0,4,0,...,0,0,0,0,0,0,2,2010,8,4
1457,70,3,66.0,9042,1,3,3,0,4,0,...,60,0,0,0,0,2500,5,2010,8,4
1458,20,3,68.0,9717,1,3,3,0,4,0,...,0,112,0,0,0,0,4,2010,8,4


# Standard Scaling

In [62]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(test)

# Training the Model with Random Forest Regression

In [63]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=200, random_state=0)
regressor.fit(x_train, y_train)

# Testing the Accuracy

In [64]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(x_train, y_train, test_size = 0.2, random_state = 0)
Y_pred = regressor.predict(X_test)
results = np.column_stack((Y_pred, Y_test.values))
results = pd.DataFrame(results, columns=['Predicted', 'Actual'])
print(results.head())
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
print("Mean Squared Error:", mse)
print("R2 Score:", r2)

    Predicted    Actual
0  204757.945  200624.0
1  140704.500  133000.0
2  110349.600  110000.0
3  206159.495  192000.0
4   89886.355   88000.0
Mean Squared Error: 173275141.85023144
R2 Score: 0.97490893333139


# Predicting The Results and Creating Submission File

In [68]:
y_pred = regressor.predict(x_test)
test = pd.read_csv('Dataset/test.csv')
submission = pd.DataFrame({'Id': test['Id'], 'SalePrice': y_pred})
submission.to_csv('submission.csv', index=False)