In [82]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [83]:
# Load the housing dataset from the CSV file next to the notebook.
data = pd.read_csv(filepath_or_buffer='Housing.csv')

In [84]:
# Integer-encode the categorical text columns so the model can consume them.
# A separate encoder is fitted per column and kept in `label_encoders`, so the
# text -> integer mapping of every column stays available afterwards (e.g. to
# encode a new raw record or to decode values via `inverse_transform`).
# Re-using a single encoder would overwrite the mapping on every iteration.
categorical_columns = [
    'mainroad', 'guestroom', 'basement', 'hotwaterheating',
    'airconditioning', 'prefarea', 'furnishingstatus',
]
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder

In [85]:
# Separate the target (`price`) from the predictor columns.
y = data['price']
X = data.drop(columns=['price'])


In [86]:
# Preview the first rows of the feature matrix (everything except `price`).
X.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,0
1,8960,4,4,4,1,0,0,0,1,3,0,0
2,9960,3,2,2,1,0,1,0,0,2,1,1
3,7500,4,2,2,1,0,1,0,1,3,1,0
4,7420,4,1,2,1,1,1,0,1,2,0,0


In [87]:
# Display the target series (`price`); pandas abbreviates the long output.
y

0      13300000
1      12250000
2      12250000
3      12215000
4      11410000
         ...   
540     1820000
541     1767150
542     1750000
543     1750000
544     1750000
Name: price, Length: 545, dtype: int64

In [88]:
# Hold out a test_size=0.2 share of the rows for evaluation; the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [89]:
# Display the held-out test features produced by the split above.
X_test

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
316,5900,4,2,2,0,0,1,0,0,1,0,2
77,6500,3,2,3,1,0,0,0,1,0,1,0
360,4040,2,1,1,1,0,0,0,0,0,0,1
90,5000,3,1,2,1,0,0,0,1,0,0,1
493,3960,3,1,1,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
15,6000,4,1,2,1,0,1,0,0,2,0,1
357,6930,4,1,2,0,0,0,0,0,1,0,0
39,6000,4,2,4,1,0,0,0,1,1,0,1
54,6000,3,2,2,1,1,0,0,1,1,0,1


In [90]:
# Build a random forest with default hyper-parameters (seeded for
# reproducibility) and train it on the training split.
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X=X_train, y=y_train)


In [91]:
# Predict prices for every row of the test split and show the raw array.
y_pred = rf_regressor.predict(X=X_test)
print(y_pred)

[ 5211325.          7425040.          3762158.75        4517170.
  3733800.          3488380.          5106360.          5199390.
  2561930.          2878067.5        10000159.4         3358267.5
  2983913.33333333  3509240.          3641225.          3849860.
  3332756.          5001850.          3632790.          4080825.
  5340930.          5909540.          3077981.66666667  3637410.
  4760140.          6568870.          3129350.          4727030.
  6484030.          3283490.          5738040.          3160640.
  6910260.          4331040.          2971220.          6170780.
  4828810.          3743915.          3058545.          4345670.
  4126080.          2801225.          7582190.          4224850.
  4022060.          4398065.          7188399.4         4080160.
  3052490.          3074610.          8041388.6         2547020.
  4080090.          4508350.          3284726.66666667  2858226.
  6929615.          3007270.          5093130.          3123761.66666667
  4034660.      

In [92]:
# Predict the price for the first row of the test split.
# `iloc[[0]]` (list selector) returns a one-row DataFrame, so the column names
# the model was fitted with are passed along. Wrapping `iloc[0]` (a Series) in
# a list would strip the names and make scikit-learn warn about missing
# feature names.
single_row_prediction = rf_regressor.predict(X_test.iloc[[0]])

print("Single row prediction:", single_row_prediction)



Single row prediction: [5211325.]




In [93]:

# A hand-written record, with values listed in the same order as the feature
# columns used for training.
new_record = [[6500,3,2,3,1,0,0,0,1,0,1,0]]

# Attach the training column names before predicting: the model was fitted on
# a named DataFrame, so a bare list triggers a feature-name warning and hides
# ordering mistakes. Building the frame also fails loudly if the number of
# values does not match the number of features.
new_record_frame = pd.DataFrame(new_record, columns=X_train.columns)

new_record_prediction = rf_regressor.predict(new_record_frame)
print("New record prediction:", new_record_prediction)


New record prediction: [7425040.]




In [94]:
# Evaluate the test-set predictions against the true prices.
# (The metric functions are imported in the notebook's top import cell.)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is expressed in the same units as `price`, unlike MSE (squared units),
# which makes the error magnitude directly comparable to MAE.
rmse = mse ** 0.5

print(f"R-squared: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")


R-squared: 0.6115321143409216
Mean Absolute Error: 1025289.6821100918
Mean Squared Error: 1963538216518.6526
