In [None]:


import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 



import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, fname)
    for root, _subdirs, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for path in input_file_paths:
    print(path)



In [None]:
# Read the housing CSV into `df` and preview its first rows.
housing_csv_path = '/kaggle/input/housing-prices-dataset/Housing.csv'
df = pd.read_csv(housing_csv_path)
df.head()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
# Columns that are label-encoded to integer codes (presumably yes/no flags —
# confirm against the raw CSV). 'basement' stays last so the encoder's final
# fitted state matches the column it was last fitted on.
label_encoded_columns = [
    'mainroad',
    'guestroom',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'basement',
]
label_encoder = LabelEncoder()
for column in label_encoded_columns:
    df[column] = label_encoder.fit_transform(df[column])

# 'furnishingstatus' gets an explicit category order:
# unfurnished -> 0, semi-furnished -> 1, furnished -> 2.
ordinal_encoder = OrdinalEncoder(categories=[['unfurnished','semi-furnished','furnished']])
df['furnishingstatus'] = ordinal_encoder.fit_transform(df[['furnishingstatus']])

df.head()

In [None]:
# Display summary statistics for the columns of `df`.
df.describe()

In [None]:
# Show any rows that are exact duplicates of an earlier row.
duplicate_mask = df.duplicated()
df[duplicate_mask]

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
import warnings
# NOTE(review): this silences *every* warning for the rest of the kernel session,
# including ones raised by later modelling cells. It is kept so the output of the
# following cells is unchanged; consider removing it or narrowing the category.
warnings.filterwarnings("ignore")

# Two-panel view of the target variable: its distribution (left) and its
# spread / outliers (right). Explicit axes avoid relying on pyplot's
# "current axes" state.
fig, (ax_dist, ax_box) = plt.subplots(1, 2, figsize=(20, 8))

# `sns.distplot` is deprecated; `histplot` with a KDE overlay on a density
# scale is the documented replacement for its default output.
sns.histplot(df['price'], kde=True, stat='density', ax=ax_dist)
ax_dist.set_title('House Price Distribution Plot')

# Pass the series by keyword: a bare positional argument is interpreted
# differently across seaborn versions (as `x` in older releases, as `data`
# in newer ones), which changes the orientation of the box.
sns.boxplot(y=df['price'], ax=ax_box)
ax_box.set_title('House Pricing Spread')

plt.show()

In [None]:
# Pairwise correlations between the price and the count/size columns,
# rendered as an annotated heatmap.
correlation_columns = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']
cor_matrix = df[correlation_columns].corr()

plt.figure(figsize=(12, 8))
sns.heatmap(data=cor_matrix, annot=True, linewidths=0.5, cmap='coolwarm')
plt.show()

In [None]:
# Feature matrix and target for the regression models.
# NOTE(review): 'guestroom' and 'prefarea' are encoded earlier in the notebook
# but are not included here — confirm whether that omission is intentional.
feature_columns = [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'mainroad',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'parking',
    'furnishingstatus',
]
X = df[feature_columns]
y = df['price']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. The fixed `random_state` makes the
# shuffle deterministic, so the split (and every score computed from it) is
# the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Display the (rows, columns) dimensions of the training features.
X_train.shape

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

# Base regressors compared below and later combined in the voting ensemble.
# The estimators that involve randomness get a fixed seed so that
# cross-validation scores are reproducible between runs.
lr = LinearRegression()
gb = GradientBoostingRegressor(random_state=42)
rf = RandomForestRegressor(random_state=42)
xgb = XGBRegressor(random_state=42)


In [None]:
# (name, model) pairs; the order here is the order used for voting weights.
estimators = [
    ('lr', lr),
    ('gb', gb),
    ('rf', rf),
    ('xgb', xgb),
]

In [None]:
# Report the mean 10-fold cross-validated R2 of each base model on the
# training split.
for model_name, model in estimators:
    scores = cross_val_score(model, X_train, y_train, cv=10, scoring='r2')
    print(model_name, np.round(scores.mean(), 2))

In [None]:
from sklearn.metrics import r2_score 
from sklearn.ensemble import VotingRegressor
# Fit an equally weighted voting ensemble of the base models on the training
# split and score its predictions on the held-out test split.
vr = VotingRegressor(estimators=estimators)
vr = vr.fit(X_train, y_train)
y_pred = vr.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Scatter of actual vs. predicted prices on the test split.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
fig.suptitle('y_test vs y_pred', fontsize=20)
ax.set_xlabel('y_test', fontsize=18)
ax.set_ylabel('y_pred', fontsize=16)

In [None]:
# Mean 15-fold cross-validated R2 of the voting ensemble on the training split.
scores = cross_val_score(estimator=vr, X=X_train, y=y_train, cv=15, scoring='r2')
print("Voting Regressor", np.round(scores.mean(), 2))

In [None]:
from itertools import product

# Exhaustive search over integer voting weights (1..3) for the four base
# models, in the order they appear in `estimators`. Each combination is
# scored with 10-fold cross-validated R2 on the training split.
#
# Each candidate is bound to its own name so the already-fitted `vr` ensemble
# is not replaced by an unfitted one, and the scores are collected so the best
# combination can be reported instead of being read off the printed log.
weight_search_results = []
for i, j, k, m in product(range(1, 4), repeat=4):
    weighted_vr = VotingRegressor(estimators, weights=[i, j, k, m])
    scores = cross_val_score(weighted_vr, X_train, y_train, scoring='r2', cv=10)
    mean_r2 = np.mean(scores)
    weight_search_results.append(((i, j, k, m), mean_r2))
    print("For i={},j={},k={},l={}".format(i, j, k, m), np.round(mean_r2, 2))

# On ties, max() keeps the first combination encountered.
best_weights, best_r2 = max(weight_search_results, key=lambda result: result[1])
print("Best weights:", best_weights, "mean R2:", np.round(best_r2, 2))

In [None]:
#dt1 = DecisionTreeRegressor(max_depth=1)
#dt2 = DecisionTreeRegressor(max_depth=3)
#dt3 = DecisionTreeRegressor(max_depth=5)
#dt4 = DecisionTreeRegressor(max_depth=7)
#dt5 = DecisionTreeRegressor(max_depth=6)

In [None]:
#estimators = [('dt1',dt1),('dt2',dt2),('dt3',dt3),('dt4',dt4),('dt5',dt5)]

In [None]:
#for estimator in estimators:
  #scores = cross_val_score(estimator[1],X_train,y_train,scoring='r2',cv=10)
  #print(estimator[0],np.round(np.mean(scores),2))

In [None]:
#vr = VotingRegressor(estimators)
#scores = cross_val_score(vr,X_train,y_train,scoring='r2',cv=10)
#print("Voting Regressor",np.round(np.mean(scores),2))
     