In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
np.set_printoptions(precision=4)
from sklearn.inspection import permutation_importance
from sklearn.model_selection import GridSearchCV
from IPython.display import display,Markdown,HTML
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Location of the raw CSV (Kaggle input mount).
DATA_PATH = '/kaggle/input/real-estate-dataset/data.csv'

# Read the table into memory and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Per-column dtype and non-null count summary.
df.info()

In [None]:
# Number of rows that exactly repeat an earlier row.
df.duplicated().sum()

In [None]:
# Missing-value count for each column.
df.isna().sum()

In [None]:
# Drop every row that has a missing value in any column.
# NOTE(review): this rebinds `df`, so the unfiltered frame is no longer reachable afterwards.
df = df.dropna()

In [None]:
# Name of the column the models predict.
target_column = "MEDV"

# Independent copy of the cleaned frame; the plot_pie / plot_hist / plot_bar helpers read from it.
df_all = df.copy()

In [None]:
def plot_pie(column, title="All"):
    """Draw a pie chart of the value frequencies of one ``df_all`` column.

    Parameters
    ----------
    column : str
        Name of a column in the module-level ``df_all`` frame.
    title : str, default "All"
        Text shown above the chart.
    """
    counts = df_all[column].value_counts()
    # Draw on the axes created here rather than creating them and then
    # falling back to pyplot's implicit "current axes".
    _, ax = plt.subplots(1, 1)
    ax.pie(counts, autopct='%1.2f%%', labels=counts.index)
    ax.set_title(title)
    plt.show()
    
def plot_hist(column, title="All"):
    """Show a density-normalised histogram of one ``df_all`` column.

    Parameters
    ----------
    column : str
        Name of a column in the module-level ``df_all`` frame.
    title : str, default "All"
        Text shown above the chart.
    """
    values = df_all[column]
    plt.hist(values, density=True)
    plt.title(title)
    plt.show()

def plot_bar(column, sort=False, title="all"):
    """Show a bar chart of how often each value of a ``df_all`` column occurs.

    Parameters
    ----------
    column : str
        Name of a column in the module-level ``df_all`` frame.
    sort : bool, default False
        When true, bars are ordered by value (index order); otherwise they
        keep the order returned by ``value_counts``.
    title : str, default "all"
        Text shown above the chart.
    """
    counts = df_all[column].value_counts()
    if sort:
        counts = counts.sort_index()
    # Labels are converted to strings so each value gets its own categorical bar.
    labels = counts.index.astype(str)
    plt.bar(labels, counts)
    plt.title(title)
    plt.show()

def plot_boxplot(column, title=""):
    """Show a vertical box plot of one ``df_all`` column.

    Parameters
    ----------
    column : str
        Name of a column in the module-level ``df_all`` frame.
    title : str, default ""
        Text shown above the chart; the default leaves the chart untitled.
    """
    # Read from df_all like the other plot_* helpers, and apply the title
    # argument instead of accepting it and dropping it.
    ax = sns.boxplot(y=column, data=df_all)
    ax.set_title(title)
    plt.show()

In [None]:
def eda(df):
    """Render a per-column exploratory report for ``df``.

    A heading is shown for every column, followed by:

    * int64/float64 column with more than 10 distinct values: summary
      statistics, a histogram and a box plot;
    * object column with more than 10 distinct values: its five most
      frequent values;
    * int64/float64/object column with at most 10 distinct values: a bar
      chart and a pie chart;
    * any other dtype: nothing beyond the heading.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are walked and summarised.

    Notes
    -----
    The ``plot_*`` helpers receive only a column name and read module-level
    frames rather than the ``df`` argument, so the charts agree with the
    tables only when those frames hold the same data as ``df``.
    """
    display(HTML('<h1>Exploratory Data Analysis</h1>'))

    for column in df.columns:
        display(HTML('<h2>{}</h2>'.format(column)))
        series = df[column]
        dtype = series.dtype

        if dtype == 'int64' or dtype == 'float64':
            if series.nunique() > 10:
                # A bare expression inside a function is discarded, so the
                # table must go through display() to appear in the output.
                display(series.describe())
                plot_hist(column)
                plot_boxplot(column)
            else:
                plot_bar(column)
                plot_pie(column)
        elif dtype == 'object':
            if series.nunique() > 10:
                display(series.value_counts().head(5))
            else:
                plot_bar(column)
                plot_pie(column)
        # Columns of any other dtype get a heading only.

In [None]:
# Run the per-column report on the frame left after dropping rows with missing values.
eda(df)

In [None]:
# Pairwise correlations between the numeric columns, drawn as a heatmap.
# Non-numeric columns are excluded explicitly: pandas >= 2.0 raises on them
# in corr() instead of silently dropping them as older versions did.
data = df.select_dtypes(include=["number", "bool"]).corr()
sns.heatmap(data)

In [None]:
# Order the correlation matrix rows by their correlation with the target,
# strongest positive first, then show that column. The target name comes from
# target_column so it is defined in exactly one place.
data = data.sort_values(by=target_column, ascending=False)
data[target_column]

In [None]:
# Columns whose correlation with the target lies strictly inside
# (-CORR_THRESHOLD, CORR_THRESHOLD), i.e. weak linear relationships.
CORR_THRESHOLD = 0.4
weak_mask = data[target_column].abs() < CORR_THRESHOLD
data.loc[weak_mask].index

In [None]:
# Target vector: the column named by target_column, detached from df.
y = df[target_column].copy()

# Feature matrix: every column except the target and the listed features.
excluded_columns = [target_column, 'ZN', 'B', 'DIS', 'CHAS', 'AGE', 'CRIM', 'RAD']
X = df.drop(excluded_columns, axis=1)

In [None]:
# Confirm which feature columns remain after the drop.
X.info()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test-set statistics are used.
sc = StandardScaler()
sc.fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Import ML Libraries
from sklearn.linear_model import LinearRegression, SGDRegressor, ElasticNet, BayesianRidge
from lightgbm import LGBMRegressor
from xgboost.sklearn import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Seed for the scikit-learn estimators that draw random numbers while fitting,
# so repeated runs of the comparison give the same scores. It matches the
# random_state used for the train/test split.
MODEL_SEED = 1234

# Candidate models as [estimator, display name] pairs. Estimators from the
# other libraries are left at their library defaults.
regressors = [
    [LinearRegression(), 'LinearRegression'],
    [SGDRegressor(random_state=MODEL_SEED), 'SGDRegressor'],
    [ElasticNet(), 'ElasticNet'],
    [BayesianRidge(), 'BayesianRidge'],
    [LGBMRegressor(), 'LGBMRegressor'],
    [XGBRegressor(), 'XGBRegressor'],
    [CatBoostRegressor(verbose=0), 'CatBoostRegressor'],
    [KernelRidge(), 'KernelRidge'],
    [GradientBoostingRegressor(random_state=MODEL_SEED), 'GradientBoostingRegressor'],
    [SVR(), 'SVR'],
    [AdaBoostRegressor(random_state=MODEL_SEED), "AdaBoostRegressor"],
    [DecisionTreeRegressor(random_state=MODEL_SEED), "DecisionTreeRegressor"],
]

In [None]:
from sklearn import metrics


# Fit every candidate on the training split and print its test-set errors.
for model, name in regressors:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(name)
    mae = metrics.mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error = ", mae)
    # The squared error is computed once and reused for the root.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print("Mean Squared Error = ", mse)
    print("Root Mean Squared Error = ", np.sqrt(mse))
    r2 = metrics.r2_score(y_test, y_pred)
    print("R2 score = ", r2)
    print("\n\n")

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dropout
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.losses import BinaryCrossentropy
from numpy.random import seed

# Seed NumPy's global generator and TensorFlow's global generator with the same fixed value.
seed(1234)
tf.random.set_seed(1234)

In [None]:
# Initialising the ANN: three 32-unit ReLU hidden layers followed by a
# single-unit output layer with no activation. The input width is the number
# of feature columns in the scaled training matrix.
n_features = X_train.shape[1]

model = Sequential([
    # Input layer and first hidden layer
    Dense(32, activation='relu', input_shape=(n_features,)),
    # Second hidden layer
    Dense(units=32, activation='relu'),
    # Third hidden layer
    Dense(units=32, activation='relu'),
    # Output layer
    Dense(units=1),
])

In [None]:
# Adam optimiser with an explicit learning rate.
opt = Adam(learning_rate=0.001)

# Stop on held-out loss. Monitoring the training loss cannot detect
# overfitting, and restore_best_weights would then restore the epoch that fit
# the training data most closely. A slice of the training data is therefore
# reserved for validation and its loss is what the callback watches.
earlystopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    verbose=1,
    restore_best_weights=True,
)
model.compile(optimizer=opt, loss='mean_squared_error')

# Keep the History object so the loss curves can be inspected afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=100,
    callbacks=[earlystopper],
)

In [None]:
# Predict on the scaled test split with the trained network.
# NOTE(review): the network ends in a single-unit Dense layer, so this is presumably shaped (n_samples, 1) rather than 1-D like y_test — confirm before doing arithmetic on the two.
y_pred = model.predict(X_test)

In [None]:
# Test-set errors for the neural network, in the same format as the
# regressor comparison.
print("Neural Network")
nn_mae = metrics.mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error = ", nn_mae)
# The squared error is computed once and reused for the root.
nn_mse = metrics.mean_squared_error(y_test, y_pred)
print("Mean Squared Error = ", nn_mse)
print("Root Mean Squared Error = ", np.sqrt(nn_mse))
nn_r2 = metrics.r2_score(y_test, y_pred)
print("R2 score = ", nn_r2)

Best algorithm so far (scores on the held-out 20% test split):
* XGBRegressor
* Mean Absolute Error =  2.1804080626543834
* Mean Squared Error =  8.616639899537468
* Root Mean Squared Error =  2.9354113680262035
* R2 score =  0.9049522583519426

YouTube video: https://www.youtube.com/watch?v=VrC5pSfAKeQ