In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Read the house-sales CSV from the Kaggle input directory and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/housesalesprediction/kc_house_data.csv'
)
df.head(n=5)

In [None]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

In [None]:
# Print the frame summary produced by DataFrame.info().
df.info()

In [None]:
# Show descriptive statistics using DataFrame.describe() with its default settings.
df.describe()

In [None]:
# Distribution of sale prices: density-normalised histogram with a KDE overlay.
# sns.distplot is deprecated in seaborn; histplot(kde=True, stat='density') is
# the documented replacement for its default histogram-plus-KDE output.
plt.figure(figsize=(12, 8))
sns.histplot(df['price'], kde=True, stat='density')

In [None]:
# Inspect the 20 most expensive rows.
df.sort_values(by='price', ascending=False).iloc[:20]

In [None]:
# Exclude the most expensive 1% of rows so extreme prices do not dominate the
# colour scale of the map below. The cut-off is derived from the frame length
# instead of being hard-coded (it evaluates to 216 for 21,600-21,699 rows).
top_1_perc_count = int(len(df) * 0.01)
non_top_1_perc = df.sort_values('price', ascending=False).iloc[top_1_perc_count:]

In [None]:
# Longitude/latitude scatter of the trimmed data, coloured by price.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=non_top_1_perc,
    x='long',
    y='lat',
    hue='price',
    palette='RdYlGn',
    edgecolor=None,
    alpha=0.2,
    ax=ax,
)

In [None]:
# Remove the 'id' column from the working frame.
df = df.drop(columns=['id'])

In [None]:
# Parse the sale date, then derive month/year features with the vectorised
# .dt accessor instead of calling a Python lambda on every row.
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year

In [None]:
# Price box plots by sale year (grid slot 1) and sale month (grid slot 2)
# on a 2x2 layout; slots 3 and 4 are left unused.
fig = plt.figure(figsize=(12, 10))

ax_year = fig.add_subplot(2, 2, 1)
sns.boxplot(data=df, x='year', y='price', ax=ax_year)

ax_month = fig.add_subplot(2, 2, 2)
sns.boxplot(data=df, x='month', y='price', ax=ax_month)

In [None]:
# Preview the frame again now that 'month' and 'year' have been added.
df.head(n=5)

In [None]:
# The raw 'date' column is no longer needed once month/year are extracted.
df = df.drop(columns=['date'])

In [None]:
# Remove the 'zipcode' column from the feature set.
df = df.drop(columns=['zipcode'])

In [None]:
# Target is the sale price; every remaining column is used as a feature.
y = df['price']
X = df.drop(columns=['price'])

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

# Fit the min-max scaler on the training rows only, then apply the same
# transformation to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape, X_test.shape, sep='\n')

In [None]:
from sklearn import metrics

def print_evaluate(true, predicted, train=True):
    """Print MAE, MSE, RMSE and R2 for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Predictions to score against ``true``.
    train : bool, default True
        Truthy selects the "Training Result" banner, falsy selects the
        "Testing Result" banner. The metrics printed are the same either way.

    Returns
    -------
    None
        The scores are only written to stdout.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = metrics.r2_score(true, predicted)

    # Only the banner differs between the two modes.
    banner = "========Training Result=======" if train else "=========Testing Result======="
    print(banner)

    report = (
        ('MAE: ', mae),
        ('MSE: ', mse),
        ('RMSE: ', rmse),
        ('R2 Square: ', r2_square),
    )
    for label, value in report:
        print(label, value)

# ANN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam

In [None]:
# Fully connected regressor: a first layer as wide as the feature count,
# then 32 -> 64 -> 128 ReLU units, dropout, and a single linear output.
model = Sequential([
    Dense(X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1),
])

# Mean squared error loss, Adam optimiser constructed with 0.001.
model.compile(loss='mse', optimizer=Adam(0.001))

In [None]:
# Training the model

In [None]:
# Train for 500 epochs in batches of 120; the held-out split is passed as
# validation data so its loss is recorded in the returned history each epoch.
r = model.fit(
    x=X_train,
    y=y_train.values,
    batch_size=120,
    epochs=500,
    validation_data=(X_test, y_test.values),
)

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(r.history['loss'], label='loss')
ax.plot(r.history['val_loss'], label='val_loss')
ax.legend()

In [None]:
# Predict on both splits (train first, then test), then report the metrics for each.
y_train_pred, y_test_pred = [model.predict(features) for features in (X_train, X_test)]

print_evaluate(y_train, y_train_pred)
print_evaluate(y_test, y_test_pred, train=False)

# Linear Regression

In [None]:
# Comparing with LinearRegression


from sklearn.linear_model import LinearRegression

# Baseline: ordinary least-squares regression fitted on the same scaled training data.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
# Report the same metric set that was printed for the ANN so the two models
# can be compared like-for-like, then show the test-set R2 as the cell output.
print_evaluate(y_train, lr.predict(X_train), train=True)
print_evaluate(y_test, lr.predict(X_test), train=False)
lr.score(X_test, y_test)