## Importing the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

## Checking the data

In [None]:
df = pd.read_csv('../input/housesalesprediction/kc_house_data.csv')

In [None]:
df.head()

In [None]:
df.isnull().sum()

In [None]:
df.describe().transpose()

## Exploratory Data Analysis

In [None]:
# Distribution of sale prices: density-scaled histogram with a KDE overlay.
plt.figure(figsize=(12, 8))
# sns.distplot is deprecated (seaborn >= 0.11) and scheduled for removal;
# histplot with stat='density', kde=True and cut=3 is the documented way to
# reproduce the figure distplot used to draw.
sns.histplot(df['price'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Number of houses for each bedroom count.
plt.figure(figsize=(10, 6))
# Name the column via x=/data= instead of passing the Series positionally:
# newer seaborn releases treat the first positional argument as `data`, so
# the positional form no longer plots the intended counts.
sns.countplot(x='bedrooms', data=df)

In [None]:
# Interactive histogram of 'yr_built', using bins of width 1 that span the
# earliest to the latest value in the column.
year_bins = dict(start=np.min(df.yr_built), size=1, end=np.max(df.yr_built))
year_trace = go.Histogram(
    x=df.yr_built,
    xbins=year_bins,
    marker=dict(color='rgb(0,102,0)'),
)
hist2 = [year_trace]

# Figure title and axis captions.
histlayout2 = go.Layout(
    title="Built Year Counts",
    xaxis=dict(title="Years"),
    yaxis=dict(title="Built Counts"),
)

histfig2 = go.Figure(data=hist2, layout=histlayout2)

# Render the figure inline in the notebook.
iplot(histfig2)

In [None]:
sns.countplot(x='floors',data=df, palette='Set2')

In [None]:
plt.figure(figsize = (12,8))
sns.scatterplot(x='price',y='sqft_living',data=df)

In [None]:
plt.figure(figsize = (12,8))
sns.boxplot(x='bedrooms',y='price',data=df)

In [None]:
sns.boxplot(x='waterfront',y='price',data=df)

## Geographical Plotting

In [None]:
plt.figure(figsize = (12,8))
sns.scatterplot(x='price',y='long',data=df)

In [None]:
plt.figure(figsize = (12,8))
sns.scatterplot(x='price',y='lat',data=df)

In [None]:
plt.figure(figsize = (12,8))
sns.scatterplot(x='long',y='lat',data=df,hue='price')

In [None]:
df.sort_values('price',ascending=False).head(20)

In [None]:
len(df)*0.01

In [None]:
non_top_1_perc = df.sort_values('price',ascending=False).iloc[216:]

In [None]:
plt.figure(figsize = (12,8))
sns.scatterplot(x='long',y='lat',data=non_top_1_perc,hue='price',palette='RdYlGn',edgecolor=None,alpha=0.2)

## Working with feature data

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df = df.drop('id',axis=1)

In [None]:
df.head()

## Feature engineering from date

In [None]:
df['date'] = pd.to_datetime(df['date'])

In [None]:
df['month'] = df['date'].apply(lambda date:date.month)

In [None]:
df['year'] = df['date'].apply(lambda date:date.year)

In [None]:
df.head(2)

In [None]:
plt.figure(figsize=(10,6))
sns.boxplot(x='year',y='price',data=df)

In [None]:
plt.figure(figsize=(10,6))
sns.boxplot(x='month',y='price',data=df)

In [None]:
# Average sale price per month. Select 'price' before aggregating so the
# mean is not computed for every other column and then thrown away.
df.groupby('month')['price'].mean().plot()

In [None]:
df = df.drop('date',axis=1)

In [None]:
df = df.drop('zipcode',axis=1)

In [None]:
df.head()

In [None]:
X = df.drop('price',axis=1)
y = df['price']

## Train-test Split 

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

## Scaling the data

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
X_train = scaler.fit_transform(X_train)

In [None]:
X_test = scaler.transform(X_test)

In [None]:
X_train.shape

## Creating a model

In [None]:
from tensorflow.keras.models import Sequential

In [None]:
from tensorflow.keras.layers import Dense

In [None]:
model = Sequential()

model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))

model.add(Dense(1))

model.compile(optimizer='adam',loss='mse')

## Training the model

In [None]:
model.fit(x=X_train,y=y_train.values,
          validation_data=(X_test,y_test.values),
          batch_size=128,epochs=400)

In [None]:
losses = pd.DataFrame(model.history.history)

In [None]:
losses.plot()

## Evaluation on test data 

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score

In [None]:
predictions = model.predict(X_test)

In [None]:
mean_absolute_error(y_test,predictions)

In [None]:
mean_squared_error(y_test,predictions)

In [None]:
mean_squared_error(y_test,predictions)**0.5

In [None]:
explained_variance_score(y_test,predictions)

In [None]:
# Our predictions
plt.figure(figsize=(10,6))
plt.scatter(y_test,predictions)

# Perfect predictions
plt.plot(y_test,y_test,'r')

## Comparing with linear regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
lr = LinearRegression()

In [None]:
lr.fit(X_train,y_train)

In [None]:
predictionslr = lr.predict(X_test)

In [None]:
mean_absolute_error(y_test,predictionslr)

In [None]:
mean_squared_error(y_test,predictionslr)

In [None]:
mean_squared_error(y_test,predictionslr)**0.5

In [None]:
explained_variance_score(y_test,predictionslr)