### Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score

## Data

In [None]:
# Read the house-sales CSV into a DataFrame, then show the first rows and the
# (rows, columns) shape.
df = pd.read_csv('../../../data/kc_house_data.csv')
display(df.head(), df.shape)

# Exploratory Data Analysis

In [None]:
# Total number of missing values across every column of the frame.
df.isna().sum().sum()

In [None]:
# Summary statistics, transposed so each column of df becomes one row.
df.describe().T

In [None]:
# Distribution plot of the price column.
sns.displot(df['price']);
plt.title('Price Distribution');

In [None]:
# Count of rows for each distinct value of the bedrooms column.
sns.countplot(data=df, x='bedrooms').set_title('Number of Bedrooms');

In [None]:
# Scatter of price (x) against sqft_living (y).
sns.scatterplot(data=df, x='price', y='sqft_living').set_title('Price vs Square Feet');

In [None]:
# Price distribution for each bedroom count.
sns.boxplot(data=df, x='bedrooms', y='price').set_title('Bedrooms vs Price');

In [None]:
# Plot every row at its longitude/latitude, coloured by price.
sns.scatterplot(data=df, x='long', y='lat', hue='price').set_title('Latitude and Longitude vs Price');

In [None]:
# Bar chart of the 20 highest-priced rows, one bar per id.
(
    df.sort_values('price', ascending=False)
    .head(20)
    .plot.bar(x='id', y='price', colormap='winter_r')
    .set_title('Top 20 Most Expensive Houses')
);

In [None]:
# Price distribution for each value of the waterfront column.
sns.boxplot(data=df, x='waterfront', y='price').set_title('Waterfront vs Price');

### Feature Engineering

In [None]:
# Remove the id column from the frame.
df = df.drop(columns='id')

### Feature Engineering from Date

In [None]:
# Parse the date column, derive float month and year features from it, then
# drop the original column. The vectorized .dt accessor replaces the row-wise
# apply(lambda ...) and yields the same values; month is added before year so
# the resulting column order is unchanged.
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month.astype(np.float64)
df['year'] = df['date'].dt.year.astype(np.float64)
df = df.drop(columns='date')

In [None]:
# Show the dtype of every remaining column.
df.dtypes

In [None]:
# Remove the zipcode column. Rebind df instead of mutating in place so this
# drop follows the same pattern as the other column drops in this notebook.
df = df.drop('zipcode', axis=1)

In [None]:
# Price distribution for each value of the derived year column.
sns.boxplot(data=df, x='year', y='price').set_title('Year vs Price');

In [None]:
# Price distribution for each value of the derived month column.
sns.boxplot(data=df, x='month', y='price').set_title('Month vs Price');

## Scaling and Train Test Split

In [None]:
# Target is the price column; features are every other column.
y = df['price']
X = df.drop(columns='price')

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

### Scaling

In [None]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted transform to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Shape of the scaled training feature array.
X_train.shape

In [None]:
# Shape of the scaled test feature array.
X_test.shape

## Creating a Model

In [None]:
# List the physical GPU devices TensorFlow reports for this runtime.
tf.config.list_physical_devices('GPU')

In [None]:
# Fully connected network: four ReLU hidden layers of 19 units each, followed
# by a single output unit with no activation.
model = Sequential(
    [Dense(19, activation='relu') for _ in range(4)] + [Dense(1)]
)

# Train with the Adam optimizer against mean squared error.
model.compile(loss='mse', optimizer='adam')

## Training the Model

In [None]:
# Train for 400 epochs in batches of 128, evaluating on the test split as
# validation data.
model.fit(
    X_train,
    y_train.values,
    batch_size=128,
    epochs=400,
    validation_data=(X_test, y_test.values),
)

In [None]:
# Collect the per-epoch metrics recorded during training into a DataFrame,
# one column per metric.
losses = pd.DataFrame.from_dict(model.history.history)

In [None]:
# Line plot of every recorded metric against the epoch index.
losses.plot.line()

# Evaluation on Test Data

Reference: [scikit-learn regression metrics](https://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics)

#### Predicting on Brand New Data

In [None]:
# Display the scaled test features that are passed to the model below.
X_test

In [None]:
# Run the trained model on the scaled test features.
predictions = model.predict(X_test)

In [None]:
# Mean absolute error between the actual and predicted test prices.
mean_absolute_error(y_true=y_test, y_pred=predictions)

In [None]:
# Root mean squared error: square root of the MSE on the test set.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=predictions))

In [None]:
# Explained variance score of the predictions on the test set.
explained_variance_score(y_true=y_test, y_pred=predictions)

In [None]:
# Mean of the price column (presumably shown to put the error metrics in context).
df['price'].mean()

In [None]:
# Median of the price column (presumably shown to put the error metrics in context).
df['price'].median()

In [None]:
# Our predictions
plt.scatter(y_test,predictions);

# Perfect predictions
plt.plot(y_test,y_test,'r');

In [None]:
# Residuals (actual - predicted). The 1-D targets are reshaped to a single
# column before subtracting the model output; -1 lets NumPy infer the row
# count instead of hard-coding the test-set size, so this keeps working when
# the data or the split ratio changes.
errors = y_test.values.reshape(-1, 1) - predictions

In [None]:
# Distribution plot of the prediction errors.
sns.displot(errors);

-------------
### Predicting on a brand new house

In [None]:
# Take the first row of the frame, without its price, as the example input.
single_house = df.drop(columns='price').iloc[0]

In [None]:
# Scale the single row with the already-fitted scaler. reshape(1, -1) turns
# the 1-D values into a one-sample 2-D array without hard-coding the number
# of features.
single_house = scaler.transform(single_house.values.reshape(1, -1))

In [None]:
# Display the scaled feature row.
single_house

In [None]:
# Model output for the single scaled row.
model.predict(single_house)

In [None]:
# Show the original first row, including its price, for comparison with the
# model output.
df.iloc[0]