# Regression Keras

### Install Requirements

In [1]:
! pip install memory_profiler



In [0]:
%load_ext memory_profiler

### Import Libraries

In [0]:
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import keras
from keras.layers import Input, Dense
from keras.models import Model, Sequential
from sklearn.preprocessing import MinMaxScaler

tf.logging.set_verbosity(tf.logging.WARN)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.5f}'.format

# Pretty Display of Variables
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [0]:
def process_california_data(ratio=0.8):
    """ Downloads the california housing dataset, preprocess the data and splits it.
    Args:
        ratio: Split ratio. Default is 0.8
    
    """

    print('Downloading data from web..')
    # Load training & test data set.
    train_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv", sep=",")
    test_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_test.csv", sep=",")

    print('Merging data..')
    # Merge data
    df = pd.concat([train_df, test_df])
    
    # Drop NA values
    df.dropna(inplace=True)
    
    print('Creating a synthetic feature..')
    # Create a synthetic feature.
    df["rooms_per_person"] = df["total_rooms"] / df["population"]
    
    # Scale the huge value columns
    df["median_house_value"] = df["median_house_value"] / 1000 

    print('Shuffling the data..')
    # Shuffle
    df = df.reindex(np.random.permutation(df.index))

    print('Heatmap of our data..')
    sample = df.sample(n=500)
    sns.heatmap(
        sample.corr(), 
        xticklabels=sample.columns.values,
        yticklabels=sample.columns.values,
        fmt=".2f",
        annot=True
    )

    print('Scaling the data between 0-1..')
    # Scale the data between 0-1
    scaler = MinMaxScaler(feature_range=(0,1))
    scale_columns = list(df.columns[2:])
    scaled = df.copy()
    scaled[scale_columns] = scaler.fit_transform(df[scale_columns])
    print("Data was multiplied by {:.6f} and added {:.4f}".format(scaler.scale_[0], scaler.min_[0]))


    print('Splitting train & test frames..')
    # Split
    
    split_size_head = round(ratio*scaled.shape[0])
    split_size_tail = scaled.shape[0] - split_size_head
    train = scaled.head(split_size_head)
    test = scaled.tail(split_size_tail)

    return train, test, scaler

### Data Process

In [9]:
train, test, scaler = process_california_data()

Data values were scaled by multiplying by 0.0196078431 and adding -0.0196


In [10]:
train.head(10)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,rooms_per_person
15403,-122.3,37.92,0.62745,0.04252,0.0419,0.01982,0.0467,0.24693,0.46186,0.04088
5334,-118.15,34.17,0.88235,0.06725,0.08644,0.04868,0.08074,0.10537,0.23196,0.02625
4780,-118.09,34.14,0.76471,0.08146,0.08504,0.04075,0.08798,0.33364,0.73979,0.03811
10571,-120.45,34.94,0.45098,0.04481,0.06921,0.03467,0.06841,0.13167,0.20722,0.02454
3479,-117.9,34.1,0.66667,0.07215,0.07356,0.04142,0.07926,0.28038,0.3332,0.03317
14462,-122.13,37.45,0.70588,0.03387,0.03042,0.01421,0.03371,0.51054,1.0,0.04539
5180,-118.13,33.91,0.64706,0.02409,0.02498,0.01539,0.0268,0.30419,0.4268,0.02973
13580,-121.97,37.25,0.60784,0.07618,0.07682,0.03335,0.08074,0.38835,0.72742,0.04359
13631,-121.98,37.28,0.4902,0.03111,0.0478,0.01729,0.05016,0.19947,0.52392,0.03421
5874,-118.2,33.92,0.86275,0.03377,0.03911,0.02864,0.04062,0.19171,0.26021,0.02235


In [11]:
train.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,rooms_per_person
count,13600.0,13600.0,13600.0,13600.0,13600.0,13600.0,13600.0,13600.0,13600.0,13600.0
mean,-119.56415,35.62332,0.54149,0.06936,0.08332,0.03991,0.08203,0.2331,0.39662,0.03559
std,2.00298,2.13409,0.24675,0.05686,0.06528,0.03231,0.0632,0.13115,0.23886,0.02219
min,-124.35,32.55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-121.79,33.93,0.33333,0.03846,0.04562,0.022,0.04588,0.14234,0.21629,0.02726
50%,-118.49,34.25,0.54902,0.05594,0.06712,0.03257,0.06693,0.21012,0.34113,0.03481
75%,-118.0,37.71,0.70588,0.08243,0.1004,0.04819,0.0992,0.29402,0.51608,0.04123
max,-114.31,41.95,1.0,0.86002,1.0,1.0,1.0,1.0,1.0,1.0


## Linear Regression

**Model Parameters**

In [0]:
learning_rate = 0.02
steps = 5
batch_size = 100

**Linear Regression with One Variable**

In [0]:
feature_columns = ["median_income"]
label = "median_house_value"

In [0]:
X = train[feature_columns].values
Y = train[label].values
X_test = test[feature_columns].values
Y_test = test[label].values

In [0]:
inputs = Input(shape=(1,))
outputs = Dense(1,activation='linear')(inputs)
opt = keras.optimizers.Adam(lr=learning_rate)
model = Model(inputs=inputs,outputs=outputs)
model.compile(optimizer=opt, loss="mse")

In [28]:
model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f262d8cad30>

In [29]:
%memit model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
peak memory: 288.96 MiB, increment: 2.73 MiB


In [30]:
%time model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 649 ms, sys: 43.1 ms, total: 692 ms
Wall time: 619 ms


<keras.callbacks.History at 0x7f263229b128>

In [31]:
%prun model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 

**Results**

**Train Loss (RMSE)**: 0.0297  
**Validation Loss(RMSE)**: 0.0323


**%time**    
CPU times: user 649 ms, sys: 43.1 ms, total: 692 ms
Wall time: 619 ms

**%prun**   
283249 function calls (269649 primitive calls) in 0.726 seconds

**%memit**   
peak memory: 288.96 MiB, increment: 2.73 MiB


**Model Parameters**  
learning_rate =  0.02
steps = 5
batch_size = 100 
periods = 10  


## Neural Networks

**Model Parameters**

In [0]:
learning_rate = 0.02
steps = 5
batch_size = 100

**Neural Networks**

In [0]:
feature_columns = ["households", "median_income", "rooms_per_person", "total_rooms", "housing_median_age"]
label = "median_house_value"

In [0]:
X = train[feature_columns].values
Y = train[label].values
X_test = test[feature_columns].values
Y_test = test[label].values

In [0]:
model = Sequential()

In [0]:
model.add(Dense(50, input_dim=5, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))

In [0]:
opt = keras.optimizers.Adam(lr=learning_rate)
model.compile(optimizer=opt, loss="mse")

In [41]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 50)                300       
_________________________________________________________________
dense_3 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 51        
Total params: 10,501
Trainable params: 10,501
Non-trainable params: 0
_________________________________________________________________


In [44]:
model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f262cfc6320>

In [45]:
%memit model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
peak memory: 305.30 MiB, increment: 1.44 MiB


In [46]:
%time model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 2.07 s, sys: 123 ms, total: 2.19 s
Wall time: 1.54 s


<keras.callbacks.History at 0x7f26344518d0>

In [47]:
%prun model.fit(X, Y, batch_size=batch_size, epochs=steps, validation_data=(X_test, Y_test), shuffle=False)

Train on 13600 samples, validate on 3400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 

**Results**

**Train Loss (RMSE)**: 0.0232  
**Validation Loss(RMSE)**: 0.0232


**%time**    
CPU times: user 2.07 s, sys: 123 ms, total: 2.19 s  
Wall time: 1.54 s

**%prun**   
347882 function calls (334282 primitive calls) in 1.655 seconds

**%memit**   
peak memory: 305.30 MiB, increment: 1.44 MiB


**Model Parameters**  
learning_rate =  0.02
steps = 5
batch_size = 100 
periods = 10  
