In [13]:
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import datetime

## Reading Data

In [14]:
# https://archive.ics.uci.edu/ml/datasets/auto+mpg
autompg = pd.read_csv('auto_mpg.csv')
autompg_disp = autompg['displacement'].astype(float)
autompg_mpg = autompg['mpg'].astype(float)
mean_disp = np.mean(autompg_disp)
min_disp = np.min(autompg_disp)
max_disp = np.max(autompg_disp)
print(mean_disp, max_disp, min_disp, max_disp-min_disp)
autompg_disp = autompg_disp.apply(lambda x:x/100)
print(autompg_disp.describe())
autompg_mpg = autompg_mpg.apply(lambda x: x/100)
print(autompg_mpg.describe())

193.425879397 455.0 68.0 387.0
count    398.000000
mean       1.934259
std        1.042698
min        0.680000
25%        1.042500
50%        1.485000
75%        2.620000
max        4.550000
Name: displacement, dtype: float64
count    398.000000
mean       0.235146
std        0.078160
min        0.090000
25%        0.175000
50%        0.230000
75%        0.290000
max        0.466000
Name: mpg, dtype: float64


In [3]:
# A function to get a batch of data
def getbatch(xval, yval, arraylength, batchsize=30):
    count = 0 
    while count < arraylength/batchsize:
        randstart = random.randint(0, arraylength-batchsize-1)
        count += 1
        yield (xval[randstart:randstart+batchsize], yval[randstart:randstart+batchsize])

# Test
#for i in getbatch(train_X, train_Y, n_samples):
#    print(i)

## Setting hyper-parameters

In [15]:
learning_rate = 0.003
n_epochs = 3000
display_step = 100
train_X = np.asarray(autompg_disp)
train_Y = np.asarray(autompg_mpg)
n_samples = train_X.shape[0]
n_samples

398

## Defining X and Y as placeholders

In [5]:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

## Initializing Weights and Biases

In [6]:
W = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())

## Defining model

In [7]:
# predicted is X*W+b. 
pred = tf.add(tf.multiply(X, W), b)


## Define cost, optimizer and accuracy

In [11]:
# The cost function is ((predicted-actual)^2)/2*n_samples. 
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Add all ops that need to be initialized
# The initilization needs to be run only after session is created
# as in session.run(init) below.
init = tf.global_variables_initializer()

## Running the session 
This will run the graph and use all the tensors that were previously defined

In [12]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for (x, y) in getbatch(train_X, train_Y, n_samples):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y:train_Y})
            print("Epoch = {:04d} and cost = {:.9f}".format(epoch+1, c))

    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    finalW = sess.run(W)
    finalb = sess.run(b)
    print("The final W = %0.4f and b = %0.4f" %(finalW, finalb))

Epoch = 0100 and cost = 0.182815567
Epoch = 0200 and cost = 0.062778436
Epoch = 0300 and cost = 0.053206019
Epoch = 0400 and cost = 0.047320731
Epoch = 0500 and cost = 0.042143021
Epoch = 0600 and cost = 0.037409283
Epoch = 0700 and cost = 0.033323165
Epoch = 0800 and cost = 0.029665982
Epoch = 0900 and cost = 0.026475605
Epoch = 1000 and cost = 0.023622980
Epoch = 1100 and cost = 0.021083884
Epoch = 1200 and cost = 0.018840501
Epoch = 1300 and cost = 0.016828144
Epoch = 1400 and cost = 0.015064315
Epoch = 1500 and cost = 0.013499096
Epoch = 1600 and cost = 0.012093076
Epoch = 1700 and cost = 0.010835682
Epoch = 1800 and cost = 0.009711892
Epoch = 1900 and cost = 0.008731600
Epoch = 2000 and cost = 0.007870960
Epoch = 2100 and cost = 0.007106712
Epoch = 2200 and cost = 0.006422998
Epoch = 2300 and cost = 0.005820717
Epoch = 2400 and cost = 0.005285838
Epoch = 2500 and cost = 0.004814483
Epoch = 2600 and cost = 0.004394567
Epoch = 2700 and cost = 0.004020926
Epoch = 2800 and cost = 0.00