# Old Faithful Data Set

## Read in data from csv

In [None]:
%matplotlib inline  
# numpy efficiently deals with numerical multi-dimensional arrays.
import numpy as np
# matplotlib is a plotting library, and pyplot is its easy-to-use module.
import matplotlib.pyplot as plt

# Enlarge the default figure size (width, height in inches) for every plot
# drawn after this point.
plt.rcParams.update({'figure.figsize': (16.0, 8.0)})

# Adapted from https://stackoverflow.com/questions/25614749/how-to-import-csv-file-as-numpy-array-in-python
# Read the first two comma-separated columns of the file as floats.
# unpack=True hands each column back as its own array, so column 0 becomes
# `errupt` and column 1 becomes `wait`.
# NOTE(review): loadtxt parses every row as numbers; if the CSV carries a
# header row this call will fail -- confirm, or pass skiprows=1.
errupt, wait = np.loadtxt('data/faithful.csv', delimiter=',', usecols=(0,1), unpack=True, dtype=float)

# Echo both columns as a quick sanity check. The colon now sits between the
# caption and the values instead of trailing after the array.
print("Eruptions Data: %s" % (errupt,))
print("Waiting Data: %s" % (wait,))


## Investigate The Data

In [None]:
# Scatter plot of the raw observations: `errupt` on the x-axis against `wait`
# on the y-axis, drawn as black points ('k.').
plt.plot(errupt, wait, 'k.')
plt.xlabel('Eruption Duration (min.)')
plt.ylabel('Waiting time (min.)')
plt.show()

## Fit a Straight Line

In [None]:
# Fit a degree-1 polynomial to the data. The two coefficients are unpacked as
# the slope `m` and the intercept `c`, matching how they are used below.
line_coefficients = np.polyfit(errupt, wait, 1)
m, c = line_coefficients
print("Best fit is m = %f and c = %f" % (m, c))

# Evaluate the fitted line at every observed x value and build its legend
# entry up front, so the plotting calls below stay short.
fitted_wait = m * errupt+ c
fit_label = 'Best fit: $%0.1f x + %0.1f$' % (m,c)

# Draw the observations as black points with the fitted line on top in blue.
plt.plot(errupt, wait, 'k.', label='Original data')
plt.plot(errupt, fitted_wait, 'b-', label=fit_label)
plt.legend()
plt.show()

## Calculate Gradient Descent

In [None]:
def grad_m(x, y, m, c):
  """Return the partial derivative with respect to m of the squared-error cost.

  The cost is sum((y - m * x - c) ** 2); differentiating with respect to m
  gives -2 * sum(x * (y - m * x - c)).

  x, y: array-likes of equal (or broadcastable) shape. Plain lists and tuples
        are accepted as well as numpy arrays.
  m, c: current slope and intercept of the line.
  """
  # asarray is a no-op for arrays and lets plain sequences take part in the
  # element-wise arithmetic below.
  x = np.asarray(x)
  y = np.asarray(y)
  return -2.0 * np.sum(x * (y - m * x - c))

def grad_c(x, y, m, c):
  """Return the partial derivative with respect to c of the squared-error cost.

  The cost is sum((y - m * x - c) ** 2); differentiating with respect to c
  gives -2 * sum(y - m * x - c).

  x, y: array-likes of equal (or broadcastable) shape. Plain lists and tuples
        are accepted as well as numpy arrays.
  m, c: current slope and intercept of the line.
  """
  # asarray is a no-op for arrays and lets plain sequences take part in the
  # element-wise arithmetic below.
  x = np.asarray(x)
  y = np.asarray(y)
  return -2.0 * np.sum(y - m * x - c)

# Learning rate: each step moves (m, c) by eta times the cost gradient.
eta = 0.0001
# Starting guess for the slope and the intercept.
m, c = 1.0, 1.0
change = True

# Stop once a step moves both parameters by no more than `tol`. Waiting for
# exact float equality is fragile: the iterates can flip between neighbouring
# floats forever, and a NaN never compares equal to anything.
tol = 1e-12
# Hard cap so the cell always terminates, even if it never settles.
max_iter = 100000
iteration = 0

while change and iteration < max_iter:
  iteration += 1
  mnew = m - eta * grad_m(errupt, wait, m, c)
  cnew = c - eta * grad_c(errupt, wait, m, c)
  # A step size that is too large makes the iterates blow up to inf/NaN;
  # fail loudly rather than looping on meaningless values.
  if not (np.isfinite(mnew) and np.isfinite(cnew)):
    raise FloatingPointError("Gradient descent diverged; try a smaller eta.")
  if abs(mnew - m) <= tol and abs(cnew - c) <= tol:
    change = False
  else:
    m, c = mnew, cnew
    print("m: %20.16f  c: %20.16f" % (m, c))

if change:
  # The loop ended because of the iteration cap, not because it converged.
  print("Stopped after %d iterations without converging." % max_iter)