In [32]:
#The optimal values of m and b can be actually calculated with way less effort than doing a linear regression. 
#this is just to demonstrate gradient descent

import plotly.figure_factory as ff
from numpy import *

# y = mx + b
# m is slope, b is y-intercept
def compute_error_for_line_given_points(b, m, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points.x[i]
        y = points.y[i]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))

def step_gradient(b_current, m_current, points, learningRate):
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += -(2/N) * (y - ((m_current * x) + b_current))
        m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    b = starting_b
    m = starting_m
    error = 0
    rsss = pd.DataFrame(dict(iteration=[],RSS=[]))
    for i in range(num_iterations):
        b, m = step_gradient(b, m, array(points), learning_rate)
        error = compute_error_for_line_given_points(b, m, points)
        #print("After {0} iterations b = {1}, m = {2}, error = {3}".format(i+1, b, m, error))
        rsss = rsss.append({'iteration': i+1,'RSS': error}, ignore_index=True)
    

    table = ff.create_table(rsss)
    py.iplot(table, filename='simple_table')

    trace = go.Scatter(
        x = rsss.iteration,
        y = rsss.RSS,
        mode = "lines+markers",
        name = "decreasing error",
        marker = dict(color = 'rgba(80, 26, 80, 0.8)'),
        text= rsss.RSS)
    data = [trace]
    layout = dict(title = 'RSS per each iteration',yaxis= dict(title= 'RSS',ticklen= 5,zeroline= False),xaxis= dict(title= 'Iteration',ticklen= 5,zeroline= False))
    fig = dict(data = data, layout = layout)
    py.iplot(fig)
    
    return [b, m]

def run():
    points = pd.read_csv("income.csv")
    
    learning_rate = 0.0001
    initial_b = 0 # initial y-intercept guess
    initial_m = 0 # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    print("Running...")
    #print(points.info())
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    #print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))

    
if __name__ == '__main__':
    run()


Starting gradient descent at b = 0, m = 0, error = 2946.6344970460204
Running...


In [15]:

import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd

def run():

    print("Running...")
    
    pts = pd.read_csv("income.csv")
    
    
    print(pts.info())

    # Creating trace1
    trace1 = go.Scatter(
        x = pts.x,
        y = pts.y,
        mode = "lines",
        name = "citations",
        marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
        text= pts.x)
    # Creating trace2
    trace2 = go.Scatter(
        x = pts.x,
        y = pts.y,
        mode = "lines+markers",
        name = "teaching",
        marker = dict(color = 'rgba(80, 26, 80, 0.8)'),
        text= pts.x)
    data = [trace1, trace2]
    layout = dict(title = 'Citation and Teaching vs World Rank of Top 100 Universities',xaxis= dict(title= 'World Rank',ticklen= 5,zeroline= False))
    fig = dict(data = data, layout = layout)
    py.iplot(fig)
    
    print(py)
    print("Running...")

if __name__ == '__main__':
    run()

Running...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
x    30 non-null float64
y    30 non-null float64
dtypes: float64(2)
memory usage: 520.0 bytes
None


<module 'plotly.offline' from 'C:\\Users\\tclem\\Anaconda3\\lib\\site-packages\\plotly\\offline\\__init__.py'>
Running...


In [14]:
from plotly import version
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
print(version)
init_notebook_mode(connected=True)
iplot([{"x": [1, 2, 3], "y": [3, 1, 6]}])

<module 'plotly.version' from 'C:\\Users\\tclem\\Anaconda3\\lib\\site-packages\\plotly\\version.py'>
