In [1]:
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.io import output_notebook
import statsmodels.api as sm
import random
 


In [2]:
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

In [3]:
def graph1(leng = 25, slope = 1.3):
    """Scatter noisy linear data and overlay its OLS fit, computed two ways.

    The fitted line is drawn once from ``result.predict`` and once from the
    intercept/slope coefficients applied by hand, so the two lines can be
    compared visually.

    Args:
        leng: Number of points; x runs over ``range(leng)``.
        slope: Multiplier applied to each x before integer noise from
            ``random.randrange(1, 5)`` is added.

    Returns:
        The Bokeh figure holding the scatter and both fitted lines.
    """
    xs = list(range(leng))
    ys = []
    for xi in xs:
        ys.append(xi * slope + random.randrange(1, 5))

    fig = figure()
    fig.circle(x = xs, y = ys)

    # One row per observation (a 1-tuple holding x), then add the constant column.
    design = sm.add_constant([(xi,) for xi in xs])
    fitted = sm.OLS(ys, design).fit()

    # params[0] is used as the intercept and params[1] as the slope.
    intercept = fitted.params[0]
    gradient = fitted.params[1]
    manual_line = [xi * gradient + intercept for xi in xs]
    predicted_line = fitted.predict(design)

    fig.line(x = xs, y = predicted_line)
    fig.line(x = xs, y = manual_line)
    return fig
# The returned figure is this cell's value; it is not passed to show() here.
graph1()

In [4]:
def graph2(leng = 25, slope = 1.3, max_range = 5):
    """Plot noisy linear data with its OLS fit and print the coefficient intervals.

    Args:
        leng: Number of points; x runs over ``range(leng)``.
        slope: Multiplier applied to each x before noise is added.
        max_range: Exclusive upper bound of the integer noise drawn with
            ``random.randrange(1, max_range)``; it must be greater than 1
            for that range to be non-empty.

    Returns:
        The Bokeh figure holding the scatter and the fitted line.

    Side effects:
        Prints ``result.conf_int()``, the confidence intervals of the fitted
        coefficients (constant term first, then the slope).
    """
    x = list(range(leng))
    y = [xi * slope + random.randrange(1, max_range) for xi in x]
    p = figure()
    p.circle(x = x, y = y)
    # One row per observation (a 1-tuple holding x), then add the constant column.
    X = list(zip(*[x]))
    xm = sm.add_constant(X)
    result = sm.OLS(y, xm).fit()
    # The intervals are only reported; the previous unpacking of the first row
    # into unused locals (and the second conf_int() call it needed) is gone.
    print(result.conf_int())
    p.line(x = x, y = result.predict(xm))
    return p
# Render graph2 with noise drawn from randrange(1, 10); the call also prints
# the coefficient confidence intervals.
show(graph2(max_range = 10))

[[0.57388721 4.6938051 ]
 [1.28670623 1.58098608]]


In [5]:
# Alternative that renders the figure explicitly: show(graph1())
graph1()

In [6]:
def graph3(leng = 25, slope = 1.3, max_range = 5):
    """Plot noisy linear data, its OLS fit, and the confidence band of the mean.

    Args:
        leng: Number of points; x runs over ``range(leng)``.
        slope: Multiplier applied to each x before noise is added.
        max_range: Exclusive upper bound of the integer noise drawn with
            ``random.randrange(1, max_range)``.

    Returns:
        The Bokeh figure with the scatter, the two red confidence-band lines
        (``mean_ci_lower`` / ``mean_ci_upper`` at alpha=0.05) and the fit.
    """
    xs = list(range(leng))
    ys = []
    for xi in xs:
        ys.append(xi * slope + random.randrange(1, max_range))

    fig = figure()
    fig.circle(x = xs, y = ys)

    # One row per observation (a 1-tuple holding x), then add the constant column.
    design = sm.add_constant([(xi,) for xi in xs])
    fitted = sm.OLS(ys, design).fit()

    # The summary frame also carries obs_ci_lower / obs_ci_upper, which are the
    # prediction interval; only the mean confidence band is drawn here.
    band = fitted.get_prediction(design).summary_frame(alpha=0.05)
    fit_line = fitted.predict(design)

    for bound in (band.mean_ci_lower, band.mean_ci_upper):
        fig.line(x = xs, y = bound, color = 'red')
    fig.line(x = xs, y = fit_line)
    return fig
# Render graph3 with noise drawn from randrange(1, 15).
show(graph3(max_range = 15))
# Alternative without show(): graph3()

In [7]:
def graph4(leng = 25, slope = 1.3, max_range = 5):
    """Plot an OLS fit with its confidence band, the band's diagonals, and a projection.

    Drawn on the returned figure:
      * the scatter of the noisy data and the fitted line,
      * the lower/upper confidence band of the mean (red, alpha=0.05),
      * two green lines joining opposite corners of that band, i.e. the
        steepest and shallowest straight lines that run from one edge of the
        band at the first x to the other edge at the last x,
      * a 10-step projection beyond the data that continues from the band's
        last lower/upper values using those two slopes.

    Args:
        leng: Number of points; x runs over ``range(leng)``. At least two
            points are needed, since the slopes divide by ``x[-1] - x[0]``.
        slope: Multiplier applied to each x before noise is added.
        max_range: Exclusive upper bound of the integer noise drawn with
            ``random.randrange(1, max_range)``.

    Returns:
        The Bokeh figure holding all of the glyphs listed above.
    """
    def get_pred_slope(conf_i):
        """
        Calculate slopes and intercepts for the diagonals of the confidence band.

        ``conf_i`` holds one ``[lower, upper]`` row per observation. Returns
        ``((intercept, slope), (intercept, slope))`` for the shallow line
        (upper bound at the first x to lower bound at the last x) followed by
        the steep line (lower bound at the first x to upper bound at the last).
        The value at ``x[0]`` doubles as the intercept because x starts at 0.
        """
        run = x[-1] - x[0]
        sl_upper = (conf_i[-1][1] - conf_i[0][0]) / run
        sl_lower = (conf_i[-1][0] - conf_i[0][1]) / run
        return (conf_i[0][1], sl_lower), (conf_i[0][0], sl_upper)

    x = list(range(leng))
    y = [xi * slope + random.randrange(1, max_range) for xi in x]
    p = figure()
    p.circle(x = x, y = y)
    # One row per observation (a 1-tuple holding x), then add the constant column.
    X = list(zip(*[x]))
    xm = sm.add_constant(X)
    result = sm.OLS(y, xm).fit()
    predictions = result.get_prediction(xm)
    # The summary frame also carries obs_ci_lower / obs_ci_upper, which are the
    # prediction interval; only the mean confidence band is drawn here.
    frame = predictions.summary_frame(alpha=0.05)
    y_hat = result.predict(xm)
    p.line(x = x, y = frame.mean_ci_lower, color = 'red')
    p.line(x = x, y = frame.mean_ci_upper, color = 'red')
    p.line(x = x, y = y_hat)

    l_, u_ = get_pred_slope(predictions.conf_int())
    p.line(x = x, y = [xi * u_[1] + u_[0] for xi in x], color = 'green')
    p.line(x = x, y = [xi * l_[1] + l_[0] for xi in x], color = 'green')

    # Start the projection right after the data. This used to be a literal 25,
    # which only matched the default leng.
    # NOTE(review): the first projected y equals the band value at x[-1]
    # (leng - 1) but is drawn at x = leng, as in the original; confirm whether
    # the projection should instead start at x[-1].
    steps = range(10)
    x2 = [leng + step for step in steps]
    last_upper = frame.mean_ci_upper.tolist()[-1]
    last_lower = frame.mean_ci_lower.tolist()[-1]
    proj_upper = [last_upper + u_[1] * step for step in steps]
    proj_lower = [last_lower + l_[1] * step for step in steps]
    p.line(x = x2, y = proj_lower)
    p.line(x = x2, y = proj_upper)

    return p
# Render graph4 with noise drawn from randrange(1, 15).
show(graph4(max_range = 15))

In [54]:
import pandas as pd
import statsmodels.formula.api as smf
def multi1(df, leng = 25, slope = 1.3):
    """Fit ``death_rate`` on ``per_nonwhite`` and ``poor_families`` via the formula API.

    Args:
        df: Data passed to ``smf.ols``; it must provide the three columns
            named in the formula.
        leng: Unused; kept for signature compatibility.
        slope: Unused; kept for signature compatibility.

    Returns:
        The fitted parameters as a plain list (``res.params.tolist()``).
    """
    response = 'death_rate'
    predictors = ['per_nonwhite', 'poor_families']
    right_hand_side = '+ '.join(predictors)
    formula = '{dep} ~ {cols}'.format(dep = response, cols = right_hand_side)
    fitted = smf.ols(formula=formula, data=df).fit()
    return fitted.params.tolist()
    
# Load the death-rate data set from a path relative to the notebook's working directory.
df = pd.read_csv('big_data/data/death_rate.csv')
# Not the last expression in the cell, so this preview is not displayed.
df.head()
# The cell's displayed value: the coefficient list returned by multi1.
multi1(df)

[900.6400193695688, 4.916666767971076, -1.29815464712045]

In [76]:
def multi2(df, leng = 25, slope = 1.3):
    """Fit ``death_rate`` on ``per_nonwhite`` and ``poor_families`` with ``sm.OLS``.

    Args:
        df: Frame providing the ``death_rate``, ``per_nonwhite`` and
            ``poor_families`` columns.
        leng: Unused; kept for signature compatibility.
        slope: Unused; kept for signature compatibility.

    Returns:
        ``result.params`` of the fitted model; the constant term comes first,
        followed by the coefficients in the column order given above.
    """
    response = df['death_rate'].to_list()
    # One (per_nonwhite, poor_families) row per observation.
    predictor_rows = list(zip(df['per_nonwhite'].tolist(),
                              df['poor_families'].tolist()))
    design = sm.add_constant(predictor_rows)
    fitted = sm.OLS(response, design).fit()
    return fitted.params
    
# Fit the two-predictor model and unpack its parameters:
# index 0 is used as the intercept, 1 and 2 as the per-column coefficients.
l = multi2(df)
b =l[0]
m1 = l[1]
m2 = l[2]
per_nonwhite = df['per_nonwhite'].tolist()
poor_families = df['poor_families'].tolist()
# Rebuild the fitted values by hand: b + m1 * per_nonwhite + m2 * poor_families, row by row.
y_hat = []
for counter, i in enumerate(per_nonwhite):
    y_hat.append(b + m1 * i + m2 * poor_families[counter])
# Bare expression in the middle of the cell: evaluated but not displayed.
y_hat
# Plot the fitted values against their row position (0 .. len(y_hat) - 1).
p = figure()
x = range(len(y_hat))
p.line(x = x, y = y_hat)
show(p)
