## HW: Regression ##

### due 2019-10-29 ###

HW submission by group (up to 4 people)
* Taoyu Cai <taoyuc3@illinois.edu>

### imports ###

In [None]:
import numpy
import pandas
import time
import random
import matplotlib
#%matplotlib notebook
import matplotlib.pyplot as plt
import scipy.stats
import matplotlib.offsetbox as offsetbox
from matplotlib.ticker import StrMethodFormatter
from scipy.optimize import minimize


In [None]:
# Notebook-wide matplotlib defaults: font size and default line width.
# Individual plot calls may still override these (e.g. via linewidth=...).
params={"font.size":15,"lines.linewidth":5}
plt.rcParams.update(params)

**Problem:**  Download historical ticker prices for AAPL and MSFT from <https://finance.yahoo.com/>.
Compute and then plot the returns.
Regress AAPL on MSFT and then regress MSFT on AAPL.  Which is a "better" regression (i.e., does MSFT drive AAPL or vice versa)?

In [None]:
# Ticker symbols; they double as CSV file stems and as DataFrame column names.
MS,AP="MSFT","AAPL"

In [1]:
def getdata(ticker):
    """Read ``<ticker>.csv`` and return its "Adj Close" column as a Series.

    The first CSV column is used as the index and converted to datetimes.
    The returned Series is an independent copy named after ``ticker``.

    Parameters
    ----------
    ticker : str
        File stem of the CSV in the current working directory; also used
        as the name of the returned Series.

    Returns
    -------
    pandas.Series
        The "Adj Close" values indexed by datetime.
    """
    frame=pandas.read_csv(ticker +".csv",index_col=0)
    frame.index=pandas.to_datetime(frame.index)
    adj_close=frame["Adj Close"].copy()
    adj_close.name=ticker
    return adj_close

# Load both adjusted-close series, then preview the last ten rows of each.
ms,ap=getdata(MS),getdata(AP)

print(ms.tail(10))
print("\n",ap.tail(10))

NameError: name 'MS' is not defined

In [None]:
# Put the two series side by side, keeping only dates present in both (inner join).
data_raw=pandas.concat([ms,ap],axis=1,join="inner")
data_raw.tail(10)

In [None]:
# Work on an independent copy so data_raw stays untouched.
prices=data_raw.copy(deep=True)
prices.tail(10)

In [None]:
# Simple one-row returns: price divided by the previous row's price, minus 1.
# Rows containing NaN are dropped (the first row has no previous price).
returns=(prices/prices.shift(1)-1).dropna(axis=0)
returns.tail(10)

In [None]:
# Time series of both return columns on one set of axes.
# Columns are selected through the MS/AP constants (not hard-coded strings) so
# this cell stays consistent with the rest of the notebook if a ticker changes.
plt.figure()
plt.plot(returns.index,returns[MS].values,label=MS,color="orange",linewidth=3)
plt.plot(returns.index,returns[AP].values,label=AP,color="purple",linewidth=2)
plt.legend()
plt.title("Returns")
plt.xlabel("Date")
plt.ylabel("Daily return")
plt.xticks(rotation=25)
plt.show()
plt.close()

## Regress AAPL on MSFT

In [None]:
# Scatter of same-row returns: MS on the horizontal axis, AP on the vertical axis.
fig,ax=plt.subplots()
ax.scatter(returns[MS],returns[AP],color="red")
ax.set(
    xlabel=MS+" returns (x)",
    ylabel=AP+" returns (y)",
    title="Daily Returns",
)
plt.show()
plt.close(fig)

In [None]:
# Regression input in percent: x = MS returns, y = AP returns.
data=returns.rename(columns={MS:"x",AP:"y"})*100

In [None]:
class Regression:
    """Half-mean-squared-error objective for fitting a line ``y = m*x + b``.

    ``data`` must support ``data["x"]`` and ``data["y"]``; both are stored
    as-is. The instance also keeps an iteration counter and a list of
    formatted progress strings that ``callback`` fills in.
    """

    def __init__(self,data):
        self.xvals=data["x"]
        self.yvals=data["y"]
        self.reset()

    def reset(self):
        """Zero the iteration counter and empty the progress log."""
        self.ctr=0
        self.callbacktext=[]

    def make_linear_function(self,m,b):
        """Return a one-argument callable computing ``m*x+b``."""
        return lambda x: m*x+b

    def Err(self,x):
        """Return half the mean squared residual for the parameters ``x=(m,b)``."""
        slope,intercept=x
        predict=self.make_linear_function(slope,intercept)
        residuals=self.yvals-predict(self.xvals)
        return 0.5*numpy.mean(residuals**2)

    def callback(self,x,verbose=False):
        """Log the current parameters and error, then advance the counter.

        The formatted line is appended to ``self.callbacktext`` and is also
        printed when ``verbose`` is true.
        """
        slope,intercept=x
        template="ctr={0:}; (m,b)=({1:.3f},{2:.2E}); error={3:.2E}"
        line=template.format(self.ctr,slope,intercept,self.Err(x))
        self.callbacktext.append(line)
        if verbose:
            print(line)
        self.ctr+=1

In [None]:
# Objective for the first fit: `data` has x = MS returns and y = AP returns (both in percent).
myregression=Regression(data)

In [None]:
# Clear the callback log and counter first, so re-running this cell does not
# append to the iteration history of a previous run.
myregression.reset()
# Minimise the half mean squared error over (m, b), starting from m=0, b=0.
bestline=minimize(myregression.Err,[0,0],callback=myregression.callback)
(m_best,b_best)=bestline["x"]
print("m={0:}; b={1:}".format(m_best,b_best))

In [None]:
# Progress lines recorded by myregression.callback during the minimisation above.
myregression.callbacktext

In [None]:
# Percent-return scatter (x = MS, y = AP) with the fitted line on top.
# NOTE: m_best/b_best are whatever the most recently run minimize cell produced;
# run this cell right after the fit for `data`.
plt.figure()
plt.scatter(data["x"],data["y"],label=None,color="red")
# Axis labels come from the ticker constants, which also fixes the "APPL" misspelling.
plt.xlabel(MS)
plt.ylabel(AP)
# Evaluate the line at the current x-axis limits so it spans the whole plot.
xlims=numpy.array(plt.gca().get_xlim())
plt.plot(xlims,m_best*xlims+b_best,color="blue",label="{0:.3}x+{1:.3E}".format(m_best,b_best))
plt.legend()
plt.title(AP+" on "+MS)
plt.show()
plt.close()

## Regress MSFT on AAPL

In [None]:
# Scatter of same-row returns: AP on the horizontal axis, MS on the vertical axis.
fig,ax=plt.subplots()
ax.scatter(returns[AP],returns[MS],color="red")
ax.set(
    xlabel=AP+" returns (x)",
    ylabel=MS+" returns (y)",
    title="Daily Returns",
)
plt.show()
plt.close(fig)

In [None]:
# Regression input in percent with the roles swapped: x = AP returns, y = MS returns.
data1=returns.rename(columns={AP:"x",MS:"y"})*100

In [None]:
# Objective for the second fit: `data1` has x = AP returns and y = MS returns (both in percent).
myregression1=Regression(data1)

In [None]:
# Clear the callback log and counter first, so re-running this cell does not
# append to the iteration history of a previous run.
myregression1.reset()
# Minimise the half mean squared error over (m, b), starting from m=0, b=0.
# NOTE: this overwrites bestline/m_best/b_best from the earlier fit.
bestline=minimize(myregression1.Err,[0,0],callback=myregression1.callback)
(m_best,b_best)=bestline["x"]
print("m={0:}; b={1:}".format(m_best,b_best))

In [None]:
# Progress lines recorded by myregression1.callback during the minimisation above.
myregression1.callbacktext

In [None]:
# Percent-return scatter (x = AP, y = MS) with the fitted line on top.
# Plot from data1, the frame this regression was actually fitted on, instead of
# reading the first regression's `data` with its columns swapped.
# NOTE: m_best/b_best are whatever the most recently run minimize cell produced;
# run this cell right after the fit for `data1`.
plt.figure()
plt.scatter(data1["x"],data1["y"],label=None,color="red")
# Axis labels come from the ticker constants, which also fixes the "APPL" misspelling.
plt.xlabel(AP)
plt.ylabel(MS)
# Evaluate the line at the current x-axis limits so it spans the whole plot.
xlims=numpy.array(plt.gca().get_xlim())
plt.plot(xlims,m_best*xlims+b_best,color="blue",label="{0:.3}x+{1:.3E}".format(m_best,b_best))
plt.legend()
plt.title(MS+" on "+AP)
plt.show()
plt.close()

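# As a result, the linear regression of MSFT on AAPL has a smaller error (0.63) than the regression of AAPL on MSFT (1.02).
# The dataset spans one year, and we conclude that during this time interval Apple drives the returns of Microsoft.
# Caveat: the two errors are measured on different dependent variables (MSFT returns vs. AAPL returns), so the smaller error may simply reflect MSFT returns being less volatile; for a one-variable linear regression the R² is identical in both directions, so this comparison alone does not establish which stock drives the other.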