# Problem Set 6
## Suleyman Gozen
### I thank Yung-Hsu Tsui for his valuable comments.

## The Lucas Model

In [None]:
import numpy as np
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like #importing pandas_datareader gives me an error without this line
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import datetime
from scipy import optimize as opt

### Exercise 1

In [None]:
def convert_to_daily_with_ffill(df, start='01-01-1950', end='07-01-2018'):
    """Reindex ``df`` onto a daily calendar and forward-fill the gaps.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Date-indexed data to expand.
    start, end : str or datetime-like, optional
        Inclusive bounds handed to ``pandas.date_range``. The defaults
        reproduce the window that used to be hard-coded here.

    Returns
    -------
    Same type as ``df``
        One row per calendar day between ``start`` and ``end``; days that
        were absent from ``df`` carry the last earlier observation (rows
        before the first observation stay NaN).
    """
    daily_index = pd.date_range(start, end)
    # ``fillna(method="ffill")`` is deprecated in recent pandas; ``ffill()``
    # is the direct equivalent.
    return df.reindex(daily_index).ffill()

# Sample window used for every FRED download below.
start = datetime.datetime(1950, 1,1)
end = datetime.datetime(2018, 7, 1)
# Download the series by their FRED codes through pandas_datareader.
con = web.DataReader('PCECC96', 'fred', start, end)
w5000 = web.DataReader('WILL5000INDFC', 'fred', start, end)
# Expand the index series to a forward-filled daily calendar so that the
# assignment into `df` below finds a value on every date in `con.index`.
w5000 = convert_to_daily_with_ffill(w5000)
defl = web.DataReader('GDPDEF', 'fred', start, end)
tbill = web.DataReader('TB3MS', 'fred', start, end)

# Assemble everything on the index of the consumption series.
df = pd.DataFrame(columns=['con', 'w5000', 'tbill', 'defl'], index = con.index)
df['con'] = con
df['w5000'] = w5000
df['defl'] = defl
df['infl'] = df['defl']/df['defl'].shift(1)  # gross deflator growth from the previous row to this one
df['w5000_d'] = df['w5000']/df['infl']  # index level divided by gross deflator growth
df['R_nom'] = df['w5000'].shift(-1)/df['w5000']  # gross index return from this row to the next
# NOTE(review): `R_nom` looks one row ahead while `infl` looks one row back;
# confirm that this timing is the intended deflation of the return.
df['R'] = df['R_nom']/df['infl']
# Percent -> gross rate. NOTE(review): presumably TB3MS is an annualised
# rate while `R` is a one-row return — confirm the units are comparable.
df['tbill'] = tbill/100. + 1
df['con_1'] = con.shift(-1)  # consumption in the next row
df  = df.dropna(axis=0)  # drop every row that has a missing value in any column
df.head(10)

In [None]:
# Pull the cleaned columns out of `df` as float arrays; these module-level
# names are read by the moment functions defined in the following cells.
# `df` was created with empty columns, so every conversion pins dtype=float
# (the `w` line previously omitted it, unlike its siblings).
c = np.array(df['con'], dtype=float)        # consumption in the current row
cp1 = np.array(df['con_1'], dtype=float)    # consumption in the next row
t = np.array(df['tbill'], dtype=float)      # risk free rate
R = np.array(df['R'], dtype=float)          # deflated gross index return
w = np.array(df['w5000_d'], dtype=float)    # deflated index level
π = np.array(df['infl'], dtype=float)       # gross deflator growth

In [None]:
def m_tp1(ct, ctp1, γ, β=.99):
    """Return β / (ctp1 / ct) ** γ.

    Parameters
    ----------
    ct, ctp1 : float or array-like
        Current and next-period consumption; arrays are handled elementwise.
    γ : float
        Exponent applied to consumption growth.
    β : float, optional
        Multiplicative factor. Defaults to 0.99, the value that used to be
        fixed inside the function.

    Returns
    -------
    float or numpy.ndarray
        Same shape as the broadcast of the inputs.
    """
    growth = ctp1 / ct
    return β / growth**γ

def mvec(γ):
    """Evaluate ``m_tp1`` for every observation in the module-level arrays.

    Reads the global arrays ``c`` (current consumption) and ``cp1`` (next
    consumption) and returns one value of ``m_tp1`` per element.

    Parameters
    ----------
    γ : float or size-1 array
        Exponent forwarded to ``m_tp1``. SciPy solvers pass γ as a 0-d or
        length-1 array; broadcasting handles both without element-by-element
        assignment.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``c``.
    """
    # m_tp1 only uses elementwise arithmetic, so one call on the whole arrays
    # replaces the explicit Python loop over observations.
    current = np.asarray(c, dtype=float)
    following = np.asarray(cp1, dtype=float)
    return m_tp1(current, following, γ)

In [None]:
def emp5(γ):
    """Return cov(R, m) + mean(R) * mean(m) for the given γ.

    ``m`` is ``mvec(γ)`` and ``R`` is the module-level return array. The
    covariance is the off-diagonal entry of ``np.cov`` for the pair.
    """
    sdf = mvec(γ)

    # np.cov treats its two 1-D arguments as two variables, so entry [0, 1]
    # is their covariance.
    covariance = np.cov(R, sdf)[0, 1]

    return covariance + R.mean() * sdf.mean()

In [None]:
def crit(γ):
    '''Residual of equation 5: ``emp5(γ) - 1``.

    The equilibrium γ is the value at which this residual is zero.'''
    return emp5(γ) - 1

In [None]:
# find equilibrium γ
# Solve crit(γ) = 0 with SciPy's Broyden root finder, starting from γ = 40.
γ_guess=40
result = opt.broyden1(crit,γ_guess)
print("equilibrium γ:",result)

### Exercise 3

In [None]:
def crit3(γ):
    """Sum of squared gaps between the risk-free rate and 1 / mean(m).

    ``m`` is ``mvec(γ)`` and ``t`` is the module-level risk-free-rate array;
    the gap ``t - 1 / mean(m)`` (equation 7) is squared and summed over all
    observations.
    """
    implied_rate = 1 / mvec(γ).mean()
    residuals = t - implied_rate
    return np.sum(residuals**2)

In [None]:
# find equilibrium γ
# crit3 returns a sum of squared errors, which is non-negative and in general
# never reaches zero, so a root finder cannot converge on it. Minimise it.
γ_guess3=2
result3 = opt.minimize(crit3,γ_guess3)
print("equilibrium γ:",result3.x)

### Exercise 4

In [None]:
# Functions that compute m_{t+1}, now with β passed in as a parameter:
def m_tp1_2(ct, ctp1, γ, β):
    """Return β / (ctp1 / ct) ** γ, with β supplied by the caller."""
    growth = ctp1 / ct
    return β / (growth**γ)

def mvec2(γ,β):
    """Evaluate ``m_tp1_2`` for every observation in the module-level arrays.

    Reads the global arrays ``c`` (current consumption) and ``cp1`` (next
    consumption).

    Parameters
    ----------
    γ, β : float
        Forwarded unchanged to ``m_tp1_2``.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``c``.
    """
    # m_tp1_2 only uses elementwise arithmetic, so one call on the whole
    # arrays replaces the explicit Python loop over observations.
    current = np.asarray(c, dtype=float)
    following = np.asarray(cp1, dtype=float)
    return m_tp1_2(current, following, γ, β)

In [None]:
def err4(params):
    """Return the two moment errors for ``params = (γ, β)``.

    Element 0 is equation 5: ``cov(R, m) + mean(R) * mean(m) - 1``.
    Element 1 is equation (7): the sum over observations of
    ``(t - 1 / mean(m)) ** 2``.
    ``m`` is ``mvec2(γ, β)``; ``R`` and ``t`` are module-level arrays.
    """
    γ, β = params

    sdf = mvec2(γ, β)
    mean_sdf = sdf.mean()

    # equation 5: np.cov on the pair gives their covariance at [0, 1]
    pricing_gap = np.cov(R, sdf)[0, 1] + R.mean() * mean_sdf - 1

    # equation (7)
    riskfree_gap = np.sum((t - 1 / mean_sdf) ** 2)

    return np.array([pricing_gap, riskfree_gap])

In [None]:
def crit4(params, W):
    """Quadratic form ``e' W e`` of the moment errors ``e = err4(params)``.

    ``params`` is the pair (γ, β) and ``W`` the weighting matrix.
    """
    moments = err4(params)
    return moments.T @ W @ moments

In [None]:
# Starting values and an identity weighting matrix for the two moments.
γ_guess = 4
β_guess = .95
W = np.eye(2)
guess4 = np.array([γ_guess,β_guess])
args4=W  # extra argument handed to crit4 after `params`

# Minimise crit4 over (γ, β) subject to γ >= 1e-10 and 1e-10 <= β <= 1.
result4 = opt.minimize(crit4, guess4, args = args4,method = 'L-BFGS-B', bounds = ((1e-10, None), (1e-10, 1)))

print("optimal γ:", result4.x[0],"\noptimal β:",result4.x[1])

In [None]:
# Evaluate crit4 on an n-by-N grid of (γ, β) and draw one curve per β.
n = 60
N = 5
γ_grid = np.linspace(0, 5, n)
β_grid = np.linspace(0.95, 0.99, N)
crit4_values = np.zeros((n, N))
# The grids are named γ_grid / β_grid; the loop used to reference the
# undefined names gamma_grid / beta_grid and raised NameError.
for i, γ_i in enumerate(γ_grid):
    for j, β_j in enumerate(β_grid):
        crit4_values[i, j] = crit4((γ_i, β_j), W)

for j, β_j in enumerate(β_grid):
    # `\beta` needs its backslash to render as the Greek letter in mathtext.
    plt.plot(γ_grid, crit4_values[:, j], label=rf'$\beta$ = {β_j:.2f}')
plt.xlabel('gamma')
plt.legend()
plt.show()

## The Kyle Model

In [None]:
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
import statsmodels.formula.api as sm

In [None]:
# load data from pickle files
# NOTE(review): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
def _load_pickle(path):
    """Unpickle the object stored at ``path``, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


AAPL = _load_pickle("AAPL")
C = _load_pickle("C")
GBX = _load_pickle("GBX")
SPY = _load_pickle("SPY")
THO = _load_pickle("THO")

### Exercise 1

In [None]:
class orderbook:
    """One order-book snapshot with cumulative signed quantities.

    Parameters
    ----------
    orderbook : sequence
        ``orderbook[0]`` is stored as the timestamp. ``orderbook[1]`` is a
        mapping handed to ``DataFrame.from_dict(..., orient='index')``: its
        keys become ``price``, value position 0 becomes ``quantity`` and
        position 1 becomes ``side`` (``'B'`` or ``'S'``); positions 2 and 3
        are dropped.

    Attributes
    ----------
    timestamp
        ``orderbook[0]`` as given.
    orderbook : pandas.DataFrame
        All rows sorted by price, with a ``y`` column holding the cumulative
        quantity: negative and accumulated from the highest price downwards
        for ``'B'`` rows, positive and accumulated from the lowest price
        upwards for ``'S'`` rows.
    orderbook50, orderbook5 : pandas.DataFrame
        Rows of ``orderbook`` whose price lies within 50% / 5% of the
        midpoint returned by ``find_p0``.
    """

    def __init__(self, orderbook):
        self.timestamp = orderbook[0]

        book = pd.DataFrame.from_dict(orderbook[1], orient='index')
        book = book.drop(columns=[2, 3]).reset_index()
        book.columns = ['price', 'quantity', 'side']

        bids = book[book.side == 'B'].sort_values('price', ascending=False)
        # buying = selling negative assets
        bids = bids.assign(y=-bids['quantity'].cumsum())

        asks = book[book.side == 'S'].sort_values('price', ascending=True)
        asks = asks.assign(y=asks['quantity'].cumsum())

        self.orderbook = pd.concat([bids, asks]).sort_values('price')
        p0 = self.find_p0()  # get midpoint of BBO

        # data within 50% and within 5% of the BBO midpoint
        self.orderbook50 = self._price_window(self.orderbook, p0, 0.5, 1.5)
        self.orderbook5 = self._price_window(self.orderbook, p0, 0.95, 1.05)

    @staticmethod
    def _price_window(book, p0, lower, upper):
        """Return the rows of ``book`` with ``p0*lower <= price <= p0*upper``."""
        return book[(book.price <= p0 * upper) & (book.price >= p0 * lower)]

    @staticmethod
    def _midpoint(book):
        """Return the mean of the lowest ``'S'`` price and highest ``'B'`` price."""
        best_offer = book[book.side == 'S'].price.min()
        best_bid = book[book.side == 'B'].price.max()
        return (best_offer + best_bid) / 2

    @staticmethod
    def _plot_book(book):
        """Draw ``y`` against ``price`` for ``book`` on the current axes."""
        plt.plot(book.price, book.y, label='Orderbook')
        plt.xlabel('price')
        plt.ylabel('quantity')
        plt.legend()

    def plot(self):
        '''plot data'''
        self._plot_book(self.orderbook)

    def plot50(self):
        '''plot data within 50% of BBO'''
        self._plot_book(self.orderbook50)

    def OLS(self, orderbook):
        '''regress cumulative sum on price

        Returns the fitted parameter Series (intercept first, then the
        coefficient on price).'''
        return sm.ols(formula="y ~ price", data=orderbook).fit().params

    def plot_estimates(self):
        '''plot the line fitted on the 5% window across the 50% window'''
        window50 = self.orderbook50
        # fit linear regression to data within 5% of BBO
        ols_params = self.OLS(self.orderbook5)
        # The params Series is indexed by label; plain integer keys on it are
        # deprecated in pandas, so read the two positions explicitly.
        γ = ols_params.iloc[0]
        β = ols_params.iloc[1]
        estimate = γ + β * window50.price
        plt.plot(window50.price, estimate, label='Estimate')
        plt.xlim(window50.price.min(), window50.price.max())
        plt.ylim(window50.y.min() * 1.1, window50.y.max() * 1.1)
        plt.xlabel('price')
        plt.ylabel('quantity')
        plt.legend()

    def find_p0(self):
        '''midpoint of the best bid and best offer over all rows'''
        return self._midpoint(self.orderbook)  # actual midpoint of BBO

    def find_p0_5(self):
        '''find p for data within 5% of BBO'''
        window5 = self._price_window(self.orderbook, self.find_p0(), 0.95, 1.05)
        return self._midpoint(window5)

In [None]:
# Build one order book per ticker from the first snapshot of each data set.
# aapl0 used to be built from C[0], so the figure titled "AAPL" actually
# showed C's order book.
aapl0 = orderbook(AAPL[0])
c0 = orderbook(C[0])
gbx0 = orderbook(GBX[0])
tho0 = orderbook(THO[0])
spy0 = orderbook(SPY[0])

# One figure per ticker: the full book plus the fitted line.
for label, book in (("AAPL", aapl0), ("C", c0), ("GBX", gbx0), ("THO", tho0), ("SPY", spy0)):
    book.plot()
    book.plot_estimates()
    plt.title(f"{label}, timestamp 34260, all data")
    plt.show()

In [None]:
# One figure per ticker: the book restricted to 50% around the BBO midpoint,
# overlaid with the fitted line.
for label, book in (("AAPL", aapl0), ("C", c0), ("GBX", gbx0), ("THO", tho0), ("SPY", spy0)):
    book.plot50()
    book.plot_estimates()
    plt.title(f"{label}, timestamp 34260, data within 50% of BBO")
    plt.show()

### Exercise 2

In [None]:
class ticker:
    """A sequence of order-book snapshots for one security.

    Parameters
    ----------
    orderbooks : iterable
        Raw snapshots; each one is passed to ``orderbook(...)``.

    Attributes
    ----------
    orderbooks : list
        One ``orderbook`` instance per snapshot.
    timestamps : list
        The ``timestamp`` of each snapshot, in the same order.
    """

    def __init__(self, orderbooks):
        self.orderbooks = [orderbook(snapshot) for snapshot in orderbooks]  # index over all orderbook
        self.timestamps = [book.timestamp for book in self.orderbooks]  # all timestamps

    def _ols_params(self):
        """Return an (n_books, 2) array of the first two OLS parameters.

        Each row comes from ``book.OLS(book.orderbook5)``. The parameter
        Series is label-indexed, so the values are taken by position with
        ``.iloc`` rather than with plain integer keys, which pandas
        deprecates.
        """
        rows = [book.OLS(book.orderbook5).iloc[:2].to_numpy() for book in self.orderbooks]
        # reshape keeps the result two-dimensional even when there are no books
        return np.array(rows, dtype=float).reshape(-1, 2)

    def estimateβ(self):
        '''computes estimates of β for each orderbook
        uses data within 5% of the BBO'''
        return self._ols_params()[:, 1]

    def estimateγ(self):
        '''computes estimates of γ for each orderbook
        uses data within 5% of the BBO'''
        return self._ols_params()[:, 0]

    def plotβ(self):
        '''plots the β estimates against the timestamps'''
        plt.plot(self.timestamps, self.estimateβ())
        plt.xlabel('timestamp')
        plt.ylabel('Noise/Signal Ratio (β)')

    def plot_p0_estimates(self):
        '''plots -γ/β for each orderbook against the timestamps'''
        # Run the regressions once and read both columns from the result.
        params = self._ols_params()
        p0 = -params[:, 0] / params[:, 1]  # estimate BBO
        plt.plot(self.timestamps, p0, label='Estimated p_0')
        plt.xlabel('Seconds after midnight')
        plt.ylabel('BBO')  # was misspelled 'BB0' (digit zero)
        plt.legend()

    def plot_p0(self):
        '''plots the BBO midpoint of each orderbook, computed from all rows'''
        p0 = np.array([book.find_p0() for book in self.orderbooks])
        plt.plot(self.timestamps, p0, label='data BBO, all data')
        plt.xlabel('timestamp')
        plt.ylabel('BBO')
        plt.legend()

    def plot_p0_5(self):
        '''plots actual p0 for data within 5% of BBO'''
        p0 = np.array([book.find_p0_5() for book in self.orderbooks])
        plt.plot(self.timestamps, p0, label='data BBO, data within 5% of BBO')
        plt.xlabel('timestamp')
        plt.ylabel('BBO')
        plt.legend()

In [None]:
# Wrap every AAPL snapshot in an `orderbook` via the `ticker` class.
aapl = ticker(AAPL)

In [None]:
# Overlay the midpoint computed from quotes within 5% of the BBO with the
# midpoint implied by the per-snapshot regressions (-γ/β).
aapl.plot_p0_5()
#aapl.plot_p0()
aapl.plot_p0_estimates()

In [None]:
# Build a `ticker` (one `orderbook` per snapshot) for each of the five data sets.
aapl = ticker(AAPL)
tho = ticker(THO)
spy = ticker(SPY)
gbx = ticker(GBX)
# NOTE(review): this rebinds the module-level name `c`, which earlier held the
# consumption array read by `mvec` / `mvec2`; re-running those cells after this
# one would pick up the ticker instead — consider a different name here.
c = ticker(C)

In [None]:
# One figure per ticker showing its β estimates over time.
for label, series in (("AAPL", aapl), ("THO", tho), ("SPY", spy), ("C", c), ("GBX", gbx)):
    series.plotβ()
    plt.title(f"{label}, β estimates")
    plt.show()

### Exercise 3

In [None]:
# One figure per ticker comparing the regression-implied midpoint with the
# midpoint read directly from the data.
for label, series in (("AAPL", aapl), ("SPY", spy), ("C", c), ("GBX", gbx), ("THO", tho)):
    series.plot_p0_estimates()
    series.plot_p0()
    plt.title(f"{label} estimated vs. data BBO")
    plt.show()