# Optimization
## Homework 3

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import statsmodels.api as sm
from sklearn import linear_model

### Problem 1

In [None]:
# objective function will be minimized
def neg_machines(resources):
    """Return the negated machine output for a resource allocation.

    resources[0] is labor and resources[1] is capital. The output is
    0.05 * labor**(2/3) * capital**(1/3); the sign is flipped so that
    minimizing this function maximizes the output.
    """
    labor = resources[0]
    capital = resources[1]
    labor_term = labor ** (2 / 3)
    capital_term = capital ** (1 / 3)
    return -0.05 * labor_term * capital_term

In [None]:
# inequality constraints must be >= 0
def confun(resources):
    """Return the unspent budget: 100000 - 12*labor - 15*capital.

    resources[0] is labor and resources[1] is capital; the allocation is
    feasible when the returned value is non-negative.
    """
    labor_cost = 12 * resources[0]
    capital_cost = 15 * resources[1]
    return -labor_cost - capital_cost + 100000

In [None]:
# Budget constraint in SciPy's dictionary form ('ineq' means fun(x) >= 0)
constr1 = dict(type='ineq', fun=confun)
constraints = [constr1]

In [None]:
# Labor and capital cannot be negative; bounding them at zero also keeps the
# fractional powers in neg_machines real-valued if the solver takes a large step.
# SLSQP is the method SciPy selects by default for constrained problems; it is
# named explicitly here so the choice is visible.
opt_resources = minimize(neg_machines, [1000, 1000], method='SLSQP',
                         bounds=[(0, None), (0, None)], constraints=constraints)

In [None]:
# Display the optimal allocation found by the solver: x[0] = labor, x[1] = capital
opt_resources.x

In [None]:
# Display the maximized objective; the sign is flipped back because
# neg_machines returns the negated value
-opt_resources.fun

### Problem 2

In [None]:
# Read the stock data from the notebook's working directory.
# A plain relative path is used because the '.\' prefix only resolves on Windows.
df = pd.read_csv('homework4stocks.csv')

In [None]:
threshold_return = 0.01
# One column is not a stock -- presumably a date/label column; TODO confirm
nstocks = len(df.columns)-1
# Compute the statistics on numeric columns only, so a non-numeric label column
# is neither averaged nor included in the covariance matrix
returns = df.select_dtypes(include='number')
meanvec = returns.mean(axis = 0)
Sigma = returns.cov()

# Start from an equally weighted portfolio so the initial guess sums to 1
# for any number of stocks (the divisor was previously hard-coded to 3)
w = np.ones(nstocks)/nstocks

In [None]:
# objective function will be minimized
def obj_fun(x):
    """Return the portfolio variance x' * Sigma * x for the weight vector x.

    Sigma is the module-level covariance matrix. It is converted with
    np.asarray so that both a pandas DataFrame (as returned by df.cov())
    and a plain ndarray are indexed positionally; indexing a DataFrame
    with Sigma[i, j] would be treated as a column-label lookup and fail.
    """
    weights = np.asarray(x, dtype=float)
    cov = np.asarray(Sigma, dtype=float)
    # Quadratic form: sum over i, j of x[i] * x[j] * Sigma[i, j]
    return float(weights @ cov @ weights)
    

In [None]:
# inequality constraints must be >= 0
def mean_con_fun(x):
    """Return the weighted mean return of x minus threshold_return.

    Uses the module-level meanvec and threshold_return; the constraint is
    satisfied when the result is non-negative.
    """
    weighted_means = np.array(x) * meanvec
    expected_return = np.sum(weighted_means)
    return expected_return - threshold_return

In [None]:
# equality constraints must be =0
def all_invest_con(x):
    """Return sum(x) - 1, which is zero when the weights add up to one."""
    total_weight = np.sum(x)
    return total_weight - 1

In [None]:
# Weights must sum to one (equality) and reach the return threshold (inequality)
con1 = dict(type='eq', fun=all_invest_con)
con2 = dict(type='ineq', fun=mean_con_fun)
cons = [con1, con2]
# all weights must be between 0-1
bds = [(0, 1) for _ in range(nstocks)]

In [None]:
# Minimize obj_fun from the initial weights w, subject to the bounds and constraints
opt_port = minimize(fun=obj_fun, x0=w, bounds=bds, constraints=cons)

In [None]:
# Display the optimal portfolio weights (one entry per stock)
opt_port.x

In [None]:
# Display the minimized objective, i.e. the value of obj_fun at the optimal weights
opt_port.fun

### Problem 3


For each case, we run the ordinary linear regression and then we choose the one with the smallest sum of squared errors.

In [None]:
# Plain relative path: the '.\' prefix only resolves on Windows
df = pd.read_csv('variable_selection.csv')
# One sum-of-squared-errors slot per candidate model (six are fitted below)
SSE = np.zeros(6)

In [None]:
# Candidate predictor subsets; SSE[k] stores the fit error of candidate_sets[k].
# The order matches the original six cells: x1, x2, x3, (x1,x2), (x1,x3), (x2,x3).
candidate_sets = [['x1'], ['x2'], ['x3'], ['x1', 'x2'], ['x1', 'x3'], ['x2', 'x3']]
y = df['y']

for k, predictors in enumerate(candidate_sets):
    # Ordinary least squares with an intercept on the chosen predictors
    X = sm.add_constant(df[predictors])
    model = sm.OLS(y,X).fit()
    predictions = model.predict(X)
    # Sum of squared in-sample errors for this candidate
    SSE[k] = sum(np.square(predictions - y))

In [None]:
# Sum of squared errors of the six fitted models, in the order
# [x1], [x2], [x3], [x1, x2], [x1, x3], [x2, x3]
SSE

Since the 4th model (predictors $x_1$ and $x_2$) has the smallest sum of squared errors, we choose it. The corresponding coefficients are 2.9992, 3.9692 and 0, which are very close to the true values $\beta_1 = 3, \beta_2 = 4, \beta_3 = 0$.

### Problem 4

Let the $i^{th}$ team's rating be $x_i$. Requiring the average of the ratings to be 85 is equivalent to 
\begin{align}
\dfrac{\sum_{i=1}^{32}x_i}{32} = 85.
\end{align}
Rearrange it to have 
\begin{align}
x_{32}=85\times 32 - \sum_{i=1}^{31}x_i.
\end{align}
That is to say, instead of having 32 team ratings, we only need the first 31, and the last one can be calculated. Besides the ratings, the home team advantage is also a variable. As a result, there are 32 variables in total. The initial value to use for optimization is that all the ratings are the same, namely 85, and there is no home team advantage.

In [None]:
# The file has no header row, so column names are assigned explicitly.
# Plain relative path: the '.\' prefix only resolves on Windows.
df = pd.read_csv('nflratings.csv',header=None)
df.columns = ['week','home','away','hscore','vscore']

In [None]:
def SSE(xopt):
    """Return the sum of squared errors between predicted and actual point spreads.

    Parameters
    ----------
    xopt : array-like of length 32
        xopt[0:31] are the ratings of the first 31 teams and xopt[31] is the
        home team advantage. The 32nd rating is derived so that the average
        rating is 85.

    Returns
    -------
    float
        Sum over all games in the module-level ``df`` of
        (predicted spread - actual spread) ** 2, where the predicted spread is
        home rating - away rating + home advantage.
    """
    xopt = np.asarray(xopt, dtype=float)
    #The last element of x is the home team advantage
    home_advantage = xopt[31]
    #Because the avg. rating is 85, the rating of the last team can be calc. from
    #other 31 teams
    free_ratings = xopt[0:31]
    ratings = np.append(free_ratings, 85*32 - free_ratings.sum())

    # Team ids in the data are 1-based; convert once to 0-based positions and
    # evaluate every game in a single vectorized step instead of a Python loop
    home_idx = df.home.to_numpy() - 1
    away_idx = df.away.to_numpy() - 1
    pred_spread = ratings[home_idx] - ratings[away_idx] + home_advantage
    real_spread = df.hscore.to_numpy() - df.vscore.to_numpy()

    return float(np.sum((pred_spread - real_spread)**2))


In [None]:
# Initial guess: the 31 free ratings all equal 85 and the home advantage is zero
ini = np.append(np.full(31, 85.0), 0.0)
opt = minimize(SSE, x0=ini)
print(opt.x)
print(opt.fun)

In [None]:
# Rebuild all 32 ratings; the last one follows from the average-of-85 condition
free_ratings = opt.x[0:31]
rank = np.append(free_ratings, 85*32 - sum(free_ratings))
home_advantage = opt.x[31]

N = len(df)

# Predicted spread = home rating - away rating + home advantage
# (team ids in the data are 1-based, hence the -1)
home_idx = df.home.to_numpy() - 1
away_idx = df.away.to_numpy() - 1
pred_spread = rank[home_idx] - rank[away_idx] + home_advantage

real_spread = df.hscore-df.vscore

In [None]:
# A non-negative spread is counted as a home result (ties included)
pred_result = pred_spread >= 0
real_result = real_spread >= 0

# Fraction of games where the predicted and the actual result agree
n_agree = sum(pred_result == real_result)
precision = n_agree / N
precision

In [None]:
print("The confusion matrix is:")
# rownames/colnames take one name per array, so they are passed as lists;
# a bare string only works when it happens to be a single character
pd.crosstab(pred_result,real_result,rownames=['p'],colnames=['r'])

In [None]:
print("Correct predictions:")
# Count the agreeing games directly instead of hard-coding numbers read off
# the confusion matrix, so the figure stays right if the data or fit changes
int(np.sum(pred_result == real_result))