In [56]:
# This piece of code enables display of multiple output from one cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## Linear Regression

In [57]:
import numpy as np
import pandas as pd

In [58]:
indata = pd.read_csv('train.xlsx', index_col=None)
indata.shape
indata.head()

(19, 2)

Unnamed: 0,hardwood_conc,tensile_strength
0,1.0,6.3
1,1.5,11.1
2,2.0,20.0
3,3.0,24.0
4,4.0,26.1


In [59]:
hardwood_conc_centered = (indata.iloc[:,0] - np.mean(indata.iloc[:,0])) 
X_mat = pd.concat((pd.Series(np.ones(indata.shape[0])),hardwood_conc_centered, hardwood_conc_centered**2), axis= 1) 
X_mat.head()
y_vec = indata.iloc[:,1]

Unnamed: 0,0,hardwood_conc,hardwood_conc.1
0,1.0,-6.263158,39.227147
1,1.0,-5.763158,33.213989
2,1.0,-5.263158,27.700831
3,1.0,-4.263158,18.174515
4,1.0,-3.263158,10.648199


### Lets implement estimating OLS Parameters

In [60]:
def estimate_beta(X_mat, y_vec):
    beta= np.linalg.inv(X_mat.T.dot(X_mat)).dot(X_mat.T).dot(y_vec)
    return beta

In [61]:
help(estimate_beta) 

Help on function estimate_beta in module __main__:

estimate_beta(X_mat, y_vec)



In [62]:
estimate_beta(X_mat, y_vec)

array([45.29497313,  2.54634404, -0.63454917])

### Check if implementation is correct

In [63]:
from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(X_mat, y_mat)
model.intercept_
model.coef_

45.29497313476155

array([ 0.        ,  2.54634404, -0.63454917])

## Logistic Regression

### Lets implement Logistic regression
1. Need a function for computing the sigmoid function
2. Create a function for calculating predicted values
3. Need a "cost function" or entropy computation
4. Need a optimization routine to update the beta parameters 
5. Need a "train" function 

In [64]:
def train(y_actual, x, learning_rate, beta, iterations):
    for i in range(iterations):
        y_pred = calc_pred_func(beta, x)
        beta = gradient_descent(y_pred, y_actual, x, learninig_rate, beta)
        cost=calc_error(y_pred, y_actual)
    return beta,y_pred

In [65]:
def calc_sigmoid(z):
    p=1/(1+np.exp(-z))
    return p
def calc_pred_func(beta,x):
    z=np.dot(beta,np.transpose(x))
    return calc_sigmoid(z)

In [66]:
def calc_entropy(y_pred, y_actual):
    entropy= (-y_actual*np.log(y_pred) - (1-y_actual)*np.log(1-y_pred)).sum()/len(y_actual)
    return entropy

In [67]:
def gradient_descent(y_pred,y_actual,x, learning_rate, beta):
    jacobian=(-(np.dot(np.transpose(x), y_actual-y_pred)))/len(y_actual)
    beta=beta-learning_rate*jacobian
    return beta

In [68]:
def train(y_actual, x, learning_rate, beta, iterations):
    for i in range(iterations):
        y_pred = calc_pred_func(beta, x)
        beta= gradient_descent(y_pred,y_actual,x, learning_rate, beta)
        entropy = calc_entropy(y_pred, y_actual)
    return beta, y_pred
            

In [69]:
conc = pd.DataFrame([0.10, 0.15,0.20, 0.30, 0.50,0.70,0.95])
x = pd.concat((pd.DataFrame(np.ones(conc.shape[0])),conc), axis=1)
x
y_actual = np.array([0.17, 0.264, 0.436, 0.615, 0.826, 0.926, 0.962])

Unnamed: 0,0,0.1
0,1.0,0.1
1,1.0,0.15
2,1.0,0.2
3,1.0,0.3
4,1.0,0.5
5,1.0,0.7
6,1.0,0.95


In [70]:
beta_init = np.array([0,0])
beta , y_pred = train(y_actual, x, 0.1,beta_init , 1000)
beta
y_pred

array([-0.92705218,  3.66702993])

array([0.36356976, 0.40693654, 0.45180394, 0.54317577, 0.71221622,
       0.83742742, 0.92794263])

### Check if implementation is correct

In [71]:
import numpy as np
import statsmodels.api as sm
model = sm.Logit(y_actual, x)
result = model.fit(method='newton')
result.params
result.predict(x)


Optimization terminated successfully.
         Current function value: 0.385251
         Iterations 7


0   -1.762506
0    6.367254
dtype: float64

0    0.244941
1    0.308441
2    0.380116
3    0.536851
4    0.805514
5    0.936706
6    0.986432
dtype: float64