In [5]:
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

We pass the `features` list (the class labels) explicitly, so we know that index 0 means "A" and index 1 means "B".

In [33]:
# y holds the true labels and yhat the predicted labels, one entry per sample
y = np.array(["A", "A", "A", "A", "A", "B"])
yhat = np.array(["B", "A", "B", "A", "A", "B"])

In [34]:
# class labels, in the order they should index the confusion matrix rows and columns
features = ["A", "B"]

In [35]:
# rows follow the first argument (true labels y), columns the second (predictions yhat).
# `labels` is passed by keyword: recent scikit-learn releases accept it as keyword-only,
# so the positional form raises a TypeError.
cm = confusion_matrix(y, yhat, labels=features)
cm

array([[3, 2],
       [0, 1]])

In [36]:
# wrap the raw matrix in a DataFrame so both axes carry the class names
cm_df = pd.DataFrame(data=cm, index=features, columns=features)
cm_df

Unnamed: 0,A,B
A,3,2
B,0,1


In [37]:
# summing each row (i.e. across the columns) gives 5 A and 1 B,
# which are the counts of our true labels
cm_df.sum(axis="columns")

A    5
B    1
dtype: int64

In [38]:
# summing each column (i.e. down the rows) gives 3 As and 3 Bs,
# which are the counts of our predictions
cm_df.sum(axis="index")

A    3
B    3
dtype: int64

In [39]:
# Going through the value of each cell, starting at the top left and moving right, row by row
# (rows are the true labels, columns are the predictions):
# 3 items predicted A that are A
# 2 items predicted B that are A
# 0 items predicted A that are B
# 1 item predicted B that is B
cm_df

Unnamed: 0,A,B
A,3,2
B,0,1


If we flip the order in which we pass the two arrays (predictions first, true labels second):

In [40]:
# Going through the value of each cell, starting at the top left and moving right, row by row.
# yhat is passed first here, so the rows now hold the predictions and the columns the true labels:
# 3 items predicted A that are A
# 0 items predicted A that are B
# 2 items predicted B that are A
# 1 item predicted B that is B
# `labels` is passed by keyword: recent scikit-learn releases accept it as keyword-only,
# so the positional form raises a TypeError.
cm2 = confusion_matrix(yhat, y, labels=features)
cm_df2 = pd.DataFrame(cm2, columns=features, index=features)
cm_df2

Unnamed: 0,A,B
A,3,0
B,2,1


In [41]:
# this is also just the transpose of our initial confusion matrix df
cm_df.transpose()

Unnamed: 0,A,B
A,3,0
B,2,1


If we flip the ordering of the `features` list, the rows and columns of the confusion matrix are reordered to match.

In [42]:
# NOTE: this rebinds `features`; re-running an earlier cell that reads it after this one
# would label the (A, B)-ordered matrix with the flipped names.
features = ["B", "A"]
# `labels` is passed by keyword: recent scikit-learn releases accept it as keyword-only,
# so the positional form raises a TypeError.
cm_flip = confusion_matrix(y, yhat, labels=features)
cm_flip_df = pd.DataFrame(cm_flip, columns=features, index=features)
cm_flip_df

Unnamed: 0,B,A
B,1,0
A,2,3


In [49]:
# the original (A, B)-ordered matrix again, for comparison with the flipped one
cm_df

Unnamed: 0,A,B
A,3,2
B,0,1


### Regression Class

In [73]:
from sklearn.linear_model import LinearRegression

In [79]:
# toy data for the regression example
# note: this rebinds y, which held the string class labels in the confusion-matrix cells
x = np.array([1,5,2,2,5,2,1])
y = np.array([4,1,3,5,6,1,2])

In [82]:
# reference fit with scikit-learn; both arrays are reshaped into a single column
lr = LinearRegression().fit(x.reshape(-1, 1), y.reshape(-1, 1))
print(lr.intercept_, lr.coef_)

[2.79032258] [[0.13709677]]


In [64]:
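# Closed-form simple linear regression (x_mu, y_mu are the means of x and y):
# b1 = SSxy / SSxx                      (slope)
# b0 = y_mu - b1 * x_mu                 (intercept)
# SSxy = sum((x - x_mu) * (y - y_mu))
# SSxx = sum((x - x_mu)**2)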

In [66]:
def get_ssxx(x):
    """Return SSxx: the sum of squared deviations of ``x`` from its mean."""
    deviations = x - np.mean(x)
    return np.sum(deviations**2)

def get_ssxy(x,y):
    """Return SSxy: the sum of products of the deviations of ``x`` and ``y`` from their means."""
    x_dev = x - np.mean(x)
    y_dev = y - np.mean(y)
    return np.sum(x_dev * y_dev)

def get_beta_1(ssxy, ssxx):
    """Return the slope estimate b1, the ratio of ``ssxy`` to ``ssxx``."""
    slope = ssxy / ssxx
    return slope

def get_beta_0(y,b1,x):
    """Return the intercept estimate b0: the mean of ``y`` minus ``b1`` times the mean of ``x``."""
    y_mu = np.mean(y)
    x_mu = np.mean(x)
    return y_mu - b1*x_mu

In [69]:
# closed-form estimates from the helper functions: sums of squares, then slope, then intercept
ssxy = get_ssxy(x, y)
ssxx = get_ssxx(x)
b1 = get_beta_1(ssxy, ssxx)
b0 = get_beta_0(y, b1, x)

print(b0, b1)

2.790322580645161 0.1370967741935484


In [96]:
class SimpleRegression:
    """Least-squares fit of a straight line ``y = b0 + b1 * x`` with a single predictor.

    After ``fit`` the instance exposes ``ssxx``, ``ssxy``, ``b1`` (slope) and
    ``b0`` (intercept).
    """

    @staticmethod
    def get_ssxx(x):
        """Return the sum of squared deviations of ``x`` from its mean."""
        return np.sum((x - np.mean(x))**2)

    @staticmethod
    def get_ssxy(x,y):
        """Return the sum of products of the deviations of ``x`` and ``y`` from their means."""
        return np.sum((x-np.mean(x))*(y-np.mean(y)))

    @staticmethod
    def get_beta_1(ssxy, ssxx):
        """Return the slope ``ssxy / ssxx``."""
        return ssxy/ssxx

    @staticmethod
    def get_beta_0(y,b1,x):
        """Return the intercept ``mean(y) - b1 * mean(x)``."""
        return np.mean(y) - b1*np.mean(x)

    def __init__(self):
        pass

    def fit(self,x,y):
        """Estimate the slope and intercept from paired observations.

        Parameters
        ----------
        x, y : array-like
            Predictor and response values. Both are flattened to 1-D, so a
            column vector of shape ``(n, 1)`` is treated like a flat ``(n,)`` array.

        Returns
        -------
        SimpleRegression
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If the inputs are empty, differ in length, or ``x`` has zero variance.
        """
        # Flatten first: mixing an (n, 1) column with an (n,) vector would otherwise
        # broadcast to an (n, n) product in get_ssxy and silently give a slope near 0.
        x = np.asarray(x, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()
        if x.size == 0:
            raise ValueError("x and y must contain at least one observation")
        if x.size != y.size:
            raise ValueError(
                "x and y must have the same number of observations, got %d and %d"
                % (x.size, y.size)
            )

        ssxx = SimpleRegression.get_ssxx(x)
        if ssxx == 0:
            # every x is identical, so ssxy / ssxx would be 0 / 0
            raise ValueError("x has zero variance; the slope is undefined")

        # only store state once the inputs are known to be valid
        self.ssxx = ssxx
        self.ssxy = SimpleRegression.get_ssxy(x,y)
        self.b1 = SimpleRegression.get_beta_1(self.ssxy, self.ssxx)
        self.b0 = SimpleRegression.get_beta_0(y,self.b1,x)
        return self

    def predict(self,x):
        """Return ``x * b1 + b0`` for a scalar or array ``x``.

        Raises
        ------
        AssertionError
            If the model has not been fitted yet.
        """
        # explicit raise instead of `assert`, so the check survives `python -O`
        if not hasattr(self, "b0"):
            raise AssertionError("need to fit the model")
        return x*self.b1 + self.b0

In [97]:
# fit our own implementation on the same x and y, then predict for a single scalar input
my_lr = SimpleRegression()
my_lr.fit(x,y)
my_lr.predict(5)

3.475806451612903

In [101]:
# predict also works on a numpy array and returns one prediction per element
my_lr.predict(np.array([5,5,5,5]))

array([3.47580645, 3.47580645, 3.47580645, 3.47580645])

In [103]:
# each input gets its own prediction; equal inputs give equal outputs
my_lr.predict(np.array([3,1,3,5]))

array([3.2016129 , 2.92741935, 3.2016129 , 3.47580645])