In [139]:
import numpy as np
import os

In [140]:
PATH = './linearRegression_data'

In [141]:
TRAIN_LABEL_FILENAME = 'orientations_train.npy'
TRAIN_LABEL_FULL_PATH = os.path.join(PATH, TRAIN_LABEL_FILENAME)

y_train = np.load(TRAIN_LABEL_FULL_PATH)
y_train = y_train.reshape(10000, 1)

In [142]:
TRAIN_SET_FOLDER_NAME = '3dshapes_train'
TRAIN_SET_FULL_PATH = os.path.join(PATH, TRAIN_SET_FOLDER_NAME)

TRAIN_SIZE = len(os.listdir(TRAIN_SET_FULL_PATH))
X_train = np.zeros((TRAIN_SIZE, 4096))

In [143]:
import cv2

In [144]:
for i in range(TRAIN_SIZE):
    img = cv2.imread(os.path.join(TRAIN_SET_FULL_PATH, '{}.jpg'.format(i)))
    gray_scale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_img = gray_scale.flatten()
    X_train[i] = flat_img / 255.

In [145]:
missing_X = np.isnan(X_train)
missing_y = np.isnan(y_train)

n_missing_X = np.sum(missing_X)
n_missing_y = np.sum(missing_y)

print(f"Number of missing values in X: {n_missing_X}")
print(f"Number of missing values in y: {n_missing_y}")

Number of missing values in X: 0
Number of missing values in y: 0


In [146]:
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
print('X shape: ', X_train.shape)

X shape:  (10000, 4097)


In [147]:
class LinearRegression:

    def __init__(self, alpha=0.001, lambd=0.01, max_iter=10000, tol=1e-2) -> None:
        self.alpha = alpha #learning rate
        self.lambd = lambd #regularization
        self.max_iter = max_iter
        self.tol = tol
        self.w = None

    def fit(self, X, y):
        m, n = X.shape
        self.w = np.random.randn(n, 1)
        print('w shape: ', self.w.shape)
        
        prev_cost = np.inf

        for i in range(self.max_iter):
            y_pred = X.dot(self.w)

            SSE = np.sum((y_pred - y) ** 2) / 2
            L2 = self.lambd * np.sum(self.w ** 2) / 2

            cost = SSE + L2

            if abs(prev_cost - cost) < self.tol:
                break

            prev_cost = cost

            grad = X.T.dot(y_pred - y) / m + self.lambd * self.w / m
            self.w -= self.alpha * grad

        print(i)

    def predict(self, X):
        return X.dot(self.w)


In [148]:
clf = LinearRegression()
clf.fit(X_train, y_train)

w shape:  (4097, 1)
y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape:  (4097, 1)

y pred shape:  (10000, 1)
y shape:  (10000, 1)
grad shape

In [149]:
TEST_SET_FOLDER_NAME = '3dshapes_test'
TEST_SET_FULL_PATH = os.path.join(PATH, TEST_SET_FOLDER_NAME)

TEST_SIZE = len(os.listdir(TEST_SET_FULL_PATH))
X_test = np.zeros((TEST_SIZE, 4096))

In [150]:
for i in range(TEST_SIZE):
    img = cv2.imread(os.path.join(TEST_SET_FULL_PATH, '{}.jpg'.format(i)))
    gray_scale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    flat_img = gray_scale.flatten()
    X_test[i] = flat_img / 255.

In [151]:
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))
print('X test shape: ', X_test.shape)

X test shape:  (1000, 4097)


In [152]:
TEST_LABEL_FILENAME = 'orientations_test.npy'
TEST_LABEL_FULL_PATH = os.path.join(PATH, TEST_LABEL_FILENAME)

y_test = np.load(TEST_LABEL_FULL_PATH)
y_test = y_test.reshape(1000, 1)

In [153]:
y_pred = clf.predict(X_test)

In [158]:
for i in range(len(y_test)):
    print(y_test[i], y_pred[i])

[25.71428571] [26.01243573]
[-12.85714286] [-10.32797814]
[-4.28571429] [-4.81531688]
[-17.14285714] [-16.35573596]
[8.57142857] [9.38552727]
[8.57142857] [8.12586379]
[4.28571429] [6.25697092]
[8.57142857] [7.84573282]
[0.] [2.01507621]
[0.] [0.29995385]
[21.42857143] [24.28607034]
[-12.85714286] [-13.71639294]
[-12.85714286] [-14.1707273]
[0.] [-0.98818062]
[-25.71428571] [-26.51346034]
[25.71428571] [26.03793906]
[21.42857143] [22.83799386]
[-8.57142857] [-11.6446721]
[25.71428571] [24.71179531]
[0.] [-0.06916861]
[-8.57142857] [-7.20275832]
[21.42857143] [22.45494221]
[-21.42857143] [-20.84916425]
[-12.85714286] [-11.21860826]
[4.28571429] [2.39297976]
[-8.57142857] [-7.81328745]
[8.57142857] [6.90202149]
[4.28571429] [2.68544145]
[21.42857143] [23.56657853]
[8.57142857] [5.38071501]
[30.] [28.11499122]
[21.42857143] [23.51828155]
[17.14285714] [17.03715496]
[4.28571429] [3.8335199]
[12.85714286] [12.54593642]
[-12.85714286] [-13.27896363]
[21.42857143] [22.74038589]
[-8.57142857] 

In [155]:
rmse = np.sqrt(np.mean((y_pred - y_test) ** 2))

In [156]:
print(rmse)

1.8494504082988015


In [157]:
print(clf.w)

[[-0.25905842]
 [-1.32237951]
 [ 0.75453745]
 ...
 [-0.89088405]
 [-0.01711741]
 [-1.02495324]]
