# Implement linear regression on tensorflow with gradient tape

In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [None]:
# Directory the input CSV is read from (note the trailing slash).
DATASETS = "datasets/"

# Read dataset

In [None]:
# Read the whole CSV into memory as text; it is parsed by hand in later cells.
# os.path.join is used because DATASETS already ends with a separator, so
# DATASETS + os.sep + name produced a doubled separator in the path.
with open(os.path.join(DATASETS, "winequality-red.csv"), "r", encoding="utf-8") as file:
    raw_data = file.read()

# Separate the header (column names) from the data rows

In [None]:
# Split the raw text into a header line and the data rows.
# splitlines() handles both "\n" and "\r\n" endings, and blank lines are
# dropped so that a trailing newline at the end of the file does not become an
# empty "row" (which would inflate N_ROWS and make float("") fail when parsing).
lines = [line for line in raw_data.splitlines() if line.strip()]
header, *raw_dataset = lines or [""]
columns = header.split(",")

N_ROWS = len(raw_dataset)
# Counts every header column; the last column is later used as the target.
N_FEATURES = len(columns)

In [None]:
# Display the column names taken from the header line.
columns

In [None]:
# Report how many data rows and header columns were found.
for label, value in (("ROWS: ", N_ROWS), ("Features: ", N_FEATURES)):
    print(label, value)

In [None]:
# Parse every comma-separated field into a dense float matrix, one CSV row per
# matrix row. Cells that a short row never reaches keep their initial 0.0.
dataset = np.zeros((N_ROWS, N_FEATURES))

for row_idx, line in enumerate(raw_dataset):
    for col_idx, field in enumerate(line.split(",")):
        dataset[row_idx][col_idx] = float(field)

In [None]:
# Display the shape of a single parsed row.
dataset[0].shape

In [None]:
# Display columns 7 onward of every row.
dataset[:, 7:]

# Split dataset into features and target

In [None]:
# Number of leading columns used as model inputs (the slices below take
# columns [:N_SEQ_FEATURES]); the last column is used as the target.
# NOTE(review): the shape notes further down mention 11 input features —
# confirm whether leaving out the column(s) between index 10 and the target
# is intentional.
N_SEQ_FEATURES = 10

In [None]:
# Whole-dataset float32 tensors: the first N_SEQ_FEATURES columns as inputs and
# the last column as the target.
x_train, y_train = (
    tf.constant(part, dtype="float32")
    for part in (dataset[:, :N_SEQ_FEATURES], dataset[:, -1])
)

# Split dataset into train, validation and test

into 70/20/10

- Shuffle dataset
- Select N for Train, N for Validation and N for Test based on the proportion 70/20/10

In [None]:
# np.random.shuffle permutes the rows of `dataset` IN PLACE and returns None,
# so its return value must not be stored as "the shuffled data".
np.random.shuffle(dataset)
# Kept so the name refers to the shuffled array rather than None.
shuffled_dataset = dataset

# Row counts for the 70/20/10 split. Each count is capped by the rows still
# available so that very small datasets cannot produce a negative test size.
_n_rows = dataset.shape[0]
N_train = min(_n_rows, int(_n_rows*0.7)+1)
N_val = min(_n_rows-N_train, int(_n_rows*0.2)+1)
N_test = _n_rows-N_train-N_val

In [None]:
# Report the size of each partition.
for label, count in (
    ("Train rows: ", N_train),
    ("Validation rows: ", N_val),
    ("Test rows: ", N_test),
):
    print(label, count)

# Split data 
Into 
- Train
- Val
- Test 

And X for features and y for the target

In [None]:
# Carve the shuffled rows into three CONSECUTIVE, non-overlapping ranges:
#   [0, N_train) -> train, [N_train, N_train+N_val) -> val, the rest -> test.
# Slicing every partition from row 0 would make val and test subsets of the
# training rows, so the validation/test scores would be measured on data the
# model was fitted on.
_train_end = N_train
_val_end = N_train + N_val

# .copy() detaches each partition from `dataset`: basic slices are views, and
# the in-place scaling applied later would otherwise write through to the
# shared buffer.
train_X = dataset[:_train_end, :N_SEQ_FEATURES].copy()
train_y = dataset[:_train_end, -1].copy()

val_X = dataset[_train_end:_val_end, :N_SEQ_FEATURES].copy()
val_y = dataset[_train_end:_val_end, -1].copy()

test_X = dataset[_val_end:, :N_SEQ_FEATURES].copy()
test_y = dataset[_val_end:, -1].copy()

# First model Baseline AVG 

In [None]:
# Baseline "model": always predict the mean target of the training rows.
baseline_prediction = np.mean(train_y)

print("Pred for baseline: ", baseline_prediction)

# Metric

In [None]:
# Mean squared error of the constant baseline on the validation targets.
np.square(val_y - baseline_prediction).sum() / len(val_y)

# Scale data

In [None]:
# Scratch check: dividing two NumPy arrays of equal length is element-wise.
np.array([4,4,4,4])/np.array([2, 2, 2, 2])

In [None]:
# Shape of the per-column maximum (reduction over axis 0, i.e. over rows).
train_X.max(axis=0).shape

In [None]:
# Per-column maximum of the training inputs.
train_X.max(axis=0)

In [None]:
# Per-column minimum of the training inputs.
train_X.min(axis=0)

In [None]:
# Min-max scale the inputs using statistics of the TRAINING partition only, so
# no information from validation/test leaks into the preprocessing.
train_max = train_X.max(axis=0)
train_min = train_X.min(axis=0)

# Extra shrink factor: the divisor is (max - min) * Q_factor, so the scaled
# training inputs end up in [0, 1/Q_factor] instead of [0, 1].
Q_factor = 100

_feature_range = train_max - train_min
# A column that is constant in the training rows has range 0; divide by 1
# instead so it scales to 0 rather than producing NaN/inf.
_feature_range = np.where(_feature_range == 0, 1.0, _feature_range)
_scale = _feature_range * Q_factor

# Build NEW arrays instead of using `-=` / `/=`. The partitions may be views of
# the same underlying `dataset` buffer; in-place updates would then modify rows
# shared between partitions more than once.
train_X = (train_X - train_min) / _scale
val_X = (val_X - train_min) / _scale
test_X = (test_X - train_min) / _scale

# First model: Regression

Y = W.X + b

In [None]:
# train_X is (rows, input columns): B_N_DIMS is the number of training rows and
# W_N_DIMS the number of input columns (one weight per column).
B_N_DIMS, W_N_DIMS = train_X.shape

In [None]:
# Trainable parameters of Y = W.X + b, initialised uniformly in [0, 1).
# W holds one weight per input column.
W = tf.Variable(np.random.ranf((1, W_N_DIMS)), dtype='float32')
# A single shared intercept, broadcast over however many rows are fed in.
# Allocating one bias per training row (shape (B_N_DIMS, 1)) gives every sample
# its own free parameter, which is not a linear-regression intercept and cannot
# be applied to inputs with a different number of rows.
b = tf.Variable(np.random.ranf((1, 1)), dtype='float32')

In [None]:
# Display the freshly initialised weights.
W

In [None]:
# Display the freshly initialised bias.
b

# Inputs

In [None]:
# Convert every NumPy partition (inputs and targets) into an immutable float32
# tensor, in train / val / test order.
(
    train_x_tensor, train_y_tensor,
    val_x_tensor, val_y_tensor,
    test_x_tensor, test_y_tensor,
) = (
    tf.constant(array, dtype='float32')
    for array in (train_X, train_y, val_X, val_y, test_X, test_y)
)

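Shapes: W is (1, DIM) and X is (N, DIM), so one of the two has to be transposed before the matrix product (X · Wᵀ gives (N, 1)).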

# Try random values first

In [None]:
# Forward pass with the random initial parameters.
# X (N, DIM) @ W^T (DIM, 1) yields one prediction per row, shape (N, 1); adding
# b keeps that shape. The result is flattened to (N,) so it lines up
# element-for-element with train_y_tensor. The previous form, W @ X^T + b,
# added a (1, N) row to an (N, 1) column, which broadcasts to an (N, N) matrix.
y = tf.reshape(tf.matmul(train_x_tensor, tf.transpose(W)) + b, (-1,))

In [None]:
# Mean squared difference between the current predictions `y` and the training targets.
tf.math.reduce_mean((y - train_y_tensor) ** 2)

# Try with gradient tape to fix the weights

Problems I ran into

- I got None because I had declared W and b as constants (instead of variables), so there was no derivative to compute!
- I'm getting NaN and inf!?
    - Standardizing: dividing by very small (max-min) values made some results too big, so I had to scale (max-min) by Q=100
- TensorFlow on Mac can't work with float16!

In [None]:
# Record one forward pass on the tape and differentiate the MSE loss with
# respect to the trainable variables W and b.
with tf.GradientTape() as tape:
    # (N, DIM) @ (DIM, 1) -> one prediction per row, shape (N, 1).
    y = tf.matmul(train_x_tensor, tf.transpose(W)) + b
    # The targets are 1-D, shape (N,). Subtracting them from an (N, 1) column
    # would broadcast to an (N, N) matrix of all pairwise differences, so they
    # are reshaped to a column first to get an element-wise error.
    targets = tf.reshape(train_y_tensor, (-1, 1))
    loss = tf.math.reduce_mean(tf.pow(tf.subtract(y, targets), 2))

# List with d(loss)/dW and d(loss)/db, in that order.
gradient_loss_w_b = tape.gradient(loss, [W, b])

W [1, 11]
train_x_tensor[1120, 11]

W*train_x_tensor [1120, 1]

b [1120, 1]

# Training loop

In [None]:
# Display the first (scaled) training input row.
train_x_tensor[0]

In [None]:
# Display the first row of W.
W[0]

In [None]:
# Display the first row of b.
b[0]

In [None]:
# Full-batch gradient descent: one parameter update per pass over the whole
# training set.

#init params
W = tf.Variable(np.random.ranf((1, W_N_DIMS)), dtype='float32')
# One shared intercept, broadcast over all rows. A bias of shape
# (B_N_DIMS, 1) would give each training row its own free parameter.
b = tf.Variable(np.random.ranf((1, 1)), dtype='float32')

#init epsilon (the learning rate)
epsilon = tf.constant(0.01, dtype='float32')

# Targets as an (N, 1) column so they line up element-wise with the (N, 1)
# predictions; a 1-D (N,) target would broadcast the error to (N, N).
train_targets = tf.reshape(train_y_tensor, (-1, 1))

for epoc in tqdm(range(5000)):
    # Feed-forward pass, recorded so the loss can be differentiated.
    with tf.GradientTape() as tape:
        y = tf.matmul(train_x_tensor, tf.transpose(W)) + b
        loss = tf.reduce_mean(tf.square(train_targets-y))

    if epoc%1000==0:
        print("TRAIN Loss: ", loss)
        print(f"First sample training set prediction {y[0]} - real value {train_y_tensor[0]}")

    #backward - pass
    w_grad, b_grad = tape.gradient(loss, [W, b])

    # Step against the gradient.
    W.assign_sub(epsilon*w_grad)
    b.assign_sub(epsilon*b_grad)

# The mini-batch approach converges faster

as the weights are adjusted once per batch instead of once per pass over the whole training set

In [None]:
# Mini-batch gradient descent: the parameters are updated once per batch of
# BATCH_SIZE rows rather than once per pass over the training set.

#init params
BATCH_SIZE = 32
W = tf.Variable(np.random.ranf((1, W_N_DIMS)), dtype='float32')
# One shared intercept. A (BATCH_SIZE, 1) bias would learn a separate offset
# per position inside a batch and would fail on a final, shorter batch.
b = tf.Variable(np.random.ranf((1, 1)), dtype='float32')
MAX_EPOCHS = 5000

#init epsilon (the learning rate)
epsilon = tf.constant(0.01, dtype='float32')

# Targets as columns so they line up element-wise with the (rows, 1)
# predictions; 1-D targets would broadcast the error to a square matrix.
train_targets = tf.reshape(train_y_tensor, (-1, 1))
val_targets = tf.reshape(val_y_tensor, (-1, 1))
n_train_rows = train_x_tensor.shape[0]

for epoc in tqdm(range(MAX_EPOCHS)):

    for batch in range(0, n_train_rows, BATCH_SIZE):
        x_batch = train_x_tensor[batch:batch+BATCH_SIZE]
        y_batch = train_targets[batch:batch+BATCH_SIZE]

        # Feed-forward pass
        with tf.GradientTape() as tape:
            y = tf.matmul(x_batch, tf.transpose(W)) + b
            loss = tf.reduce_mean(tf.square(y_batch-y))

        #backward - pass
        w_grad, b_grad = tape.gradient(loss, [W, b])

        W.assign_sub(epsilon*w_grad)
        b.assign_sub(epsilon*b_grad)

    if epoc%1000==0:
        print(f"advance: {np.round(epoc/MAX_EPOCHS, 2)*100}%")
        # Validation MSE over all validation rows in one pass. Averaging
        # per-batch means and dividing by int(n/BATCH_SIZE)+1 over-counts the
        # batches whenever n is a multiple of BATCH_SIZE.
        y_val = tf.matmul(val_x_tensor, tf.transpose(W)) + b
        val_loss = tf.reduce_mean(tf.square(val_targets-y_val))
        # `y` holds the LAST batch at this point, so predict row 0 explicitly
        # to compare it with train_y_tensor[0].
        first_prediction = tf.matmul(train_x_tensor[:1], tf.transpose(W)) + b
        # `loss` is the loss of the last batch of this epoch.
        print("TRAIN Loss: ", loss)
        print("VAL loss: ", val_loss)
        print(f"First sample training set prediction {first_prediction[0]} - real value {train_y_tensor[0]}")

# Batch with momentum

# With momentum (a velocity term) to avoid getting stuck in a local minimum around [0.557] on val

In [None]:
# Mini-batch gradient descent with a momentum (velocity) term.

#init params
BATCH_SIZE = 32
MAX_EPOCHS = 5000

W = tf.Variable(np.random.ranf((1, W_N_DIMS)), dtype='float32')
# One shared intercept. A (BATCH_SIZE, 1) bias would learn a separate offset
# per position inside a batch and would fail on a final, shorter batch.
b = tf.Variable(np.random.ranf((1, 1)), dtype='float32')

# Momentum state: current and previous velocity, shaped like their parameters.
velocity_w = tf.Variable(np.zeros((1, W_N_DIMS)), dtype='float32')
velocity_b = tf.Variable(np.zeros((1, 1)), dtype='float32')
past_velocity_w = tf.Variable(np.zeros((1, W_N_DIMS)), dtype='float32')
past_velocity_b = tf.Variable(np.zeros((1, 1)), dtype='float32')
momentum = tf.constant(0.1, dtype='float32')

#init epsilon (the learning rate)
epsilon = tf.constant(0.01, dtype='float32')

# Targets as columns so they line up element-wise with the (rows, 1)
# predictions; 1-D targets would broadcast the error to a 2-D matrix.
train_targets = tf.reshape(train_y_tensor, (-1, 1))
val_targets = tf.reshape(val_y_tensor, (-1, 1))
n_train_rows = train_x_tensor.shape[0]

for epoc in tqdm(range(MAX_EPOCHS)):
    for batch in range(0, n_train_rows, BATCH_SIZE):
        x_batch = train_x_tensor[batch:batch+BATCH_SIZE]
        # Only the targets of THIS batch: comparing the batch predictions with
        # the whole training target vector mixes unrelated rows into the loss.
        y_batch = train_targets[batch:batch+BATCH_SIZE]

        # Feed-forward pass
        with tf.GradientTape() as tape:
            y = tf.matmul(x_batch, tf.transpose(W)) + b
            loss = tf.reduce_mean(tf.square(y_batch-y))

        #backward - pass
        w_grad, b_grad = tape.gradient(loss, [W, b])

        # velocity = momentum * previous velocity - learning-rate step
        velocity_w.assign(past_velocity_w*momentum-epsilon*w_grad)
        velocity_b.assign(past_velocity_b*momentum-epsilon*b_grad)

        # The parameter moves by momentum * velocity plus the plain gradient step.
        W.assign_add(velocity_w*momentum-epsilon*w_grad)
        b.assign_add(velocity_b*momentum-epsilon*b_grad)

        past_velocity_w.assign(velocity_w)
        past_velocity_b.assign(velocity_b)

    if epoc%1000==0:
        # Scaled by 100 so the printed number matches the "%" sign.
        print(f"advance: {np.round(epoc/MAX_EPOCHS, 2)*100}%")
        # Validation MSE over all validation rows in one pass. Averaging
        # per-batch means and dividing by int(n/BATCH_SIZE)+1 over-counts the
        # batches whenever n is a multiple of BATCH_SIZE.
        y_val = tf.matmul(val_x_tensor, tf.transpose(W)) + b
        val_loss = tf.reduce_mean(tf.square(val_targets-y_val))
        # `y` holds the LAST batch at this point, so predict row 0 explicitly
        # to compare it with train_y_tensor[0].
        first_prediction = tf.matmul(train_x_tensor[:1], tf.transpose(W)) + b
        # `loss` is the loss of the last batch of this epoch.
        print("TRAIN Loss: ", loss)
        print("VAL loss: ", val_loss)
        print(f"First sample training set prediction {first_prediction[0]} - real value {train_y_tensor[0]}")