In [None]:
%matplotlib inline
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import copy


class GradientDescentModel:
    """Polynomial regression fitted one sample at a time with Adam and an L2 penalty.

    The hypothesis is ``x_features @ w + b`` where ``x_features`` holds the
    powers ``x**1 .. x**nb_features`` of the raw input.
    """

    # Trainable parameters; replaced by ``tf.Variable`` objects in ``train_model``.
    w = None
    b = None

    def __init__(self, data, learning_rate, nb_features, nb_epochs, ping, lmbd):
        """Store the hyper-parameters and build the polynomial feature matrix.

        Args:
            data: Mapping with keys ``'x'`` (raw inputs) and ``'y'`` (targets).
                It is deep-copied, so the caller's object is left untouched.
            learning_rate: Learning rate handed to the Adam optimizer.
            nb_features: Highest polynomial degree, i.e. number of feature columns.
            nb_epochs: Number of passes over the data made by ``train_model``.
            ping: A progress line is printed every ``ping`` epochs.
            lmbd: Weight of the L2 penalty used in ``loss``.
        """
        self.nb_features = nb_features
        # Copy the argument itself, not a module-level variable, so the model
        # always trains on the data it was given.
        self.data = copy.deepcopy(data)
        self.data['x'] = GradientDescentModel.create_feature_matrix(data['x'], self.nb_features)
        self.learning_rate = learning_rate
        self.adam = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        self.nb_epochs = nb_epochs
        self.ping = ping
        self.avg_loss = 0
        self.lmbd = lmbd

    @staticmethod
    def create_feature_matrix(x, nb_features):
        """Return a matrix whose k-th column (1-based) is ``x ** k``.

        Args:
            x: One-dimensional sequence of raw input values.
            nb_features: Number of columns, i.e. the highest power generated.

        Returns:
            Array produced by ``np.column_stack`` over the powers 1..nb_features.
        """
        powers = [np.power(x, deg) for deg in range(1, nb_features + 1)]
        return np.column_stack(powers)

    def test_model(self, xs=None):
        """Evaluate the current parameters on raw inputs.

        Args:
            xs: Raw input values. When omitted, 100 ``float32`` points evenly
                spaced over ``[-2, 2]`` are used.

        Returns:
            Tuple ``(xs, predictions)`` where predictions come from ``pred``.
        """
        if xs is None:
            # Built per call so callers never share (and cannot mutate) one default array.
            xs = np.linspace(-2, 2, 100, dtype='float32')
        xs_features = GradientDescentModel.create_feature_matrix(xs, self.nb_features)
        return xs, self.pred(xs_features)

    def train_model(self):
        """Fit ``w`` and ``b`` from zero, one optimizer step per sample.

        Both parameters are re-initialised to zeros on every call. After
        training, ``self.avg_loss`` holds the mean of the per-epoch average
        losses.
        """
        self.w = tf.Variable(tf.zeros(self.nb_features))
        self.b = tf.Variable(0.0)

        total_loss = 0
        samples_num = self.data['x'].shape[0]

        for epoch in range(self.nb_epochs):
            epoch_loss = 0

            for sample in range(samples_num):
                # A single sample is fed as a (1, nb_features) row matrix.
                x = self.data['x'][sample].reshape((1, self.nb_features))
                y = self.data['y'][sample]
                epoch_loss += self.train_step(x, y)

            epoch_loss /= samples_num
            total_loss += epoch_loss

            if (epoch + 1) % self.ping == 0:
                print(f'Model with polynomial degree {self.nb_features} and lambda {self.lmbd} | Epoch: {epoch+1}/{self.nb_epochs}| Average loss: {epoch_loss:.7f}')

        # Average over this model's own epoch count, not a module-level variable.
        self.avg_loss = total_loss / self.nb_epochs

    def train_step(self, x, y):
        """Apply one Adam update for ``(x, y)`` and return the loss before the update."""
        w_grad, b_grad, loss = self.calc_grad(x, y)
        self.adam.apply_gradients(zip([w_grad, b_grad], [self.w, self.b]))
        return loss

    def calc_grad(self, x, y):
        """Return ``(w_grad, b_grad, loss_val)`` for the regularised loss at ``(x, y)``."""
        with tf.GradientTape() as tape:
            loss_val = self.loss(x, y)

        w_grad, b_grad = tape.gradient(loss_val, [self.w, self.b])
        return w_grad, b_grad, loss_val

    def pred(self, x):
        """Return the column of predictions ``x @ w + b``.

        Args:
            x: Feature matrix with ``nb_features`` columns.
        """
        w_col = tf.reshape(self.w, (self.nb_features, 1))
        return tf.add(tf.matmul(x, w_col), self.b)

    def loss(self, x, y):
        """Return mean squared error plus ``lmbd * mean(w ** 2)``."""
        prediction = self.pred(x)
        # Targets are reshaped to a column so they line up with the predictions.
        y_col = tf.reshape(y, (-1, 1))
        mse = tf.reduce_mean(tf.square(prediction - y_col))
        reg = self.lmbd * tf.reduce_mean(tf.square(self.w))
        return tf.add(mse, reg)


# --------------------------------------------------------- #
# Candidate dataset locations; only `tadija_path` is read below.
tadija_path = '/content/drive/MyDrive/ML2024_D1/bottle.csv'
mina_path = '/content/drive/MyDrive/6003 ML/data/bottle.csv'
# --------------------------------------------------------- #


training_data = {}
nb_features = 4
load_rows = 300
samples_num = 200

# Columns 5 and 6 of the first `load_rows` data rows become x and y.
# NOTE(review): the axis labels further down call x salinity and y temperature;
# confirm that columns 5 and 6 of the CSV are really in that order.
training_data['x'], training_data['y'] = np.genfromtxt(tadija_path, dtype='float32', delimiter=',',
                                                       skip_header=1, usecols=(5, 6), unpack=True, max_rows=load_rows)

# Drop every row in which either value is missing.
mask = np.isnan(training_data['x']) | np.isnan(training_data['y'])

filtered_x = training_data['x'][~mask]
filtered_y = training_data['y'][~mask]

# Keep at most `samples_num` of the remaining rows.
training_data['x'] = filtered_x[:samples_num]
training_data['y'] = filtered_y[:samples_num]

# shuffling data
# Permute over the rows actually kept: NaN filtering may leave fewer than
# `samples_num` rows, and indexing with a longer permutation would fail.
indices = np.random.permutation(len(training_data['x']))
training_data['x'] = training_data['x'][indices]
training_data['y'] = training_data['y'][indices]

# normalization (zero mean, unit standard deviation for both series)
training_data['x'] = (training_data['x'] - np.mean(training_data['x'])) / np.std(training_data['x'])
training_data['y'] = (training_data['y'] - np.mean(training_data['y'])) / np.std(training_data['y'])


nb_epochs = 100
losses = []
lmbd = [0, 0.001, 0.01, 0.1, 1, 10, 100]

# Left panel: the data plus one fitted curve per lambda.
# NOTE(review): both series were standardized above, so the plotted values are
# z-scores rather than physical units — confirm the axis labels are intended.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.scatter(training_data['x'], training_data['y'], color='black', label='Data')
plt.xlabel("Salinity of Water")
plt.ylabel("Temperature of Water (°C)")


for l in lmbd:

    model = GradientDescentModel(data=training_data, learning_rate=0.001, nb_features=nb_features, nb_epochs=nb_epochs, ping=20, lmbd=l)
    model.train_model()
    # Final regularised loss evaluated on the whole training set at once.
    loss_val = model.loss(model.data['x'], model.data['y']).numpy()
    losses.append(loss_val)
    xs, ys = model.test_model(xs=np.linspace(-2, 2, 100, dtype='float32'))
    # A random RGB colour for each curve.
    color = (np.random.rand(), np.random.rand(), np.random.rand())
    plt.plot(xs, ys.numpy().tolist(), color=color, label=str(model.lmbd) + ' lambda')

    print('-----')

plt.legend()

# Right panel: final loss for each lambda.
plt.subplot(1, 2, 2)
plt.bar(np.arange(len(lmbd)), losses, tick_label=[str(l) for l in lmbd])
plt.xlabel('lambda parameter')
plt.ylabel('Loss Function')
plt.title('Dependency of Final Loss Function on Entire Set based on parameter lambda')
plt.show()



Za male vrednosti lambda, visoki koeficijenti su 'kaznjeni' relativno slabo, i samim tim funkcija troska je konzistentna za vrednosti lambda: 0, 0.001, 0.01, 0.1 i 1. Iz ovoga vidimo da L2 regularizacija sa tako malim vrednostima lambda ne utice preterano na ponasanje modela.
Kada su vrednosti lambda 10 i 100, medjutim, dolazi do znacajnog rasta vrednosti funkcije troska. Kada je 'kazna' stroza, modelu je vaznije da drzi koeficijente jako malim kako bi se trosak minimizovao. Zbog ovoga fitovanje modela ispasta, jer je akcenat na odrzavanju malih koeficijenata. Ovo moze dovesti do underfitting-a, pa samim tim uspeh modela znacajno opada zbog preterane regularizacije.

