In [1]:
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import tensorflow as tf

In [2]:
# Load the training and test splits from CSV files in the working directory.
train = pd.read_csv('admission_train.csv')
test = pd.read_csv('admission_test.csv')

In [3]:
# Only the last expression of a cell is rendered automatically, so the preview
# has to be displayed explicitly; otherwise its result is silently discarded.
display(train.head())
train.shape

In [4]:
# Print a summary of the training frame (columns, dtypes, non-null counts).
train.info()

In [5]:
# Count missing values per column.
train.isna().sum()

In [6]:
# Convert the whole training DataFrame (every column it contains) into one tensor.
train_tf = tf.convert_to_tensor(train)

In [7]:
# Shuffle the rows (first axis) before the validation rows are sliced off.
# A fixed operation-level seed keeps the resulting train/validation split
# reproducible after Restart Kernel -> Run All.
SHUFFLE_SEED = 42
train_tf = tf.random.shuffle(train_tf, seed=SHUFFLE_SEED)

In [8]:
VALIDATION_SIZE = 50

# The first VALIDATION_SIZE rows are held out for validation;
# every remaining row is used for training.
validation_data = train_tf[:VALIDATION_SIZE]
train_data = train_tf[VALIDATION_SIZE:]

In [9]:
# NOTE(review): despite the label, this prints the shape of ONE row's feature
# vector (row index 1 with its last element dropped), not of the full matrix.
print("X_train shape:", train_data[1][:-1].shape)

In [10]:
# Switch both splits to NumPy arrays so they can be sliced column-wise.
train_data = np.array(train_data)
validation_data = np.array(validation_data)

# Every column except the last is a feature; the last column is the label.
X_train, y_train = train_data[:, :-1], train_data[:, -1]
X_val, y_val = validation_data[:, :-1], validation_data[:, -1]

In [11]:
# Convert the test DataFrame into a tensor.
# NOTE(review): assumes its columns are exactly the training feature columns,
# in the same order and without the label column - confirm against the CSV.
X_test = tf.convert_to_tensor(test)

In [12]:
class Normalizer(tf.Module):
  """Column-wise standardizer: subtracts the mean and divides by the standard deviation.

  The statistics are computed along axis 0 by `fit` and reused by `transform`
  and `inverse_transform`.
  """

  def __init__(self):
    super().__init__()
    # Both remain None until fit() has been called.
    self.mean = None
    self.std = None

  def fit(self, x):
    """Compute and store the per-column mean and standard deviation of `x`.

    Args:
      x: array or tensor of floating-point values; reduced over axis 0.
    """
    self.mean = tf.math.reduce_mean(x, axis=0)
    std = tf.math.reduce_std(x, axis=0)
    # A constant column has std == 0, which would make transform() divide by
    # zero and yield NaN/inf. Use 1 for such columns: they then map to 0 and
    # inverse_transform() still restores the original value.
    self.std = tf.where(tf.equal(std, 0), tf.ones_like(std), std)

  def transform(self, x):
    """Return `x` standardized with the statistics stored by `fit`."""
    return (x - self.mean) / self.std

  def fit_transform(self, x):
    """Fit the statistics on `x`, then return the standardized `x`."""
    self.fit(x)
    return self.transform(x)

  def inverse_transform(self, x):
    """Undo `transform`: scale by the stored std and add the stored mean back."""
    return (x * self.std) + self.mean

In [13]:
# Report the shape of each feature matrix, one line per split.
shape_report = {
    "X_train shape:": X_train.shape,
    "X_val shape:": X_val.shape,
    "X_test shape:": X_test.shape,
}
for label, shape in shape_report.items():
    print(label, shape)

In [14]:
normalizer = Normalizer()

# The statistics are fitted on the training features only; validation and test
# features are transformed with those same training statistics.
X_train_norm = normalizer.fit_transform(X_train)
X_val_norm = normalizer.transform(X_val)
X_test_norm = normalizer.transform(X_test)

In [15]:
class LinearRegression(tf.Module):
  """Linear model `y = x @ w + b` whose parameters are created on the first call."""

  def __init__(self):
    super().__init__()
    # w and b are created lazily because the number of features is only known
    # once the first input has been seen.
    self.built = False

  @tf.function
  def __call__(self, x):
    """Return one prediction per row of `x`.

    Args:
      x: 2-D batch of shape (rows, features); it is cast to float32.

    Returns:
      1-D float32 tensor with one value per input row.

    Raises:
      ValueError: on the first call, if the feature dimension of `x` is not
        statically known.
    """
    if not self.built:
      # Variables need a concrete shape, so read the static feature dimension
      # instead of a symbolic tf.shape() value.
      input_dim = x.shape[-1]
      if input_dim is None:
        raise ValueError("The feature dimension of x must be statically known.")
      rand_w = tf.random.uniform(shape=[input_dim, 1], dtype=tf.float32)
      rand_b = tf.random.uniform(shape=[], dtype=tf.float32)
      self.w = tf.Variable(rand_w)
      self.b = tf.Variable(rand_b)
      self.built = True

    # Make sure the input dtype matches the float32 parameters.
    x = tf.cast(x, tf.float32)
    y = tf.add(tf.matmul(x, self.w), self.b)
    # Drop the trailing size-1 axis left by the matrix product.
    output = tf.squeeze(y, axis=1)
    return output

In [16]:
# Create an untrained model; its parameters do not exist until the first call.
regressor = LinearRegression()

In [17]:
# Smoke test: calling the model on a single-row batch creates w and b and
# returns the (still untrained) prediction for that row.
regressor(X_train_norm[:1])

In [18]:
def mse_loss(y_pred, y_true):
    """Return the mean squared error between predictions and targets.

    Both inputs are cast to float32 first: the model emits float32 while labels
    taken from a NumPy array are float64 by default, and subtracting tensors of
    different dtypes raises an error.

    Args:
        y_pred: predicted values.
        y_true: target values, same shape as `y_pred`.

    Returns:
        Scalar float32 tensor holding the mean of the squared differences.
    """
    y_pred = tf.cast(y_pred, tf.float32)
    y_true = tf.cast(y_true, tf.float32)
    return tf.reduce_mean(tf.square(y_pred - y_true))

In [19]:
BATCH_SIZE = 10

# Pair every normalized feature row with its label, shuffle with a buffer that
# spans the whole split, then group the pairs into batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train_norm, y_train))
    .shuffle(buffer_size=X_train_norm.shape[0])
    .batch(BATCH_SIZE)
)

val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val_norm, y_val))
    .shuffle(buffer_size=X_val_norm.shape[0])
    .batch(BATCH_SIZE)
)

In [20]:
# set hyperparameters
EPOCHS = 10
LR = 0.001
train_losses, val_losses = [], []

# create a fresh, untrained model
regressor = LinearRegression()

# train the model with mini-batch gradient descent
for epoch in range(EPOCHS):
  batch_losses_train, batch_losses_val = [], []

  # iterate through the training data
  for X_batch, y_batch in train_dataset:

    # record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
      y_pred_batch = regressor(X_batch)
      # mse_loss is declared as (y_pred, y_true): pass the arguments in that order
      batch_loss = mse_loss(y_pred_batch, y_batch)

    # get the gradients of the loss with respect to the model variables
    grads = tape.gradient(batch_loss, regressor.variables)

    # gradient-descent step: move each variable against its gradient
    for g, v in zip(grads, regressor.variables):
      v.assign_sub(LR * g)

    # keep track of batch-level training performance
    batch_losses_train.append(batch_loss)

  # iterate through the validation data (no gradient tape, no updates)
  for X_batch, y_batch in val_dataset:
    y_pred_batch = regressor(X_batch)
    batch_loss = mse_loss(y_pred_batch, y_batch)

    # keep track of batch-level validation performance
    batch_losses_val.append(batch_loss)

  # keep track of epoch-level model performance (mean over the epoch's batches)
  train_loss = tf.reduce_mean(batch_losses_train)
  val_loss = tf.reduce_mean(batch_losses_val)
  train_losses.append(train_loss)
  val_losses.append(val_loss)
  # log only every 10th epoch
  if epoch % 10 == 0:
    print(f'Mean squared error for epoch {epoch}: {train_loss.numpy():0.3f}')

# output final losses
print(f"\nFinal train loss: {train_loss:0.3f}")
print(f"Final validation loss: {val_loss:0.3f}")

In [21]:
def mse_loss(y_pred, y_true):
    """Mean squared error, computed after casting both inputs to float32."""
    residual = tf.cast(y_pred, tf.float32) - tf.cast(y_true, tf.float32)
    return tf.reduce_mean(tf.square(residual))

In [22]:
BATCH_SIZE = 10

# Rebuild both pipelines: (features, label) pairs -> full-size shuffle -> batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train_norm, y_train))
    .shuffle(buffer_size=X_train_norm.shape[0])
    .batch(BATCH_SIZE)
)

val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val_norm, y_val))
    .shuffle(buffer_size=X_val_norm.shape[0])
    .batch(BATCH_SIZE)
)

In [23]:
# set hyperparameters
EPOCHS = 10
LR = 0.001
train_losses, val_losses = [], []

# create a fresh, untrained model
regressor = LinearRegression()

# train the model with mini-batch gradient descent
for epoch in range(EPOCHS):
  batch_losses_train, batch_losses_val = [], []

  # iterate through the training data
  for X_batch, y_batch in train_dataset:

    # record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
      y_pred_batch = regressor(X_batch)
      # mse_loss is declared as (y_pred, y_true): pass the arguments in that order
      batch_loss = mse_loss(y_pred_batch, y_batch)

    # get the gradients of the loss with respect to the model variables
    grads = tape.gradient(batch_loss, regressor.variables)

    # gradient-descent step: move each variable against its gradient
    for g, v in zip(grads, regressor.variables):
      v.assign_sub(LR * g)

    # keep track of batch-level training performance
    batch_losses_train.append(batch_loss)

  # iterate through the validation data (no gradient tape, no updates)
  for X_batch, y_batch in val_dataset:
    y_pred_batch = regressor(X_batch)
    batch_loss = mse_loss(y_pred_batch, y_batch)

    # keep track of batch-level validation performance
    batch_losses_val.append(batch_loss)

  # keep track of epoch-level model performance (mean over the epoch's batches)
  train_loss = tf.reduce_mean(batch_losses_train)
  val_loss = tf.reduce_mean(batch_losses_val)
  train_losses.append(train_loss)
  val_losses.append(val_loss)
  # log only every 10th epoch
  if epoch % 10 == 0:
    print(f'Mean squared error for epoch {epoch}: {train_loss.numpy():0.3f}')

# output final losses
print(f"\nFinal train loss: {train_loss:0.3f}")
print(f"Final validation loss: {val_loss:0.3f}")

In [24]:
matplotlib.rcParams['figure.figsize'] = [9, 6]

# Draw one curve per recorded loss history against the epoch index.
for losses, label in ((train_losses, "Training loss"), (val_losses, "Validation loss")):
    plt.plot(range(EPOCHS), losses, label=label)

plt.title("MSE loss vs training iterations")
plt.xlabel("Epoch")
plt.ylabel("Mean squared error loss")
plt.legend();

In [25]:
# Predict on the normalized test features. `.numpy()` converts the result to a
# NumPy array so pandas receives a plain array rather than a TensorFlow tensor.
predictions = regressor(X_test_norm).numpy()
submission = pd.DataFrame(predictions, columns=["Admission"])
submission

In [26]:
# Predict on the normalized test features and convert the result to a NumPy array.
predictions = regressor(X_test_norm).numpy()
# Wrap the predictions in a single-column frame named "Admission".
submission = pd.DataFrame(predictions, columns=["Admission"])
submission

In [27]:
BATCH_SIZE = 5

# Rebuild both pipelines with the smaller batch size:
# (features, label) pairs -> full-size shuffle -> batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train_norm, y_train))
    .shuffle(buffer_size=X_train_norm.shape[0])
    .batch(BATCH_SIZE)
)

val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val_norm, y_val))
    .shuffle(buffer_size=X_val_norm.shape[0])
    .batch(BATCH_SIZE)
)

In [28]:
# set hyperparameters
EPOCHS = 120
LR = 0.001
train_losses, val_losses = [], []

# create a fresh, untrained model
regressor = LinearRegression()

# train the model with mini-batch gradient descent
for epoch in range(EPOCHS):
  batch_losses_train, batch_losses_val = [], []

  # iterate through the training data
  for X_batch, y_batch in train_dataset:

    # record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
      y_pred_batch = regressor(X_batch)
      # mse_loss is declared as (y_pred, y_true): pass the arguments in that order
      batch_loss = mse_loss(y_pred_batch, y_batch)

    # get the gradients of the loss with respect to the model variables
    grads = tape.gradient(batch_loss, regressor.variables)

    # gradient-descent step: move each variable against its gradient
    for g, v in zip(grads, regressor.variables):
      v.assign_sub(LR * g)

    # keep track of batch-level training performance
    batch_losses_train.append(batch_loss)

  # iterate through the validation data (no gradient tape, no updates)
  for X_batch, y_batch in val_dataset:
    y_pred_batch = regressor(X_batch)
    batch_loss = mse_loss(y_pred_batch, y_batch)

    # keep track of batch-level validation performance
    batch_losses_val.append(batch_loss)

  # keep track of epoch-level model performance (mean over the epoch's batches)
  train_loss = tf.reduce_mean(batch_losses_train)
  val_loss = tf.reduce_mean(batch_losses_val)
  train_losses.append(train_loss)
  val_losses.append(val_loss)
  # log only every 10th epoch
  if epoch % 10 == 0:
    print(f'Mean squared error for epoch {epoch}: {train_loss.numpy():0.3f}')

# output final losses
print(f"\nFinal train loss: {train_loss:0.3f}")
print(f"Final validation loss: {val_loss:0.3f}")

In [29]:
matplotlib.rcParams['figure.figsize'] = [9, 6]

# Draw one curve per recorded loss history against the epoch index.
for losses, label in ((train_losses, "Training loss"), (val_losses, "Validation loss")):
    plt.plot(range(EPOCHS), losses, label=label)

plt.title("MSE loss vs training iterations")
plt.xlabel("Epoch")
plt.ylabel("Mean squared error loss")
plt.legend();

In [30]:
# Predict on the normalized test features and convert the result to a NumPy array.
predictions = regressor(X_test_norm).numpy()
# Wrap the predictions in a single-column frame named "Admission".
submission = pd.DataFrame(predictions, columns=["Admission"])
submission

In [31]:
# Shapes of the first two entries of regressor.variables, reported as bias and weights.
shape_b, shape_w = (regressor.variables[i].numpy().shape for i in (0, 1))
print("Bias shape:", shape_b)
print("Weights shape:", shape_w)

# Shapes of the statistics stored by the normalizer.
shape_mean, shape_std = (stat.numpy().shape for stat in (normalizer.mean, normalizer.std))
print("Mean shape:", shape_mean)
print("Standard deviation shape:", shape_std)