<a href="https://colab.research.google.com/github/kahlus2001/kahlus2001/blob/main/3EBX_assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 3EBX0 Machine Learning for Science 2022
Assignment 1. Group: Tutti Frutti

In [None]:
# import libraries
import itertools

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training data; later cells read its 'h', 'lam' and 'c' columns.
data_train = pd.read_csv('water_waves_training_data.csv')

In [None]:
g = 9.81 # gravitational acceleration [m s^-2]

# Compute dimensionally homogeneous inputs for the neural network:
# sqrt(g*h) and sqrt(g*lam) are stored as new columns of data_train (in place).
# NOTE(review): presumably h and lam are lengths in metres, which would make both
# features speeds, like c — confirm against the data description.
data_train['sqrthg']= np.sqrt(data_train['h']*g)
data_train['sqrtlamg']= np.sqrt(data_train['lam']*g)

In [None]:
# Divide every column of data_train, row by row, by that row's 'c' value,
# so 'c' itself becomes 1.0 and the other columns are expressed relative to c.
data_train_scaled = data_train.div(data_train['c'], axis=0)  # scale data
# Scaled feature columns kept under short names.
# NOTE(review): x0 and x1 are not referenced again in the visible cells.
x0 = data_train_scaled['sqrthg']    
x1 = data_train_scaled['sqrtlamg']
data_train_scaled

Unnamed: 0,h,lam,c,sqrthg,sqrtlamg
0,0.640323,21.064516,1.0,1.006556,5.773171
1,0.655502,15.661882,1.0,1.012716,4.9502
2,0.728986,14.144928,1.0,1.018051,4.484462
3,0.997763,11.711409,1.0,1.046356,3.584845
4,1.669118,13.242647,1.0,1.097257,3.090669
5,1.212766,8.351064,1.0,1.125017,2.952171
6,2.550802,15.481283,1.0,1.156784,2.849818
7,1.93,6.78,1.0,1.375983,2.578988
8,2.380468,6.429298,1.0,1.541306,2.533025
9,2.473795,6.205451,1.0,1.594932,2.526079


In [None]:
def relu(x):
  """Return the rectified linear unit max(x, 0), evaluated element-wise.

  :param x: scalar, NumPy array or pandas Series (anything ``np.maximum`` accepts).
  :return: value with the same shape as ``x`` in which negative entries are replaced
    by 0; scalar inputs come back as NumPy scalars.
  :examples:
  >>> int(relu(2))
  2
  >>> int(relu(-1))
  0
  >>> float(relu(float("-inf")))
  0.0
  """

  # np.maximum is used instead of x*(x>0) because the product form turns -inf
  # into NaN (-inf * 0) and negative floats into -0.0.
  return np.maximum(x, 0)


In [None]:
def network(x0, x1, W, eps=1e-16):
  """Evaluate the two-input network built from two ReLU units.

  The output is ``F = -relu(W[0]*x0 + W[1]*x1) + relu(W[2]*x0 + W[3]*x1) + eps``.

  :param x0: first input.
  :param x1: second input.
  :param W: indexable holding the four weights ``W[0]`` .. ``W[3]``.
  :param eps: small offset added to F before F is used as a divisor.
  :return: tuple ``(x0/F, x1/F, F)``.
  """

  # Pre-activations of the two units; the first unit enters F with a minus sign.
  pre_subtracted = W[0]*x0 + W[1]*x1
  pre_added = W[2]*x0 + W[3]*x1
  F = -relu(pre_subtracted) + relu(pre_added) + eps
  return x0/F, x1/F, F

We visualize the dataset and the output of the neural network using an interactive plot:

In [None]:
%matplotlib inline
import ipywidgets as ip

def interactive_fit(w0, w1, w2, w3):
  """Plot the scaled training data together with the network output for given weights.

  Written as a callback for ``ip.interact`` so the four weights can be adjusted with
  sliders. Each call draws a new figure and shows the resulting MAPE in the title.
  Reads the module-level ``data_train`` and ``data_train_scaled`` frames.

  :param w0: weight on x0 in the ReLU unit that is subtracted.
  :param w1: weight on x1 in the ReLU unit that is subtracted.
  :param w2: weight on x0 in the ReLU unit that is added.
  :param w3: weight on x1 in the ReLU unit that is added.
  """

  fig, ax = plt.subplots(figsize=(8, 5))
  N, M, F = network(data_train["sqrthg"], data_train["sqrtlamg"], [w0, w1, w2, w3])
  Mape = 100 * np.mean(np.abs(F/data_train["c"] - 1))

  # Data: inputs divided by the measured c.
  ax.scatter(data_train_scaled.sqrthg, data_train_scaled.sqrtlamg, color='r', label='Data')
  # Prediction: the same inputs divided by the network output F, so the two
  # point clouds coincide when F matches c.
  ax.scatter(N, M, marker="x", label="Prediction")

  # Use the Axes methods throughout instead of mixing in pyplot's implicit
  # "current axes" calls.
  ax.set_xlabel('x0 = sqrthg')
  ax.set_ylabel('x1 = sqrtlamg')
  ax.legend(loc='best')
  ax.set_title(f"MAPE = {Mape:.2f} %")
    
# One slider per weight, each given as (min, max, step) = (-1, 1, 0.01); interactive_fit is the callback.
ip.interact(interactive_fit, w0=(-1.00, 1.00, 0.01), w1=(-1.00, 1.00, 0.01), w2=(-1.00, 1.00, 0.01), w3=(-1.00, 1.00, 0.01));

interactive(children=(FloatSlider(value=0.0, description='w0', max=1.0, min=-1.0, step=0.01), FloatSlider(valu…

In [None]:
# Display the scaled training data again for reference.
data_train_scaled

Unnamed: 0,h,lam,c,sqrthg,sqrtlamg
0,0.640323,21.064516,1.0,1.006556,5.773171
1,0.655502,15.661882,1.0,1.012716,4.9502
2,0.728986,14.144928,1.0,1.018051,4.484462
3,0.997763,11.711409,1.0,1.046356,3.584845
4,1.669118,13.242647,1.0,1.097257,3.090669
5,1.212766,8.351064,1.0,1.125017,2.952171
6,2.550802,15.481283,1.0,1.156784,2.849818
7,1.93,6.78,1.0,1.375983,2.578988
8,2.380468,6.429298,1.0,1.541306,2.533025
9,2.473795,6.205451,1.0,1.594932,2.526079


As the neural network uses 4 weights, it quickly became clear that adjusting the individual weights by hand is not the most efficient strategy to build an effective model. That is why a rather crude method for finding optimal weights has been used. A function with 4 nested for loops has been defined, which iterates through 20^4 combinations of weights and computes the MAPE for each. The best weights are returned and fed into the model.

In [None]:
def mape(w0, w1, w2, w3):
  """Return the mean absolute percentage error of the network on the training data.

  The four weights are passed to ``network`` together with the ``sqrthg`` and
  ``sqrtlamg`` columns of ``data_train``; the network output is compared with
  column ``c``.
  """

  weights = [w0, w1, w2, w3]
  prediction = network(data_train["sqrthg"], data_train["sqrtlamg"], weights)[2]
  relative_error = prediction/data_train["c"] - 1
  return 100 * np.mean(np.abs(relative_error))

In [None]:
def find_weights(ranges=((-0.7815, -0.784), (0.34, 0.35), (0.018, 0.0205), (0.38, 0.385)),
                 steps=20, objective=None):
  """Grid-search the weights that give the lowest MAPE.

  Every weight is sampled at ``steps`` evenly spaced values (``np.linspace``)
  between its two bounds, and ``objective`` is evaluated on every combination.

  :param ranges: one ``(start, stop)`` pair per weight. The default is the search
    box that used to be hard-coded in this function.
  :param steps: number of grid points per weight; the search costs
    ``steps ** len(ranges)`` evaluations of ``objective``.
  :param objective: callable that takes the weights as positional arguments and
    returns the error to minimise; defaults to ``mape``.
  :return: tuple ``(best_mape, best_weights)``. If no combination scores below
    1e10 (for example when every value is NaN), ``(1e10, [0, 0, ...])`` is returned.
  """

  if objective is None:
    objective = mape
  grids = [np.linspace(start, stop, steps) for start, stop in ranges]

  best_weights = [0] * len(grids)
  best_mape = 1e10
  # product() advances the last weight fastest, which is the visiting order of
  # nested for-loops, so ties still resolve to the first combination found.
  for candidate in itertools.product(*grids):
    m = objective(*candidate)
    if m < best_mape:
      best_mape = m
      best_weights = list(candidate)
  return best_mape, best_weights

In [None]:
# find_weights()  # uncomment to re-run the grid search (slow: 20^4 evaluations of mape)

Calling the find_weights function is pretty computationally intensive, therefore it had to be called several times, readjusting the weight ranges each time. First, the function is executed with an initial range of (-1.00, 1.00) for each weight, with 20 steps within this range. Once the function returns some initial weights, the range per weight is narrowed around them, allowing for more accurate weight determination.

In [None]:
#1st run: (3.3718295663961686, [-0.9, 0.3999999999999997, -2.220446049250313e-16, 0.3999999999999997])
#2nd run: (1.246089487797823, [-0.8, 0.3631578947368421, -0.015789473684210534, 0.4052631578947368])
#3rd run: (1.127148152425753, [-0.7921052631578948, 0.35789473684210527, 0.0, 0.3973684210526316])
#4th run: (1.0502662561028346, [-0.7863157894736842, 0.3510526315789474, 0.010526315789473682, 0.39])
#5th run: (1.0183585573351592, [-0.7810526315789474, 0.34647368421052627, 0.01789473684210526, 0.385])
#6th run: (1.010960667822022, [-0.7833684210526316, 0.346, 0.019842105263157894, 0.3836842105263158])
#7th run: (1.0084639416135308, [-0.7817631578947368, 0.3452631578947368, 0.0205, 0.3831578947368421])

In [None]:
# Weights reported by the 7th grid-search run in the log above.
best_weights = [-0.7817631578947368, 0.3452631578947368, 0.0205, 0.3831578947368421] #MAPE = 1.0084639416135308

# NOTE(review): the tuple below looks like the output of a further find_weights() run
# with a marginally lower MAPE; it is not what best_weights is set to — confirm which is intended.
# (1.008327548828432,
#  [-0.7820769230769231, 0.3453846153846154, 0.0205, 0.3832051282051282])

Now that the neural network is trained and best possible weights are found, we use the network to predict wave speed values from the test data set.

In [None]:
# Load the verification inputs and add the same two features that were
# computed for the training data.
data_test = pd.read_csv('water_waves_verification_data_input.csv').assign(
    sqrthg=lambda frame: np.sqrt(frame['h']*g),
    sqrtlamg=lambda frame: np.sqrt(frame['lam']*g),
)
x0_test = data_test['sqrthg']
x1_test = data_test['sqrtlamg']

In [None]:
# Evaluate the network on the test inputs; element [2] of its return tuple is the output F.
w_test = best_weights
results = network(x0_test, x1_test, w_test, eps=1e-16)[2]
results

0     8.341562
1    11.392465
2    12.707986
3    13.927701
4    14.616280
5    11.189324
6    19.302340
7    20.504227
8    12.267723
9    21.758209
dtype: float64

In [None]:
def format_submission(results, name_of_file):
  """Save predictions as a CSV file for Kaggle submission.

  The file has an ``Id`` column (the row index of the frame built from
  ``results``) and a ``Predicted`` column holding the values.

  :param results: predicted values.
  :param name_of_file: path of the CSV file to write.
  """

  submission = pd.DataFrame({'Predicted': results}).rename_axis('Id')
  submission.to_csv(name_of_file)

# Write the test-set predictions to the submission file.
format_submission(results, 'tutti_frutti_sub_11.csv')