## 1 Overview
An environment to train and evaluate neural networks on learning logical consequence.

In [None]:
# For Google Colab
!git clone https://github.com/stereifberger/master-s-thesis
%cd master-s-thesis/

In [None]:
# Install required dependencies - not necessary on google colab
#!pip install -r requirements.txt

In [None]:
# Import required libraries
from imports import *

## 2 Create dataset
First the dataset for training is generated. For this, the function "create_dataset" from "generation.py" uses the function "gen_outp_PA" to generate a set of random starting formulas, for which the applicability of rules is checked iteratively. All applicable rules are then used to generate new derivations. In each iteration of gen_outp_PA, set by the iterations variable, new, longer examples are generated.

**Rules.** The rules are defined in calculi.py. Two sets are available: Intuitionistic propositional logic (set below via "calculus = ipl") and classical propositional logic (set below via "calculus = cpl").

**Dataset entries.**
- **x_train.** Training input: [INDEX, PREMISES, DERIVATION SYMBOL, CONCLUSION]
- **y_train_ordered.** Dataset of correct derivations where each sublist i corresponds to INDEX: [DERIVATION_0...DERIVATION_N]

**Encoding.** Propositional variables and logical constants are encoded as integers. The integers are then one-hot encoded into unique sequences containing only 0s and 1s with the length of the maximum integer value, the feature length. The shape of the individual entries is 2D: [SEQUENCE LENGTH, FEATURE LENGTH].

**Example entries without numerical representation and one-hot encoding.**
- **x_train.** [2345, A, A THEN B, DERIVES, B OR C]
- **y_train_ordered.** Sublist 2345 is the entry: [[A, A THEN B, B, B OR C], [A, A THEN B, B, A AND B, B OR C]]


In [None]:
# Create the dataset with the intuitionistic rule set (calculi.ipl).
# The call returns the training inputs in two variants (x_train_2d, x_train_3d),
# the correct derivations (y_train_ordered) and max_y_train_len, which is passed
# to the models below as their output size.
# NOTE(review): `iterations = [1,3]` presumably bounds the generation rounds - confirm in generation.py
x_train_2d, x_train_3d, y_train_ordered, max_y_train_len = generation.create_dataset(iterations = [1,3], calculus = calculi.ipl)

## 3 Prepare dataset and define model for training
Next, with PyTorch's DataLoader, the single training entries in x_train are assigned to batches of size "batch_size" in shuffled order. Then the different models are defined using definitions from "architectures.py". These models are:

- Feedforward network (net)
- Recurrent neural network (RNNNet)
- Long-short-term memory (LSTMNet)
- Transformers (TransformerModel)

In [None]:
# Release cached GPU memory, then pick the device that models and batches are moved to:
# CUDA when a GPU is available, the CPU otherwise
torch.cuda.empty_cache()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Remember the shapes of both dataset variants for the model definitions later.
# They are read here because the train-test split further down rebinds x_train_2d and x_train_3d.
two_d_shape, three_d_shape = x_train_2d.shape, x_train_3d.shape

In [None]:
# Split both dataset variants 80-20 into train and test subsets # [^1]
# Both splits draw from a generator seeded with the same value. This makes the
# split reproducible across kernel restarts and - as long as both variants have
# the same number of entries - puts the same entry positions into train/test for both.
# NOTE: this cell rebinds x_train_2d / x_train_3d to their training subsets, so
# re-run the dataset cell before re-running this one.
split_seed = 0
train_size = int(0.8 * len(x_train_2d))
test_size = len(x_train_2d) - train_size
x_train_2d, x_test_2d = random_split(x_train_2d, [train_size, test_size],
                                     generator = torch.Generator().manual_seed(split_seed))
train_size = int(0.8 * len(x_train_3d))
test_size = len(x_train_3d) - train_size
x_train_3d, x_test_3d = random_split(x_train_3d, [train_size, test_size],
                                     generator = torch.Generator().manual_seed(split_seed))

In [None]:
# Wrap the train and test subsets of both dataset variants in data loaders
# that serve shuffled batches of 50 entries [^2]
train_dataloader_2d = DataLoader(x_train_2d, batch_size = 50, shuffle = True)
test_dataloader_2d = DataLoader(x_test_2d, batch_size = 50, shuffle = True)
train_dataloader_3d = DataLoader(x_train_3d, batch_size = 50, shuffle = True)
test_dataloader_3d = DataLoader(x_test_3d, batch_size = 50, shuffle = True)

In [None]:
# Define the four models [^3]
# Every model gets max_y_train_len as its output size.
# The feedforward model is fed x_train[:,1:] during training, i.e. each entry
# without its first column, hence the "-1" on its input size.
# The RNN, LSTM and transformer use the last dimension of the 3D dataset as input size.
ffn_model = architectures.ffn(input_size = two_d_shape[1]-1, 
                              output_size = max_y_train_len)
rnn_model = architectures.rnn(input_size = three_d_shape[2],
                              hidden_size = 20,
                              output_size = max_y_train_len)
lst_model = architectures.lst(input_size = three_d_shape[2],
                              hidden_size = 20,
                              output_size = max_y_train_len)
tra_model = architectures.tra(input_size = three_d_shape[2],
                              hidden_size = 20,
                              output_size = max_y_train_len,
                              num_layers = 2,
                              nhead = 4)

In [None]:
# Define one SGD optimizer per model, each with a learning rate of 0.001 [^4]
ffn_optimizer = torch.optim.SGD(ffn_model.parameters(),lr=0.001)
rnn_optimizer = torch.optim.SGD(rnn_model.parameters(),lr=0.001)
lst_optimizer = torch.optim.SGD(lst_model.parameters(),lr=0.001)
tra_optimizer = torch.optim.SGD(tra_model.parameters(),lr=0.001)

# Train for 200 epochs (shared by all four training loops)
epochs = 200

## 4 Training
Each subsequent cell trains one of the four models and calculates its mean squared error loss between the derivation provided by the model and the nearest correct derivation from the dataset. The logic for this is implemented in the custom loss function "mse_min_dist" in losses.py.

In [None]:
# Load all four models to the selected device (the GPU when one is available).
# Every training cell moves its batches to `device`, so a model that stays on
# the CPU would raise a device-mismatch error as soon as a GPU is used.
ffn_model.to(device)
rnn_model.to(device)
lst_model.to(device)
tra_model.to(device)

In [None]:
# Load the ground truth data (y_train_ordered) to the selected device
# (the GPU when one is available, otherwise the CPU)
y_train = y_train_ordered.to(device)

In [None]:
# Integer length argument handed to the feedforward loss in the FFN training cell.
# NOTE(review): 14 is presumably the one-hot feature length - confirm against generation.py
max_y_length = int(max_y_train_len/14)

In [None]:
### FFN ### [^5]
# Train the feedforward model and record the average train and test loss of every epoch.
ffn_costval_train = [] # Average training loss per epoch
ffn_costval_test = []  # Average test loss per epoch
for j in tqdm(range(epochs), desc = "Epoch"): # Loop over all epochs
    ffn_model.train() # Set to training mode
    train_loss = 0
    for x_train in train_dataloader_2d:   # Loop over all training batches
        x_train = x_train.to(device)
        y_pred = ffn_model(x_train[:,1:])               # Model output for the batch without its first column
        cost = losses.nffn_mse_min_dist(y_pred, x_train, y_train, max_y_length) # Calculate loss
        # Backpropagation
        ffn_optimizer.zero_grad()
        cost.backward()
        ffn_optimizer.step()
        train_loss += cost.item() # Sum up the batch losses for the average loss calculation
    avg_train_loss = train_loss / len(train_dataloader_2d) # Average loss over all training batches
    ffn_costval_train.append(avg_train_loss)

    ffn_model.eval() # Set to evaluation mode
    test_loss = 0
    # Same pass over the test batches, without gradients and without optimizer steps
    with torch.no_grad():
        for x_test in test_dataloader_2d:
            x_test = x_test.to(device)
            y_pred = ffn_model(x_test[:, 1:])
            cost = losses.nffn_mse_min_dist(y_pred, x_test, y_train, max_y_length)
            test_loss += cost.item()
    avg_test_loss = test_loss / len(test_dataloader_2d) # Average loss over all test batches
    ffn_costval_test.append(avg_test_loss) # Record the test loss, not the training loss

    if j % 10 == 0: # Report the losses every 10 epochs
        print(f"Epoch {j}: Train Loss - {avg_train_loss}, Test Loss - {avg_test_loss}")

In [None]:
### RNN ###
# Train the recurrent model and record the average train and test loss of every epoch.
# NOTE(review): y_tdict is not assigned anywhere in this notebook - unless imports.py
# provides it, the loss calls below raise a NameError on a fresh kernel. Presumably the
# ground truth (y_train) is meant; confirm against losses.threed_mse_min_dist.
rnn_costval_train = [] # Average training loss per epoch
rnn_costval_test = []  # Average test loss per epoch
for j in tqdm(range(epochs), desc = "Epoch"): # Loop over all epochs
    rnn_model.train() # Set to training mode
    train_loss = 0
    for x_train in train_dataloader_3d:   # Loop over all training batches
        x_train = x_train.to(device)
        y_pred = rnn_model(x_train[:,1:])               # Model output for the batch without its first position along dim 1
        cost = losses.threed_mse_min_dist(y_pred, x_train, y_tdict, (max_y_train_len/14), "rnn") # Calculate loss
        # Backpropagation
        rnn_optimizer.zero_grad()
        cost.backward()
        rnn_optimizer.step()
        train_loss += cost.item() # Sum up the batch losses for the average loss calculation
    avg_train_loss = train_loss / len(train_dataloader_3d) # Average loss over all training batches
    rnn_costval_train.append(avg_train_loss)

    rnn_model.eval() # Set to evaluation mode
    test_loss = 0
    # Same pass over the test batches, without gradients and without optimizer steps
    with torch.no_grad():
        for x_test in test_dataloader_3d:
            x_test = x_test.to(device)
            y_pred = rnn_model(x_test[:, 1:])
            cost = losses.threed_mse_min_dist(y_pred, x_test, y_tdict, (max_y_train_len/14), "rnn")
            test_loss += cost.item()
    # Average over the batches of the loader that was actually iterated (3D, not 2D)
    avg_test_loss = test_loss / len(test_dataloader_3d)
    rnn_costval_test.append(avg_test_loss) # Record the test loss, not the training loss

    if j % 10 == 0: # Report the losses every 10 epochs
        print(f"Epoch {j}: Train Loss - {avg_train_loss}, Test Loss - {avg_test_loss}")

In [None]:
### LSTM ###
# Train the LSTM model and record the average train and test loss of every epoch.
# NOTE(review): y_tdict is not assigned anywhere in this notebook - unless imports.py
# provides it, the loss calls below raise a NameError on a fresh kernel. Presumably the
# ground truth (y_train) is meant; confirm against losses.threed_mse_min_dist.
lst_costval_train = [] # Average training loss per epoch
lst_costval_test = []  # Average test loss per epoch
for j in tqdm(range(epochs), desc = "Epoch"): # Loop over all epochs
    lst_model.train() # Set to training mode
    train_loss = 0
    for x_train in train_dataloader_3d:   # Loop over all training batches
        x_train = x_train.to(device)
        y_pred = lst_model(x_train[:,1:])               # Model output for the batch without its first position along dim 1
        cost = losses.threed_mse_min_dist(y_pred, x_train, y_tdict, (max_y_train_len/14), "lst") # Calculate loss
        # Backpropagation
        lst_optimizer.zero_grad()
        cost.backward()
        lst_optimizer.step()
        train_loss += cost.item() # Sum up the batch losses for the average loss calculation
    avg_train_loss = train_loss / len(train_dataloader_3d) # Average loss over all training batches
    lst_costval_train.append(avg_train_loss)

    lst_model.eval() # Set to evaluation mode
    test_loss = 0
    # Same pass over the test batches, without gradients and without optimizer steps
    with torch.no_grad():
        for x_test in test_dataloader_3d:
            x_test = x_test.to(device)
            y_pred = lst_model(x_test[:, 1:])
            cost = losses.threed_mse_min_dist(y_pred, x_test, y_tdict, (max_y_train_len/14), "lst")
            test_loss += cost.item()
    # Average over the batches of the loader that was actually iterated (3D, not 2D)
    avg_test_loss = test_loss / len(test_dataloader_3d)
    lst_costval_test.append(avg_test_loss) # Record the test loss, not the training loss

    if j % 10 == 0: # Report the losses every 10 epochs
        print(f"Epoch {j}: Train Loss - {avg_train_loss}, Test Loss - {avg_test_loss}")

In [None]:
### Transformer ###
# Train the transformer model and record the average train and test loss of every epoch.
# NOTE(review): y_tdict is not assigned anywhere in this notebook - unless imports.py
# provides it, the loss calls below raise a NameError on a fresh kernel. Presumably the
# ground truth (y_train) is meant.
# NOTE(review): this cell calls losses.mse_min_dist while the RNN and LSTM cells call
# losses.threed_mse_min_dist with the same arguments - confirm which one is intended.
tra_costval_train = [] # Average training loss per epoch
tra_costval_test = []  # Average test loss per epoch
for j in tqdm(range(epochs), desc = "Epoch"): # Loop over all epochs
    tra_model.train() # Set to training mode
    train_loss = 0
    for x_train in train_dataloader_3d:   # Loop over all training batches
        x_train = x_train.to(device)
        y_pred = tra_model(x_train[:,1:])               # Model output for the batch without its first position along dim 1
        cost = losses.mse_min_dist(y_pred, x_train, y_tdict, (max_y_train_len/14), "tra") # Calculate loss
        # Backpropagation
        tra_optimizer.zero_grad()
        cost.backward()
        tra_optimizer.step()
        train_loss += cost.item() # Sum up the batch losses for the average loss calculation
    avg_train_loss = train_loss / len(train_dataloader_3d) # Average loss over all training batches
    tra_costval_train.append(avg_train_loss)

    tra_model.eval() # Set to evaluation mode
    test_loss = 0
    # Same pass over the test batches, without gradients and without optimizer steps
    with torch.no_grad():
        for x_test in test_dataloader_3d:
            x_test = x_test.to(device)
            y_pred = tra_model(x_test[:, 1:])
            cost = losses.mse_min_dist(y_pred, x_test, y_tdict, (max_y_train_len/14), "tra")
            test_loss += cost.item()
    # Average over the batches of the loader that was actually iterated (3D, not 2D)
    avg_test_loss = test_loss / len(test_dataloader_3d)
    # Record the test loss in the test list; appending the training loss to the
    # training list a second time would leave the test list empty
    tra_costval_test.append(avg_test_loss)

    if j % 10 == 0: # Report the losses every 10 epochs
        print(f"Epoch {j}: Train Loss - {avg_train_loss}, Test Loss - {avg_test_loss}")

## 5 Plot results
Here all results from above are plotted.

In [None]:
# Plot the average training loss per epoch of all four models in one figure
plt.figure(figsize=(8, 8))
train_curves = [('FFN', ffn_costval_train),
                ('RNN', rnn_costval_train),
                ('LSTM', lst_costval_train), # The LSTM cell names its list lst_costval_train
                ('Transformers', tra_costval_train)]
for label, curve in train_curves:
    # One x value per recorded epoch instead of a hard-coded range(200),
    # so the plot keeps working when `epochs` is changed
    plt.plot(range(len(curve)), curve, label=label)
plt.xlabel('Epochs')
plt.ylabel('Training MSE')
plt.legend()
plt.show()

In [None]:

# Plot the average test loss per epoch of all four models in one figure
plt.figure(figsize=(8, 8))
# Use the *_costval_test lists here; the *_costval_train lists belong to the training plot
test_curves = [('FFN', ffn_costval_test),
               ('RNN', rnn_costval_test),
               ('LSTM', lst_costval_test),
               ('Transformers', tra_costval_test)]
for label, curve in test_curves:
    # One x value per recorded epoch instead of a hard-coded range(200),
    # so the plot keeps working when `epochs` is changed
    plt.plot(range(len(curve)), curve, label=label)
plt.xlabel('Epochs')
plt.ylabel('Test MSE')
plt.legend()
plt.show()