# V2 Algorithm - neural network to combine the weights of the models

I need to construct a neural network which takes a 11 dimensional input and outputs a single value. The inputs are the various distance scores using the distance models. The output is some weighted / transformed score. The neural network will learn the weights of the models.

We're going to take a masking approach to training the model, as I don't have a test/train set of ground truth 'similarity scores' to act as my target.

Instead, we mask each of the features and train a neural network on the 10 dim datasets one-by-one, then combine the weights of the resultant 11 networks to get a final network.

In [1]:
# Add the project root directory to the system path
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

from src.etl.etl_funcs import load_documents
from src.etl.embedding_funcs import embed_index
from src.algorithms import v0, v1
from src.processing import graph_construction, distance_metrics

from sentence_transformers import SentenceTransformer
import tqdm
import numpy as np
import keras

  from .autonotebook import tqdm as notebook_tqdm


## First construct our datasets

In [2]:
# Load the document index
document_index = load_documents()

# Embed the document index
embedded_index = embed_index(document_index)
sub_index = embedded_index[:50]

doc_distance_vectors = []
# For each document
for doc in tqdm.tqdm(sub_index, desc="Calculating distance vectors"):
    for other_doc in sub_index:
        
        doc_distance_vectors.append(
            distance_metrics.calculate_distance_vector(
                doc, other_doc
            )
        )

Embedding Documents: 100%|██████████| 197/197 [00:08<00:00, 23.50it/s]
Calculating distance vectors: 100%|██████████| 50/50 [00:02<00:00, 18.93it/s]


In [5]:
X = np.array(doc_distance_vectors.copy())
datasets = [(np.delete(X, i, axis=1), X[:, i]) for i in range(X.shape[1])]

# Build the models

## Neural Network Approach 1

In [14]:
np.shape(datasets[0][1])

(2500,)

In [10]:
# Train 11 different neural networks
models = []
for i in tqdm.tqdm(range(11), desc="Training models"):
    # Instantiate the model
    model = keras.models.Sequential(name=f'sequential_{i}')
    
    # Add the layers
    model.add(keras.layers.Input(shape=(10,)))
    model.add(keras.layers.Dense(10, activation='relu'))
    model.add(keras.layers.Dense(1, activation='linear'))
    
    # Compile the model and fit it
    model.compile(loss='mse', optimizer='adam')
    model.fit(datasets[i][0], datasets[i][1], epochs=100, verbose=0)
    
    # Append the model to the list of models
    models.append(model)

Training models:   9%|▉         | 1/11 [00:03<00:32,  3.23s/it]2024-04-30 11:43:21.323064: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Input to reshape is a tensor with 1 values, but the requested shape has 32
	 [[{{function_node __inference_one_step_on_data_107672}}{{node gradient_tape/compile_loss/mse/sub/Reshape}}]]
Training models:   9%|▉         | 1/11 [00:05<00:50,  5.01s/it]


InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/compile_loss/mse/sub/Reshape defined at (most recent call last):
  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/runpy.py", line 86, in _run_code

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 359, in execute_request

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 446, in do_execute

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/var/folders/df/j998nyn11sdf0w5cf48pny6m0000gn/T/ipykernel_13833/2908861406.py", line 14, in <module>

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 314, in fit

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 117, in one_step_on_iterator

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 104, in one_step_on_data

  File "/Users/lukasalemu/Downloads/ls/envs/dissertation_rag/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 66, in train_step

Input to reshape is a tensor with 1 values, but the requested shape has 32
	 [[{{node gradient_tape/compile_loss/mse/sub/Reshape}}]] [Op:__inference_one_step_on_iterator_107707]

In [None]:

# Combine the networks
combined_model = keras.models.Sequential()
combined_model.add(keras.layers.Dense(11, input_dim=11, activation='relu'))
combined_model.add(keras.layers.Dense(1, activation='linear'))

# Set the weights of the combined model
weights = [np.concatenate([model.layers[0].get_weights()[0] for model in models], axis=1),
           np.concatenate([model.layers[0].get_weights()[1] for model in models])]
combined_model.layers[0].set_weights(weights)

In [9]:
for model in models:
    print(model)

<Sequential name=sequential_5, built=True>
<Sequential name=sequential_6, built=True>
<Sequential name=sequential_7, built=True>
<Sequential name=sequential_8, built=True>
<Sequential name=sequential_9, built=True>


## Autoencode for dimensionality reduction

In [23]:
# This is the size of our encoded representations
encoding_dim = 1  # 1 floats -> compression of factor 11, assuming the input is 11 floats

# This is our input placeholder
input_data = keras.layers.Input(shape=(11,))

# "encoded" is the encoded representation of the input
encoded = keras.layers.Dense(encoding_dim, activation='relu')(input_data)

# "decoded" is the lossy reconstruction of the input
decoded = keras.layers.Dense(11, activation='sigmoid')(encoded)


# This model maps an input to its reconstruction
autoencoder = keras.models.Model(input_data, decoded)

# This model maps an input to its encoded representation
encoder = keras.models.Model(input_data, encoded)

# Compile the model
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

# Assuming X is your 11 dimensional data
autoencoder.fit(
    X, 
    X,
    epochs=50,
    batch_size=256,
    shuffle=True
)

# Use the encoder to reduce dimensionality
X_encoded = encoder.predict(X)

Epoch 1/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 642us/step - loss: 0.7097
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562us/step - loss: 0.7096
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 558us/step - loss: 0.7097
Epoch 4/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 580us/step - loss: 0.7096
Epoch 5/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step - loss: 0.7096
Epoch 6/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 600us/step - loss: 0.7095
Epoch 7/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step - loss: 0.7096
Epoch 8/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 605us/step - loss: 0.7095
Epoch 9/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 627us/step - loss: 0.7094
Epoch 10/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497us/step - lo

In [24]:
X_encoded

array([[0.5128756 ],
       [0.6905924 ],
       [0.6634919 ],
       ...,
       [0.7135236 ],
       [0.7041426 ],
       [0.51287574]], dtype=float32)

## v3 - using dropout to train the model

1. Instantiate the model on a 11 dim dataset, with random weights for each input
2. Pick a feature dim to remove
3. Remove that feature from the input dataset and make it the target
4. Reshape the input
5. 

In [26]:
for document in doc_distance_vectors:
    

[[1.0, 1.0, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.659, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.711, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.628, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.647, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.384, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.6, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.416, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.487, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.55, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.552, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.523, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.464, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.436, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.325, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.483, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.409, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.407, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.366, 1.0, 1, 1.0, 1.0, 1.0, 1.0, 1, 1, 1],
 [1.0, 0.373, 1.0