# 2024 COMP90042 Project
*Make sure you change the file name with your group id.*

# Readme
*If there is something to be noted for the marker, please mention here.*

*If you are planning to implement a program with Object Oriented Programming style, please put those the bottom of this ipynb file*

# 1.DataSet Processing
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [1]:
## package imports

#!pip install pandas scikit-learn torch torchtext
!pip3 install torchtext==0.4.0

## deep-learning libraries
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import keras
import torchtext
from torchtext.data.utils import get_tokenizer
from torchtext.data import Field, LabelField, Example, Dataset
from tensorflow import convert_to_tensor

## NLP preprocessing libraries
import nltk
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer

##others
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import string
import json





In [3]:
## reading in json files

"""
Description of json files
* [train-claims,dev-claims].json: JSON files for the labelled training and development set; 
* evidence.json: JSON file containing a large number of evidence passages (i.e. the “knowledge source”); 
* dev-claims-baseline.json: JSON file containing predictions of a baseline system on the development set;
"""

## relative file paths

## baseline system - will not be used for any training/evaluation
devClaimsBaselineFile='./data/dev-claims-baseline.json'
## use this for model training
trainClaimsFile='./data/train-claims.json'
## use this set for hyperparameter tuning and evaluation metric 
devClaimsFile='./data/dev-claims.json'
## evidence files need to be downloaded through https://drive.google.com/file/d/1JlUzRufknsHzKzvrEjgw8D3n_IRpjzo6/view?usp=sharing as it is to big to be uploaded to github
evidenceFile='./data/evidence.json'
## test unlabelled dataset
testFile='./data/test-claims-unlabelled.json'

In [4]:
# Load the JSON data
with open(trainClaimsFile, 'r') as file:
    trainClaims=json.load(file)
with open(devClaimsFile, 'r') as file:
    devClaims=json.load(file)
with open(evidenceFile, 'r') as file:
    evidenceData=json.load(file)

## Preprocessing data -- lowercase, tokenize, and stopword removal
stopwords=set(nltk.corpus.stopwords.words('english'))
tokenizer=get_tokenizer('basic_english')
punctuations=string.punctuation

def preprocess(text):
    token=tokenizer(text.lower())
    cleanedTokens=[t for t in token if (t not in stopwords) and (t not in punctuations)]
    return ' '.join(cleanedTokens)

for ids, texts in evidenceData.items():
    evidenceData[ids]=preprocess(texts)

# Function to create DataFrame and merge evidence IDs with Text
def createDF(claims, evidence):
    combinedData=[]
    for claimID, claimText in claims.items():
        # Combine the ID with its corresponding evidences
        evidenceID=claimText['evidences']
        evidenceText=(evidence[i] for i in evidenceID if i in evidence)
        combinedData.append({
            'claim_id': claimID,
            'claim_text': preprocess(claimText['claim_text']),
            'evidence_id': evidenceID,
            'evidence_text': " ".join(evidenceText),
            'claim_label': claimText['claim_label']
        })
    # Create DataFrame
    return pd.DataFrame(combinedData)

# Create CSV Files
trainFullMerged=createDF(trainClaims,evidenceData)
devFullMerged=createDF(devClaims,evidenceData)
trainFullMerged.to_csv("data/trainFullMerged.csv", index=False)
devFullMerged.to_csv("data/devFullMerged.csv", index=False)

# Convert evidence into csv as well
evidenceFinal=pd.DataFrame(list(evidenceData.items()),columns=['evidence_id','evidence_text'])
evidenceFinal.to_csv('data/evidencePreprocessed.csv',index=False)


In [5]:
# Convert unlabelled Data into CSV as well
with open(testFile, 'r') as file:
    testData=json.load(file)
for ids, texts in testData.items():
    claim_text = texts['claim_text']
    testData[ids]=preprocess(claim_text)
testFinal=pd.DataFrame(list(testData.items()), columns=['claim_id', 'claim_text'])
testFinal.to_csv('data/testPreprocessed.csv',index=False)

In [6]:
from tensorflow.keras.layers import TextVectorization
from sklearn.feature_extraction.text import TfidfVectorizer
# Reading in the created CSV files

## used this for setting how much to see when printing dataframes, 50 is default
pd.set_option('display.max_colwidth', None)

## use this for model training
trainClaimsFile='./data/trainFullMerged.csv'
## use this set for hyperparameter tuning and evaluation metric 
devClaimsFile='./data/devFullMerged.csv'
## evidence files need to be downloaded through googledrive (https://drive.google.com/file/d/1OyihwdAWfqHIOueCB4bLBkYg4hTN_OKm/view?usp=sharing)
evidenceFile='./data/evidencePreprocessed.csv'
## test unlabelled dataset
testFile='./data/testPreprocessed.csv'

trainDataframe=pd.read_csv(trainClaimsFile)
devDataframe=pd.read_csv(devClaimsFile)
evidenceDataframe=pd.read_csv(evidenceFile)

## One consideration is whether we want the evidence_id at all

# trainDataframe['claim_text_unvectorized']=trainDataframe['claim_text']
# trainDataframe['evidence_id']=trainDataframe['evidence_id'].astype(str).str.strip('[]').str.strip("'").apply(preprocess)
# trainDataframe['combined_evidence_unvectorized']=trainDataframe['evidence_id']+" "+trainDataframe['evidence_text']

# devDataframe['claim_text_unvectorized']=devDataframe['claim_text']
# devDataframe['evidence_id']=devDataframe['evidence_id'].astype(str).str.strip('[]').str.strip("'").apply(preprocess)
# devDataframe['combined_evidence_unvectorized']=devDataframe['evidence_id'] +" "+ devDataframe['evidence_text']

# evidenceDataframe['combined_evidence_unvectorized']=evidenceDataframe['evidence_id']+" "+evidenceDataframe['evidence_text']
# evidenceDataframe['combined_evidence_unvectorized']=evidenceDataframe['combined_evidence_unvectorized'].astype(str).str.strip("'").apply(preprocess)

trainDataframe['claim_text']=trainDataframe['claim_text']
trainDataframe['evidence_id']=trainDataframe['evidence_id'].astype(str).str.strip('[]').str.strip("'").apply(preprocess)
trainDataframe['combined_evidence']=trainDataframe['evidence_id']+" "+trainDataframe['evidence_text']

devDataframe['claim_text']=devDataframe['claim_text']
devDataframe['evidence_id']=devDataframe['evidence_id'].astype(str).str.strip('[]').str.strip("'").apply(preprocess)
devDataframe['combined_evidence']=devDataframe['evidence_id'] +" "+ devDataframe['evidence_text']

evidenceDataframe['combined_evidence']=evidenceDataframe['evidence_id']+" "+evidenceDataframe['evidence_text']
evidenceDataframe['combined_evidence']=evidenceDataframe['combined_evidence'].astype(str).str.strip("'").apply(preprocess)

print('shape of training set:',trainDataframe.shape,trainDataframe.columns)
print('shape of development set:',devDataframe.shape,devDataframe.columns)
print('shape of evidence:',evidenceDataframe.shape,evidenceDataframe.columns)

# textVector=TfidfVectorizer()
# labelVector=TfidfVectorizer(max_features=1000)

# textVector.fit(trainDataframe['claim_text_unvectorized'])
# labelVector.fit(evidenceDataframe['combined_evidence_unvectorized'])

# trainDataframe['combined_evidence']=list(labelVector.transform(trainDataframe['combined_evidence_unvectorized']))
# trainDataframe['claim_text']=list(textVector.transform(trainDataframe['claim_text_unvectorized']))

# devDataframe['combined_evidence']=list(labelVector.transform(devDataframe['combined_evidence_unvectorized']))
# devDataframe['claim_text']=list(textVector.transform(devDataframe['claim_text_unvectorized']))

# evidenceDataframe['combined_evidence']=list(labelVector.transform(evidenceDataframe['combined_evidence_unvectorized']))

shape of training set: (1228, 6) Index(['claim_id', 'claim_text', 'evidence_id', 'evidence_text', 'claim_label',
       'combined_evidence'],
      dtype='object')
shape of development set: (154, 6) Index(['claim_id', 'claim_text', 'evidence_id', 'evidence_text', 'claim_label',
       'combined_evidence'],
      dtype='object')
shape of evidence: (1208827, 3) Index(['evidence_id', 'evidence_text', 'combined_evidence'], dtype='object')


In [8]:
# def convertTensorflow(features, labels=None):
#     if labels is not None:
#         dataset = tf.data.Dataset.from_tensor_slices((features, labels))
#     else:
#         dataset = tf.data.Dataset.from_tensor_slices(features)
#     return dataset

# train_features = np.array([x.toarray() for x in trainDataframe['claim_text']])
# train_labels = np.array([x.toarray() for x in trainDataframe['combined_evidence']])

# trainTFDataset = convertTensorflow(train_features, train_labels)

# dev_features = np.array([x.toarray() for x in devDataframe['claim_text']])
# dev_labels = np.array([x.toarray() for x in devDataframe['combined_evidence']])

# devTFDataset = convertTensorflow(dev_features, dev_labels)

# evidence_features = np.array([x.toarray() for x in evidenceDataframe['combined_evidence']])

# evidenceTFDataset = convertTensorflow(evidence_features)

In [7]:
# print(evidenceTFDataset)
# for sample in evidenceTFDataset.take(3):  # Adjust the number taken based on dataset size
#     print(sample)

In [9]:
TEXT=torchtext.data.Field(tokenize=preprocess,
                          init_token='<text1>',
                          eos_token='<text2>',
                          lower=True)
LABEL = torchtext.data.LabelField(tokenize=preprocess,
                          lower=True)

## This converts dataframes into tensors, it does preprocessing twice as the data is already preprocessed but will leave it as is for now
def createDatasetEncoderInput(dataframe,textTransform,labelTransform):
    field=[(('reviewTextInput'),textTransform),(('evidenceTextOutput'),labelTransform)]
    examples=[]
    for _, row in dataframe.iterrows():
        reviewTextInput=row['claim_text']
        evidenceTextOutput=row['combined_evidence']
        examples.append(Example.fromlist([reviewTextInput,evidenceTextOutput], field))
    return Dataset(examples,fields=field)

trainTensor=createDatasetEncoderInput(trainDataframe,TEXT,LABEL)
devTensor=createDatasetEncoderInput(devDataframe,TEXT,LABEL)
    
## now, start to create the decoder inputs in the form of DATASETS for the combined_evidence
def createDatasetDecoderInput(dataframe,labelTransform):
    field=[(('evidenceTextOutput'),labelTransform)]
    examples=[]
    for _, row in dataframe.iterrows():
        evidenceTextOutput=row['combined_evidence']
        examples.append(Example.fromlist([evidenceTextOutput], field))
    return Dataset(examples,fields=field)

evidenceTensor=createDatasetDecoderInput(evidenceDataframe,LABEL)

TEXT.build_vocab(trainTensor)
print("TEXT Vocabulary:")
for word, index in TEXT.vocab.stoi.items():
    print(f"{word}: {index}")

TEXT Vocabulary:
<unk>: 0
<pad>: 1
<text1>: 2
<text2>: 3
 : 4
e: 5
a: 6
r: 7
i: 8
s: 9
t: 10
n: 11
o: 12
l: 13
c: 14
d: 15
m: 16
g: 17
u: 18
p: 19
h: 20
y: 21
w: 22
b: 23
f: 24
v: 25
0: 26
k: 27
2: 28
1: 29
x: 30
-: 31
9: 32
’: 33
j: 34
5: 35
3: 36
“: 37
7: 38
q: 39
8: 40
4: 41
”: 42
[: 43
6: 44
]: 45
z: 46
—: 47
‘: 48
%: 49
…: 50
°: 51
–: 52
/: 53
ñ: 54
$: 55
​: 56
­: 57
â: 58
~: 59
: 60
˚: 61
: 62
: 63
¦: 64
à: 65
ø: 66
₂: 67


In [10]:
assert (len(trainDataframe)==len(trainTensor)) and (len(devDataframe)==len(devTensor) and (len(evidenceDataframe)==len(evidenceTensor)))

# checking how the tensors look
for i in range(min(len(trainTensor), 3)):
    print('train',vars(trainTensor.examples[i]))
    print('dev',vars(devTensor.examples[i]))
    print('evidence',vars(evidenceTensor.examples[i]))

## this used the pytorch pipeline, change it to tensorflow pipelines now

train {'reviewTextInput': 'scientific evidence co2 pollutant higher co2 concentrations actually help ecosystems support plant animal life', 'evidenceTextOutput': 'evidence-442946 evidence-1194317 evidence-12171 high concentrations 100 times atmospheric concentration greater carbon dioxide toxic animal life raising concentration 10 000 ppm 1% higher several hours eliminate pests whiteflies spider mites greenhouse plants grow much 50 percent faster concentrations 1 000 ppm co 2 compared ambient conditions though assumes change climate limitation nutrients higher carbon dioxide concentrations favourably affect plant growth demand water'}
dev {'reviewTextInput': '[south australia] expensive electricity world', 'evidenceTextOutput': 'evidence-67732 evidence-572512 [citation needed] south australia highest retail price electricity country south australia highest power prices world'}
evidence {'evidenceTextOutput': 'evidence-0 john bennet lawes english entrepreneur agricultural scientist'}
tr

In [13]:
## convert into tensorflow tensors

def extractTorchtext(dataset,fields,type):
    texts=[]
    labels=[]
    hasLabel=True
    if type=="output":
        hasLabel=False
    for examples in dataset:
        text=getattr(examples,fields[0][0])
        texts.append(text)
        if hasLabel:
            label=getattr(examples,fields[1][0])
            labels.append(label)
    return texts,labels

trainTensorText,trainTensorLabel=extractTorchtext(trainTensor,[('reviewTextInput',None),('evidenceTextOutput',None)],'input')
devTensorText,devTensorLabel=extractTorchtext(devTensor,[('reviewTextInput',None),('evidenceTextOutput',None)],'input')
evidenceTexts,_=extractTorchtext(evidenceTensor,[('evidenceTextOutput',None)],'output')

def convertToTensorflow(texts,labels=None):
    if labels is not None:
        dataset=tf.data.Dataset.from_tensor_slices((texts, labels))
    else:
        dataset=tf.data.Dataset.from_tensor_slices(texts)
    return dataset

trainTFDataset=convertToTensorflow(trainTensorText,trainTensorLabel)
devTFDataset=convertToTensorflow(devTensorText,devTensorLabel)
#evidenceTFDataset=convertToTensorflow(evidenceTexts)



In [11]:
## THIS CODE BLOCK IS REDUNDANT, IT WAS USED TO PERFORM COUNT EMBEDDINGS BUT MAYBE WILL IMPLEMENT LATER

# print('Train dataset element spec:', trainTFDataset.element_spec)
# print('Dev dataset element spec:', devTFDataset.element_spec)
# print('Evidence dataset element spec:', evidenceTFDataset.element_spec)

# from tensorflow.keras.layers import TextVectorization

# textVector = TextVectorization(
#     output_mode='int')

# textData=trainTFDataset.map(lambda x, y: x)  # Extract just the text part from the dataset

# # Adapt the vectorization layer to the text data
# textVector.adapt(trainTFDataset.map(lambda x, y:x))

# labelVector=TextVectorization(
#     output_mode='int'
# )

# labelVector.adapt(evidenceTFDataset)

# def vectorizeTexts(text,label):
#     text=textVector(text)
#     label=labelVector(label)
#     return text,label

# def vectorizeEvidence(label):
#     label=labelVector(label)
#     return label

# vectorizedTrain=trainTFDataset.map(vectorizeTexts)
# vectorizedDev=devTFDataset.map(vectorizeTexts)
# vectorizedEvidence=evidenceTFDataset.map(vectorizeEvidence)

# print(vectorizedEvidence)

In [12]:
## THIS CODE BLOCK IS REDUNDANT, IT WAS USED TO PERFORM COUNT EMBEDDINGS BUT MAYBE WILL IMPLEMENT LATER

# print(vectorizedTrain.element_spec)
# for sample in vectorizedEvidence.take(3):
#     print('Sample:', sample.numpy())
# vocabText=textVector.get_vocabulary()
# vocabLabel=labelVector.get_vocabulary()
# print(vocabText)
# print(vocabLabel)
# print(vectorizedTrain)

In [12]:
## if we wanted to use word embeddings, process the TFDatasets first then run this
## use these as tensors (these are EagerTensors)

tensorTrain=tf.convert_to_tensor(list(trainTFDataset))
tensorDev=tf.convert_to_tensor(list(devTFDataset))
#tensorEvidence=tf.convert_to_tensor(list(evidenceTFDataset))

In [14]:
# from sklearn.feature_extraction.text import TfidfVectorizer

# trainNumpyText=tensorTrain[:,0].numpy()
# trainNumpyLabel=tensorTrain[:,1].numpy()
# devNumpyText=tensorDev[:,0].numpy()
# devNumpyLabel=tensorDev[:,1].numpy()
# evidenceNumpy=tensorEvidence.numpy()

# tfidfVectorText=TfidfVectorizer()
# tfidfVectorLabel=TfidfVectorizer(max_features=300)

# tfidfVectorText.fit(trainNumpyText)
# tfidfVectorLabel.fit(evidenceNumpy)

# trainNumpyText=tfidfVectorText.transform(trainNumpyText)
# trainNumpyLabel=tfidfVectorLabel.transform(trainNumpyLabel)

# devNumpyText=tfidfVectorText.transform(devNumpyText)
# devNumpyLabel=tfidfVectorLabel.transform(devNumpyLabel)

# evidenceNumpy=tfidfVectorLabel.transform(evidenceNumpy)

In [15]:
# #MemoryError: Unable to allocate 16.5 GiB for an array with shape (1228, 1807303) and data type int64

# tensorTFIDFTrainText=tf.convert_to_tensor(trainNumpyText.toarray(),dtype=tf.float32)
# tensorTFIDFTrainLabel=tf.convert_to_tensor(trainNumpyLabel.toarray(),dtype=tf.float32)

# tensorTFIDFDevText=tf.convert_to_tensor(devNumpyText.toarray(),dtype=tf.float32)
# tensorTFIDFDevLabel=tf.convert_to_tensor(devNumpyLabel.toarray(),dtype=tf.float32)

# tensorTFIDFEvidence=tf.convert_to_tensor(evidenceNumpy.toarray(),dtype=tf.float32)

# tensorTFIDFTrain=tf.concat([tensorTFIDFDevText,tensorTFIDFDevLabel],axis=0)

# 2. Model Implementation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

![alt text](image.png)

In [185]:
## POSITIONAL ENCODINGS

from keras.models import Model

def positional_encoding(length, depth):
    positions=np.arange(length)[:, np.newaxis]
    depth_indices=np.arange(depth)[np.newaxis, :]
    angle_rates = 1 / np.power(10000, (2 * (depth_indices // 2)) / np.float32(depth))
    angle_rads = positions * angle_rates
    # sine applied to even indices; cosine applied to odd indices
    pos_encoding = np.concatenate([np.sin(angle_rads[:, 0::2]), np.cos(angle_rads[:, 1::2])], axis=-1)
    return tf.cast(pos_encoding, dtype=tf.float32)
  
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model):
        super(PositionalEmbedding, self).__init__()
        self.d_model=d_model
        self.embedding=tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True)
        self.pos_encoding=positional_encoding(length=2048, depth=d_model)

    def compute_mask(self, *args, **kwargs):
        return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        x=self.embedding(x)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[tf.newaxis, :length, :]
        return x
  
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

## I only use one encoder here, but we can try to use two encoders, one for label and one for the review text

tensorTrainText=tensorTrain[:,1]
tensorTrainLabels=tensorTrain[:,0]
tensorTrainConcat=tf.concat([tensorTrainLabels,tensorTrainText],axis=0)

vectorizer=TextVectorization(output_mode='int',output_sequence_length=15)

## CAN CHANGE THIS ADAPT TO RUN ON THE EVIDENCE INSTEAD, MIGHT BE BETTER BUT IT SLOWS DOWN MY PC A LOT SO WILL LEAVE AS LABELS ONLY FOR NOW
vectorizer.adapt(tensorTrainConcat)

In [186]:
embedText=PositionalEmbedding(vocab_size=(len(vectorizer.get_vocabulary())),d_model=512)
embedLabel=PositionalEmbedding(vocab_size=(len(vectorizer.get_vocabulary())),d_model=512)

vectorizedTensorTrainText=vectorizer(tensorTrainText)
vectorizedTensorTrainLabels=vectorizer(tensorTrainLabels)
#vectorizedEvidence=vectorizer(tensorEvidence)


inputEmbedded=embedText(vectorizedTensorTrainText)
outputEmbedded=embedText(vectorizedTensorTrainLabels)

## CANNOT EMBED THIS, CAUSES OUT OF MEMORY ERROR
#outputEmbedded=embedLabel(vectorizedEvidence)

In [187]:
## Check shape
inputEmbedded._keras_mask

<tf.Tensor: shape=(1228, 15), dtype=bool, numpy=
array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])>

In [188]:
## Base attention layers

class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha=tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm=tf.keras.layers.LayerNormalization()
        self.add=tf.keras.layers.Add()
    

In [189]:
## cross attention layer

from tensorflow.keras.layers import Layer, MultiHeadAttention, LayerNormalization, Add

class CrossAttention(Layer):
    def __init__(self, num_heads, key_dim, **kwargs):
        super(CrossAttention, self).__init__(**kwargs)
        self.mha = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
        self.layernorm = LayerNormalization(epsilon=1e-6)
        self.add = Add()

    def call(self, query, context):
        attn_output, attn_scores = self.mha(query=query, value=context, key=context, return_attention_scores=True)
        self.last_attn_scores = attn_scores
        x = self.add([query, attn_output])
        x = self.layernorm(x)
        return x

## key_dim CAN BE INCREASED BUT IT SLOWS DOWN PC
sample_ca=CrossAttention(num_heads=2, key_dim=5)

result=sample_ca(outputEmbedded, inputEmbedded)

print(inputEmbedded.shape)
print(outputEmbedded.shape)
print(result.shape)

(1228, 15, 512)
(1228, 15, 512)
(1228, 15, 512)


In [190]:
## global self-attention layer

class GlobalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(
        query=x,
        value=x,
        key=x)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x

sample_gsa = GlobalSelfAttention(num_heads=2, key_dim=5)

print(inputEmbedded.shape)
print(sample_gsa(inputEmbedded).shape)

(1228, 15, 512)
(1228, 15, 512)


In [191]:
## Causal self attention layer
class CausalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(
        query=x,
        value=x,
        key=x,
        use_causal_mask = True)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x

sample_csa = CausalSelfAttention(num_heads=2, key_dim=5)

print(inputEmbedded.shape)
print(sample_csa(inputEmbedded).shape)

(1228, 15, 512)
(1228, 15, 512)


In [192]:
out1 = sample_csa(inputEmbedded[:, :3])
out2 = sample_csa(inputEmbedded)[:, :3]

diff = tf.reduce_max(abs(out1 - out2))
print("Max difference:", diff.numpy())

Max difference: 4.7683716e-07


In [193]:
## Feedforward layer
class FeedForward(tf.keras.layers.Layer):
  def __init__(self, d_model, dff, dropout_rate=0.1):
    super().__init__()
    self.seq = tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation='relu'),
      tf.keras.layers.Dense(d_model),
      tf.keras.layers.Dropout(dropout_rate)
    ])
    self.add = tf.keras.layers.Add()
    self.layer_norm = tf.keras.layers.LayerNormalization()

  def call(self, x):
    x = self.add([x, self.seq(x)])
    x = self.layer_norm(x) 
    return x

## Edit these numbers -- the shape needs to be the same for the ffn numbers
sample_ffn = FeedForward(512, 150)

print(inputEmbedded.shape)
print(sample_ffn(inputEmbedded).shape)

(1228, 15, 512)
(1228, 15, 512)


In [200]:
## Encoder Layer

class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self,*, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.self_attention = GlobalSelfAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    self.ffn = FeedForward(d_model, dff)

  def call(self, x):
    x = self.self_attention(x)
    x = self.ffn(x)
    return x

sample_encoder_layer = EncoderLayer(d_model=512, num_heads=8, dff=2048)

print(inputEmbedded.shape)
print(sample_encoder_layer(inputEmbedded).shape)

class Encoder(tf.keras.layers.Layer):
  def __init__(self, *, num_layers, d_model, num_heads,
               dff, vocab_size, dropout_rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.pos_embedding = PositionalEmbedding(
        vocab_size=vocab_size, d_model=d_model)

    self.enc_layers = [
        EncoderLayer(d_model=d_model,
                     num_heads=num_heads,
                     dff=dff,
                     dropout_rate=dropout_rate)
        for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    # `x` is token-IDs shape: (batch, seq_len)
    x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.

    # Add dropout.
    x = self.dropout(x)

    for i in range(self.num_layers):
      x = self.enc_layers[i](x)

    return x  # Shape `(batch_size, seq_len, d_model)`
  
# Instantiate the encoder.
sample_encoder = Encoder(num_layers=4,
                         d_model=512,
                         num_heads=8,
                         dff=2048,
                         vocab_size=8500)

sample_encoder_output = sample_encoder(tensorTrainConcat, training=False)

# Print the shape.
print(tensorTrainConcat.shape)
print(sample_encoder_output.shape)  # Shape `(batch_size, input_seq_len, d_model)`.

(1228, 15, 512)
(1228, 15, 512)


InvalidArgumentError: Exception encountered when calling layer 'positional_embedding_82' (type PositionalEmbedding).

{{function_node __wrapped__StridedSlice_device_/job:localhost/replica:0/task:0/device:CPU:0}} slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: encoder_54/positional_embedding_82/strided_slice/

Call arguments received by layer 'positional_embedding_82' (type PositionalEmbedding):
  • x=tf.Tensor(shape=(2456,), dtype=string)

# 3.Testing and Evaluation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

## Object Oriented Programming codes here

*You can use multiple code snippets. Just add more if needed*