In [4]:
import torch
import torch.nn as nn
import math

In [5]:
class InputEmbeddings(nn.Module):
    def __init__(self, d_model:int , vocab_size: int): #constructor to define the variabls
        super().__init__()
        self.d_model = d_model  # embedding lenght
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, d_model) # length of sentence * embedding dimension  and return a 
        # a same vector of embedding which will be multiplied later to get the emedding for each word
        
    def forward(self,x):
        # in this we try to normalize the embedding
        return self.embedding(x) *math.sqrt(self.d_model)
    
    
    
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int , seq_len: int, dropout:float):
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout) # we need to create a dropput layer to reduce overfitting
        
        # now we need to create a positionla encoding as per transformer paper
        # create matrix of length (Seq_len,d_model)
        pe= torch.zeros(seq_len,d_model)
        
        #Create matrix of length (Seq_len,1) gives th position of each word in sentence
        position=torch.arange(0,seq_len, dtype=torch.float).unsqueeze(1)
        div_term= torch.exp(torch.arange(0,d_model,2).float() * (-math.log(1000.0)/d_model))
        
        #apply sin and cosine
        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)
        
        
        pe=pe.unsqueeze(0) #(1, Seq_Len,d_model)
        
        self.register_buffer('pe',pe)
        
        
    def forward(self,x):
        # adding pe to embedding of every word and making sure that this layer is not trainable
        x= x + (self.pe[:,:x.shape[1],:].require_grad(False))
        
  
        
class LayerNormalization(nn.Module):
    def __init__(self, eps:float = 10**-6) -> None: # the eps si avoid if during normalizatin the value in the dnmoniator ios 0
        super().__init__()
        self.eps =eps
        self.aplha=nn.Parameter(torch.ones(1)) #mulitplied
        self.bias=nn.Paramters(torch.ones(1)) #Added
    
    def forward(self,x):
        
        mean=x.mean(dim=-1, keepdim=True)
        std=x.std(dim=-1, keepdim=True)
        return self.alpha *(x-mean)/(std+self.eps) + self.bias
    
        
        

In [6]:
class FeedForwardBlock(nn.Module):
    def __init__(self, d_model:int, d_ff:int, dropout: float) -> None :
        super().__init__()
        self.linear_1 = nn.Linear(d_model,d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff,d_model)
        
        
    def forward(self,x):
        return self.linear_2(self.dropout(torch.relu(self.linear_1(x))))
        

In [7]:
class MultiHeadAttentionBlock(nn.Module):
    def __init__(self, d_model: int,h:int,dropout: float) -> None:
        super().__init__()
        self.d_model = d_model
        self.h = h
        self.dropout= dropout
        
        assert d_model %h == 0
        self.d_k = d_model // h
        self.w_q = nn.Linear(d_model,d_model)
        self.w_k = nn.Linear(d_model,d_model)
        self.w_v = nn.Linear(d_model,d_model)
        
        self.w_o = nn.Linear(d_model,d_model)
        
        self.dropout = nn.Dropout(dropout)
    
    @staticmethod
    def attention(query,key,value,mask,dropout:nn.Dropout):
        d_k=qurery.shape[-1]
        
        # (Batch,h, seq_len, d_k) -->(Batch,h, seq_len, seq_len)
        attention_score= (query@key.transpose(-2,-1))/math.sqrt(d_k)
        
        
        #masking
        if mask is not None:
            attention_scores.masked_fill(mask==0,-1e9)
        
        attention_scores=attention_scores.softmax(dim=-1)
        
        if dropout is not None:
            attention_scores=dropout(attention_scores)
            
        return (attention_scores @value),attention_scores
            
        
    def forward(self,q,k,v,mask):
        query = self.w_q(q)   # (Batch, seq_len, d_model) --> (Batch,seq_len,d_model)
        ke = self.w_k(q)    # (Batch, seq_len, d_model) --> (Batch,seq_len,d_model)
        value = self.w_v(v)   # (Batch, seq_len, d_model) --> (Batch,seq_len,d_model)
        
        
        
        # (Batch, seq_len, d_model) --> # (Batch, seq_len, h, d_k) --> # (Batch,h, seq_len, d_k)
        query= query.view(query.shape[0],query,shape[1],self.h,self.d_k).transpose(1,2)
        key= key.view(key.shape[0],key,shape[1],self.h,self.d_k).transpose(1,2)
        value= value.view(value.shape[0],value,shape[1],self.h,self.d_k).transpose(1,2)
        
        x,self.attention_scores=MultiHeadAttentionBlock.attention(query,key,value,mask,self.dropout)
        
        #(Batch,h, seq_len, d_k) -->(Batch, seq_len,h, d_k)-->(Batch,h, seq_len, d_k)
        x=x.transpose(1,2).contiguous().view(x.shape[0],-1,self.h *self.d_k)
         #(Batch,h, seq_len, d_model) --> #(Batch,h, seq_len, d_model)
        return self.w_o(x)
        
        

        
        
        

In [8]:
class ResidualConnection(nn.Module):
    def __init__(self,dropout:float)->None:
        super().__init__()
        self.dropout=nn.Dropout(dropout)
        self.norm = LayerNormalization()
        
    
    def forward(self,x,sublayer):
        return x+self.dropout(sublayer(self.norm(x)))
        

In [9]:
class EncoderBlock(nn.Module):
    def __init__(self, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock,dropout:float) -> None:
        super().__init__()
        self.self_attention_block=self_attention_block
        self.feedforward_block= feedforward_block
        self.residual_connection=nn.ModuleList([ResidualConnection(dropout) for _ in range(2)])
        
    def forward(self,x ,src_mask):
        
        x=self.residual_connections[0](x,lambda x: self.self_attention_block(x,x,x,src_mask))
        x=self.residual_connections[1](x,self.feed_forward_block)
        return x
        

In [10]:
class Encoder(nn.Module):
    def __init__(self,layers:nn.ModuleList) -> None:
        super().__init__()
        self.layers= layers
        self.norm =LayerNormalization()
    
    def forward(self,x,mask):
        
        for layer in self.layers:
            x=layer(x,mask)
            
        return self.norm(x)
        
        

In [2]:
pip install --upgrade tensorflow keras


Requirement already up-to-date: tensorflow in /home/naseem_fordham/.local/lib/python3.8/site-packages (2.13.1)
Collecting keras
  Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3
  Using cached protobuf-4.25.0-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[31mERROR: tensorflow 2.13.1 has requirement keras<2.14,>=2.13.1, but you'll have keras 2.15.0 which is incompatible.[0m
[31mERROR: wandb 0.12.21 has requirement protobuf<4.0dev,>=3.12.0, but you'll have protobuf 4.25.0 which is incompatible.[0m
[31mERROR: tensorboardx 2.6 has requirement protobuf<4,>=3.8.0, but you'll have protobuf 4.25.0 which is incompatible.[0m
Installing collected packages: keras, protobuf
  Attempting uninstall: keras
    Found existing installation: keras 2.13.1
    Uninstalling keras-2.13.1:
      Successfully uninstalled keras-2.13.1
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.12

In [3]:
!pip install keras==2.13.1 grpcio==1.48.2 protobuf==3.12.0 wandb==0.12.21 tensorboardx==2.6


Collecting keras==2.13.1
  Using cached keras-2.13.1-py3-none-any.whl (1.7 MB)
Collecting protobuf==3.12.0
  Using cached protobuf-3.12.0-cp38-cp38-manylinux1_x86_64.whl (1.3 MB)
[31mERROR: tensorflow 2.13.1 has requirement protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you'll have protobuf 3.12.0 which is incompatible.[0m
[31mERROR: tensorboard 2.13.0 has requirement protobuf>=3.19.6, but you'll have protobuf 3.12.0 which is incompatible.[0m
[31mERROR: google-api-core 2.14.0 has requirement protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5, but you'll have protobuf 3.12.0 which is incompatible.[0m
[31mERROR: googleapis-common-protos 1.58.0 has requirement protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you'll have protobuf 3.12.0 which is incompatible.[0m
Installing collected packages: keras, protobuf
  Attempting uninstall: keras
    Found e

In [5]:
!pip install --upgrade protobuf==3.20.3


Collecting protobuf==3.20.3
  Using cached protobuf-3.20.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.12.0
    Uninstalling protobuf-3.12.0:
      Successfully uninstalled protobuf-3.12.0
Successfully installed protobuf-3.20.3


In [6]:
import tensorflow as tf
import keras

print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


TensorFlow version: 2.13.1
Keras version: 2.13.1


In [7]:
import os
import random
import pickle
from timeit import default_timer as timer
from tqdm import tqdm, trange

import re
import nltk
nltk.download("punkt")
from nltk.corpus import stopwords
nltk.download("stopwords")
from string import punctuation

import pandas as pd
pd.set_option("display.max_rows",20)
pd.set_option("display.max_columns", None)
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.utils import class_weight
from sklearn.manifold import TSNE


from transformers import TFAutoModel, BertTokenizerFast, BertModel
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import callbacks as cb
from tensorflow.keras.models import Model, Sequential

from tensorflow.keras.constraints import MaxNorm
from tensorflow import keras
from keras.layers import Input, Concatenate, Flatten, Embedding, Dense, Dropout, LSTM
from keras.models import Model, Sequential
from keras.layers import Lambda

[nltk_data] Downloading package punkt to
[nltk_data]     /home/naseem_fordham/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/naseem_fordham/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
