In [None]:
!pip install transformers

In [None]:
# Mount Google Drive (Colab only) so files under /content/drive become readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the text dataset from the mounted Google Drive into the `messages` DataFrame.
import pandas as pd
messages = pd.read_csv("/content/drive/MyDrive/INDOML Dataset/Datasets/nlp_vector.csv")
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.optimizers.legacy import Adam
import numpy as np
import re
import warnings
import re
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [None]:
class TransformerBlock(Layer):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalisation.

    Args:
        embed_dim: Output width of the feed-forward network and per-head key size
            of the attention layer. It has to match the last axis of the inputs
            so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to `Layer` (for example `name` or `dtype`).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so `get_config` can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"),
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to `inputs`.

        Args:
            inputs: Tensor whose last axis has size `embed_dim`.
            training: Forwarded to both dropout layers. Defaults to None so the
                layer can also be called without an explicit flag.

        Returns:
            The layer-normalised output of the second residual connection.
        """
        # Self-attention: the same tensor is used as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config
class TokenAndPositionEmbedding(Layer):
    """Sum of a token-id embedding and an embedding of each token's position."""

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        # Lookup table indexed by token id.
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        # Lookup table indexed by position within the sequence.
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return token embeddings of `x` plus the embeddings of positions 0..len-1."""
        # The sequence length is read from the last axis of the incoming tensor.
        position_vectors = self.pos_emb(
            tf.range(start=0, limit=tf.shape(x)[-1], delta=1)
        )
        token_vectors = self.token_emb(x)
        return token_vectors + position_vectors

In [None]:
lemmatizer = WordNetLemmatizer()

# Characters that separate tokens in `lament`; the pattern is compiled once here
# instead of being rebuilt on every call.
_LAMENT_DELIMITERS = re.compile(
    '|'.join(map(re.escape, ('.', ' ', ',', ';', '!', ':', '?', '\t', '\n', '\0')))
)


def lament(a):
    """Lemmatise every token of the string `a`.

    The text is cut at punctuation and whitespace delimiters, each token is passed
    through the module-level `lemmatizer`, and the results are joined with single
    spaces.

    Args:
        a: Text to process (a `str`).

    Returns:
        The lemmatised tokens joined by one space each. Empty pieces produced by
        adjacent delimiters (for example ", ") are dropped, so the result has no
        runs of spaces and no leading or trailing space.
    """
    tokens = _LAMENT_DELIMITERS.split(a)
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens if token)
#messages['Lemmatized'] = messages.apply(lambda row : lament(row['Text']), axis = 1)
messages

In [None]:
def split(a):
  """Split `a` on whitespace, wrapping values that cannot be split in a list.

  Args:
    a: Usually a string; any other value is accepted.

  Returns:
    `a.split()` when `a` has a `split` method, otherwise the one-element list `[a]`.
  """
  try:
    return a.split()
  except AttributeError:
    # Non-string values (such as the 0 used to fill missing text) have no
    # `.split`; keep them as a single token. Any other error is left to propagate.
    return [a]
# Replace missing texts with 0. The result is assigned back to the column because
# calling fillna(..., inplace=True) on a column selection acts on a temporary object
# and does not update `messages` when pandas Copy-on-Write is active.
messages['Text'] = messages['Text'].fillna(0)
# One token list per row, built directly from the column instead of a row-wise apply.
messages['Listed'] = messages['Text'].map(split)
messages

In [None]:
# Length that every token sequence is padded or truncated to.
maxlen = 200
# Targets come straight from the 'Label' column.
y = messages['Label']
# Features: the per-row token lists turned into one fixed-width array.
X = tf.keras.preprocessing.sequence.pad_sequences(messages['Listed'], maxlen=maxlen)
X

In [None]:
# Hyperparameters of the text branch.
embed_dim = 32       # width of each token / position embedding vector
num_heads = 2        # attention heads inside the transformer block
ff_dim = 32          # hidden units of the transformer's feed-forward network
vocab_size = 100000  # number of rows in the token embedding table

# Hold out 20% of the samples; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Text branch: token ids -> embeddings -> transformer block -> pooled vector
# -> small dense head -> 16-way softmax.
ann_inputs = Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(ann_inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.3)(x)
x = Dense(20, activation="relu")(x)
x = Dropout(0.3)(x)
outputs = Dense(16, activation="softmax")(x)

ann_model = Model(inputs=ann_inputs, outputs=outputs)

**RESNET50**

In [None]:
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
#from keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

In [None]:
# Directory that holds the training images on the mounted drive.
train_path = "/content/drive/MyDrive/INDOML Dataset/Datasets/Train"
# Spatial size every image is resized to before it reaches the network.
IMAGE_SIZE = [224, 224]

In [None]:
# ResNet50 with ImageNet weights and without its classification head; the input is
# IMAGE_SIZE plus three colour channels.
resnet = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + [3],
)

In [None]:
# don't train existing weights
# Mark every ResNet50 layer as non-trainable so the pretrained weights stay fixed.
for layer in resnet.layers:
    layer.trainable = False

In [None]:
# our layers - you can add more if you want
# Flatten the ResNet50 output into one feature vector per image.
# Note: this rebinds the notebook-level name `x`, which the text-branch cell also uses.
x = Flatten()(resnet.output)

In [None]:
  # useful for getting number of output classes
# List the entries directly under the training directory; built from `train_path`
# so the dataset location is defined in a single place.
folders = glob(train_path + "/*")
folders

In [None]:
# Softmax head with one unit per entry found under the training directory.
# `x` is expected to be the flattened ResNet50 output from the cell above.
prediction = Dense(len(folders), activation='softmax')(x)
# create a model object
# Image-only classifier: ResNet50 input -> softmax head.
cnn_model = Model(inputs=resnet.input, outputs=prediction)

In [None]:
# Use the Image Data Generator to import the images from the dataset
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# The ImageNet-pretrained ResNet50 expects images prepared by its own
# `preprocess_input`, not pixels scaled to [0, 1], so that function is applied to
# every image here. Do not combine it with `rescale = 1./255`.
train_datagen = ImageDataGenerator(preprocessing_function = preprocess_input,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

# Evaluation images get the same preprocessing but no random augmentation.
test_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)

In [None]:
# Stream labelled image batches from the training directory.
# The path and target size come from `train_path` / `IMAGE_SIZE`, so they cannot
# drift from the input shape the ResNet50 backbone was built with.
training_set = train_datagen.flow_from_directory(train_path,
                                                 target_size = tuple(IMAGE_SIZE),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

In [None]:
from tensorflow.keras.layers import Concatenate

# Join the two branches: the text model's 16-way softmax output and a freshly
# flattened copy of the ResNet50 output.
# Despite its name, `merged_model` is a tensor here; the Model is built in the next cell.
merged_model = Concatenate()([ann_model.output, Flatten()(resnet.output)])
# Shared head on top of the concatenated features.
merged_model = Dense(32, activation='relu')(merged_model)
# Final 16-way class probabilities.
merged_model = Dense(16, activation='softmax')(merged_model)

In [None]:
# Two-input model: padded token ids and images in, the merged 16-way softmax out.
final_model = Model(inputs=[ann_model.input, resnet.input], outputs=merged_model)

In [None]:
# Pull one batch from the generator: `a` holds the images, `b` their one-hot labels.
# The built-in next() uses the iterator protocol, so it also works on Keras releases
# whose directory iterators do not provide a `.next()` method.
a,b = next(training_set)

In [None]:
# Compile and train the two-input model for five epochs.
# NOTE(review): `X_train` is the 80% split of all text rows, while `a` / `b` are a single
# generator batch (batch_size = 32). Keras needs every input and the target to have the
# same number of samples, so this call looks like it will fail unless the counts happen
# to match. The labels also come from the image folders (`b`); `y_train` is unused.
# Confirm how text rows are meant to be paired with images before relying on this.
final_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
final_model.fit([X_train, a], b, epochs=5)