In [None]:
# Read the raw CSV from its Google Drive location into a DataFrame
import pandas as pd
DATASET_CSV = "/content/drive/MyDrive/INDOML Dataset/Datasets/INDOMLNLPTEXT.csv"
messages = pd.read_csv(DATASET_CSV)

In [None]:
!pip install transformers

In [None]:
# Display the loaded DataFrame for a quick visual check
messages

In [None]:
import re
import nltk
# Fetch the NLTK stop-word lists; the corpus-cleaning step reads stopwords.words('english')
nltk.download('stopwords')

In [None]:
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# One Porter stemmer instance, reused for every token while the corpus is built
ps = PorterStemmer()

In [None]:
# Build the cleaned corpus: one space-joined string of stemmed tokens per row of
# messages['Text'].
#
# The stop-word list is loaded once and kept as a set. Calling
# stopwords.words('english') inside the comprehension re-reads the list and
# scans it linearly for every single token.
english_stopwords = set(stopwords.words('english'))

corpus = []
# Iterate over the column values directly so the loop does not depend on the
# DataFrame having a default 0..n-1 index.
for text in messages['Text']:
    # Replace every non-letter with a space, lower-case, and split on whitespace
    tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    # Drop stop words, then reduce the remaining tokens to their Porter stems
    stems = [ps.stem(token) for token in tokens if token not in english_stopwords]
    corpus.append(' '.join(stems))

In [None]:
# Preview a few cleaned documents plus the total count instead of dumping the whole list
print(len(corpus))
corpus[:5]

In [None]:
# Bag-of-words features: keep at most 2500 vocabulary terms and
# materialise the document-term counts as a dense array
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(max_features=2500)
term_counts = cv.fit_transform(corpus)
X = term_counts.toarray()

In [None]:
# Target labels: the DataFrame's second column as an array
# NOTE(review): assumes column index 1 is the label column — confirm against the CSV schema
y=messages.iloc[:,1].values

**LOGISTIC REGRESSION**

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the
# split repeatable
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Default-configured logistic regression, fitted on the training split
# (fit() returns the estimator itself)
log_reg = LogisticRegression()
spam_detect_model = log_reg.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test features with the fitted logistic regression
y_pred=spam_detect_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score,classification_report

In [None]:
# Fraction of test samples whose predicted label equals the true label
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

In [None]:
from sklearn.metrics import classification_report
# classification_report expects (y_true, y_pred) in that order. With the
# arguments swapped, precision and recall are exchanged and the support
# column counts predictions instead of true labels.
print(classification_report(y_test, y_pred))

**ANN**

In [None]:
import tensorflow as tf

In [None]:
# Record the TensorFlow version this notebook was run with
print(tf.__version__)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU,PReLU,ELU
from tensorflow.keras.layers import Dropout


In [None]:
# Initialising the ANN as an empty Sequential model; layers are appended in the following cells
classifier = Sequential()

In [None]:
# First hidden layer: 11 ReLU units, followed by dropout at rate 0.2
classifier.add(Dense(11, activation='relu'))
classifier.add(Dropout(rate=0.2))

In [None]:
# Second hidden layer: 2048 ReLU units, followed by dropout at rate 0.2
classifier.add(Dense(2048, activation='relu'))
classifier.add(Dropout(rate=0.2))

In [None]:
# Third hidden layer: 1024 ReLU units, followed by dropout at rate 0.2
classifier.add(Dense(1024, activation='relu'))
classifier.add(Dropout(rate=0.2))

In [None]:
# Fourth hidden layer: 512 ReLU units, followed by dropout at rate 0.2
classifier.add(Dense(512, activation='relu'))
classifier.add(Dropout(rate=0.2))

In [None]:
# Fifth hidden layer: 256 ReLU units, followed by dropout at rate 0.2
classifier.add(Dense(256, activation='relu'))
classifier.add(Dropout(rate=0.2))

In [None]:
# Output layer. This is the last layer added before the model is compiled with
# a cross-entropy loss, so it must emit a probability distribution: softmax
# instead of ReLU, and no dropout applied to the predictions themselves.
# NOTE(review): 16 units presumed to equal the number of classes (it matches
# the 16-way softmax head of the transformer model) — confirm.
classifier.add(Dense(units=16,activation='softmax'))

In [None]:
import pandas as pd
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.optimizers.legacy import Adam
import numpy as np
import re
import warnings
import re
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [None]:
class TransformerBlock(Layer):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    The output of each sub-layer goes through dropout, is added back to that
    sub-layer's input (residual connection) and is then layer-normalised.

    Args:
        embed_dim: Size of the last axis of the input. Also used as the
            ``key_dim`` of the attention layer and as the output width of the
            feed-forward network, so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to ``Layer.__init__`` (for example ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"),
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor whose last axis has size ``embed_dim``.
            training: Passed to both dropout layers. Defaults to ``None`` so
                the method can also be called without the argument.

        Returns:
            The layer-normalised output of the second residual connection.
        """
        # Self-attention: the same tensor is used as query and value
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position embedding table.
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Width of both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed ``x`` and add the embedding of each position along its last axis."""
        # Position ids 0 .. (size of the last axis of x) - 1
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(start=0, limit=seq_len, delta=1)
        position_vectors = self.pos_emb(position_ids)
        token_vectors = self.token_emb(x)
        return token_vectors + position_vectors

In [None]:
# Hyper-parameters of the transformer classifier
embed_dim = 32     # width of every token / position embedding vector
num_heads = 2      # attention heads inside the transformer block
ff_dim = 32        # hidden width of the block's feed-forward network
vocab_size = 1278  # number of rows in the token embedding table
maxlen = 2500      # length of each input row; also the number of position embeddings
# NOTE(review): X comes from CountVectorizer(max_features=2500), i.e. rows of
# word counts rather than sequences of token ids — confirm this is the intended
# input for an Embedding-based model.

# Functional-API graph: embeddings -> transformer block -> mean over positions
# -> small dense head -> 16-way softmax
inputs = Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(
    maxlen=maxlen, vocab_size=vocab_size, embed_dim=embed_dim
)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(
    embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim
)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(rate=0.1)(x)
x = Dense(units=20, activation="relu")(x)
x = Dropout(rate=0.1)(x)
outputs = Dense(units=16, activation="softmax")(x)

model = Model(inputs=inputs, outputs=outputs)

In [None]:
# y is a single label column (one class per sample), not a one-hot matrix, so
# the sparse variant of categorical cross-entropy is the matching loss.
# NOTE(review): requires integer-encoded class labels — confirm, or encode them, before fitting.
classifier.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [None]:
#model_history=classifier.fit(X_train,y_train,validation_split=0.33,batch_size=10,epochs=50)
# y_train / y_test are 1-D label arrays (one class per sample), not one-hot
# matrices, so the sparse variant of categorical cross-entropy is the matching
# loss for the 16-way softmax output.
# NOTE(review): requires integer-encoded class labels — confirm, or encode them, before fitting.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# The test split doubles as validation data, so the val_* metrics are test-set metrics
history = model.fit(X_train, y_train,
                    batch_size=64, epochs=2,
                    validation_data=(X_test, y_test)
                   )
# Also expose the result under the name used by the commented-out classifier
# run above, so code written against either name finds this training history.
model_history = history

In [None]:
# summarize history for loss
import matplotlib.pyplot as plt

# Read from `history`, the object returned by model.fit(). `model_history` is
# only assigned in a commented-out line, so on a fresh kernel it raises NameError.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set(title='model loss', xlabel='epoch', ylabel='loss')
ax.legend(loc='upper left')
plt.show()

In [None]:
# summarize history for accuracy
# Read from `history`, the object returned by model.fit(). `model_history` is
# only assigned in a commented-out line, so on a fresh kernel it raises NameError.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='train')
ax.plot(history.history['val_accuracy'], label='test')
ax.set(title='model accuracy', xlabel='epoch', ylabel='accuracy')
ax.legend(loc='upper left')
plt.show()