In [None]:
!pip install tensorflow
# Install TensorFlow, which provides the Keras APIs used below to build and train the models.

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, Dense, Dropout, Embedding, GlobalMaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the dataset
df = pd.read_csv('Bhaav-Dataset.csv', encoding='utf-8')

# Raw sentences and their emotion annotations
X = df['Sentences'].values
y = df['Annotation'].values

# Convert the labels to one-hot encoding.
# NOTE(review): to_categorical assumes the annotations are integers in
# 0..num_classes-1 -- confirm against the CSV.
num_classes = len(set(y))
y = tf.keras.utils.to_categorical(y, num_classes)

# Split BEFORE tokenizing so the vocabulary is learned from training text only;
# fitting the tokenizer on the full corpus leaks test-set words into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize the text data (vocabulary built from the training sentences only)
num_words = 5000
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)
word_index = tokenizer.word_index

# Encode and pad both splits to the same fixed length
max_length = 100
X_train = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=max_length, padding='post')
X_test = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=max_length, padding='post')

# Define the CNN architecture
model = Sequential()
model.add(Embedding(num_words, 32, input_length=max_length))
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))  # dropout before the output layer to reduce overfitting
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. Validation uses a slice of the training data so the test set
# stays untouched until the final evaluation.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)

# Evaluate the model on the test set; results is [loss, accuracy]
results = model.evaluate(X_test, y_test)
print(results)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[4.670283317565918, 0.49445948004722595]


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Read the Bhaav CSV and pull out the sentence texts and their annotations
data = pd.read_csv('Bhaav-Dataset.csv')
sentences = data['Sentences'].values
labels = data['Annotation'].values

# Map every annotation value to an integer class id
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Hold out 20% of the rows for testing
(train_sentences, test_sentences,
 train_labels, test_labels) = train_test_split(
    sentences, labels, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training sentences, then encode both splits
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_sentences)
train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Pad everything to the length of the longest encoded sentence in either split
max_seq_length = max(len(seq) for seq in train_sequences + test_sequences)
train_data = pad_sequences(train_sequences, maxlen=max_seq_length)
test_data = pad_sequences(test_sequences, maxlen=max_seq_length)

# Embedding -> Conv1D -> global max pool -> dense classifier
model = Sequential([
    Embedding(input_dim=10000, output_dim=100, input_length=max_seq_length),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(units=64, activation='relu'),
    # note: this architecture has no dropout layer
    Dense(units=len(label_encoder.classes_), activation='softmax'),
])

# Integer labels, hence the sparse variant of the loss
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(
    train_data,
    train_labels,
    epochs=10,
    batch_size=32,
    validation_data=(test_data, test_labels),
)

# Report loss and accuracy on the held-out split
loss, accuracy = model.evaluate(test_data, test_labels)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 2.3246
Test Accuracy: 0.5304


In [None]:
from sklearn.metrics import classification_report

# Generate class predictions for the held-out test set.
# The model consumes the padded integer sequences (test_data); passing the raw
# sentence strings (test_sentences) raises a ValueError.
y_pred = model.predict(test_data)
y_pred = np.argmax(y_pred, axis=1)

# test_labels already holds the integer class ids produced by the same split
# as test_data, so no argmax over one-hot rows is needed.
y_true = test_labels

# Pin the label set so the report has one row per known class even if a class
# is absent from this split.
class_ids = np.arange(len(label_encoder.classes_))
report = classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=[str(name) for name in label_encoder.classes_],
)

print(report)



ValueError: ignored

## Approach 2

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense


In [None]:
# Load the Bhaav dataset from the CSV file in the working directory
df = pd.read_csv("Bhaav-Dataset.csv")


In [None]:
# Inputs are the sentence texts; targets are the emotion annotations
X = df["Sentences"].values
y = df["Annotation"].values

# Build the vocabulary and turn each sentence into a list of word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

# Pad every sequence up to the length of the longest one
max_length = max(len(seq) for seq in X)
X = pad_sequences(X, maxlen=max_length)

# label_map: unique annotation value -> class index, in np.unique order
unique_labels = np.unique(y)
label_map = dict(zip(unique_labels, range(len(unique_labels))))
num_classes = len(label_map)

# Replace annotations by their class index, then expand to one-hot rows
y = np.array([label_map[label] for label in y])
y = tf.keras.utils.to_categorical(y, num_classes)


In [None]:
# Split the dataset into train and test partitions.
# Rows are shuffled with a fixed seed first: a purely positional cut puts whatever
# rows sit at the end of the CSV into the test set, which is biased if the file
# is ordered (for example by label).
train_ratio = 0.8
split_idx = int(train_ratio * len(X))

shuffled_idx = np.random.RandomState(42).permutation(len(X))
train_idx = shuffled_idx[:split_idx]
test_idx = shuffled_idx[split_idx:]

X_train = X[train_idx]
y_train = y[train_idx]
X_test = X[test_idx]
y_test = y[test_idx]


In [None]:
# Hyper-parameters for the single-convolution text classifier
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100
filters = 128
kernel_size = 3

# Embedding -> Conv1D -> global max pool -> softmax over the emotion classes
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    Conv1D(filters, kernel_size, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(num_classes, activation='softmax'),
])

# One-hot targets, hence categorical cross-entropy
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# NOTE(review): this runs before model.fit (called in a later cell), so the saved
# file contains the freshly initialised, untrained weights.
model.save('emotion_detection_model.h5')

In [None]:
# Print the layer-by-layer architecture, output shapes and parameter counts
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 126, 100)          1806200   
                                                                 
 conv1d_2 (Conv1D)           (None, 124, 128)          38528     
                                                                 
 global_max_pooling1d_2 (Glo  (None, 128)              0         
 balMaxPooling1D)                                                
                                                                 
 dense_4 (Dense)             (None, 5)                 645       
                                                                 
Total params: 1,845,373
Trainable params: 1,845,373
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Training hyper-parameters
epochs, batch_size = 10, 32

# Train, monitoring loss/accuracy on the held-out arrays after every epoch
model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f987aacc9d0>

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here)
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


Test Loss: 2.5433542728424072
Test Accuracy: 0.5328736901283264


In [None]:
# Persist the trained model first, then reload it to verify the round trip.
# Loading the file without re-saving would replace the in-memory (trained) model
# with whatever was written to this path earlier, which may predate training.
model.save("emotion_detection_model.h5")
model = tf.keras.models.load_model("emotion_detection_model.h5")


In [None]:
from sklearn.metrics import classification_report

# Generate predictions for the test set and reduce the softmax rows to class ids
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)

# Convert one-hot encoded labels back to class ids
y_true = np.argmax(y_test, axis=1)

# Display names for the report: label_map maps annotation -> class index, so
# order the annotations by their index. (The previously referenced
# `emotion_categories` was never defined, which raised a NameError.)
emotion_categories = [
    str(label)
    for label, _ in sorted(label_map.items(), key=lambda item: item[1])
]

# Pin the label set so the report has one row per class even if a class is
# missing from this split.
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(num_classes)),
    target_names=emotion_categories,
)

print(report)




NameError: ignored

In [None]:
# NOTE(review): `sentences` and `labels` are not assigned anywhere in the Approach 2
# cells; this line relies on values left in the kernel by an earlier cell --
# confirm this split is still needed here.
train_sentences, test_sentences, train_labels, test_labels = train_test_split(sentences, labels, test_size=0.2, random_state=42)


In [None]:
def predict_emotion(sentence):
    """Return the label the current model predicts for a single sentence.

    Relies on the notebook-level `tokenizer`, `max_length`, `model` and
    `label_map` objects.

    Args:
        sentence: raw text to classify.

    Returns:
        The key of `label_map` whose class index has the highest predicted
        probability.
    """
    # Encode exactly like the training data: word indices, padded to max_length
    encoded = tokenizer.texts_to_sequences([sentence])
    padded_input = pad_sequences(encoded, maxlen=max_length)

    # Single-row batch, so take the first (only) probability vector
    emotion_probabilities = model.predict(padded_input)[0]
    best_index = np.argmax(emotion_probabilities)

    # Invert label_map (label -> index) to look the label up by index
    index_to_label = {index: label for label, index in label_map.items()}
    return index_to_label[best_index]


In [None]:
# Try the helper on a single Hindi sentence and show the predicted label
sentence = "यह एक उदाहरण है"
predicted_emotion = predict_emotion(sentence)
print("Predicted Emotion:", predicted_emotion)


NameError: ignored

## Approach 3

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Read the Bhaav CSV and pull out the sentence texts and their annotations
data = pd.read_csv('Bhaav-Dataset.csv')
sentences = data['Sentences'].values
labels = data['Annotation'].values

# Map every annotation value to an integer class id
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Hold out 20% of the rows for testing
(train_sentences, test_sentences,
 train_labels, test_labels) = train_test_split(
    sentences, labels, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training sentences, then encode both splits
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_sentences)
train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Fixed input length for the network
max_seq_length = 100
train_data = pad_sequences(train_sequences, maxlen=max_seq_length)
test_data = pad_sequences(test_sequences, maxlen=max_seq_length)

# Model hyper-parameters (filter_sizes is declared but the model below uses a
# single kernel size of 3)
embedding_dim = 100
num_filters = 128
filter_sizes = [3, 4, 5]

# Embedding -> Conv1D -> global max pool -> dense classifier
model = Sequential([
    Embedding(len(tokenizer.word_index) + 1, embedding_dim, input_length=max_seq_length),
    Conv1D(num_filters, 3, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax'),
])

# Integer labels, hence the sparse variant of the loss
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit, reporting metrics on the held-out split after each epoch
batch_size = 64
epochs = 10
model.fit(
    train_data,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_data, test_labels),
)

# Classify one example sentence and translate the class id back to its annotation
text = "सुना है यहाँ मुर्दो की खोपडिया दौड़ती है "
input_sequence = tokenizer.texts_to_sequences([text])
input_data = pad_sequences(input_sequence, maxlen=max_seq_length)
predictions = model.predict(input_data)
predicted_ids = np.argmax(predictions, axis=1)
emotion_labels = label_encoder.inverse_transform(predicted_ids)

print("Input Text:", text)
print("Predicted Emotion:", emotion_labels[0])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Input Text: सुना है यहाँ मुर्दो की खोपडिया दौड़ती है 
Predicted Emotion: 3


## Approach 4

In [None]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from sklearn.preprocessing import LabelEncoder

# Load the data
data = pd.read_csv('Bhaav-Dataset.csv')

# Preprocess the data.
# Take the annotation column as a 1-D array: selecting it with double brackets
# yields an (n, 1) array, and argmax over axis 1 of that is 0 for every row,
# which collapses all targets into a single class.
texts = data['Sentences'].values
labels = data['Annotation'].values

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to the length of the longest encoded sentence
max_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Label encoding: annotation value -> integer class id
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Define the CNN model
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
# Output width follows the number of classes actually present in the data
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, encoded_labels, epochs=10, validation_split=0.2)

# Predict the emotions for all sentences.
# Sequential.predict_classes no longer exists in current Keras (the source of
# the AttributeError); take the argmax of the softmax output instead.
predicted_labels = model.predict(padded_sequences).argmax(axis=1)

# Convert the predicted class ids back to the original annotation values
predicted_emotions = label_encoder.inverse_transform(predicted_labels)

# Save the predicted emotions to a .txt file, one per line.
# Formatting through an f-string also works when the annotations are numeric.
with open('predicted_emotions.txt', 'w') as file:
    for label in predicted_emotions:
        file.write(f"{label}\n")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


AttributeError: ignored

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Load the dataset
dataset = pd.read_csv('Bhaav-Dataset.csv')

# Preprocess the dataset
X = dataset['Sentences']
y = dataset['Annotation']

# Display names for the integer class ids.
# NOTE(review): confirm this id -> emotion mapping against the dataset documentation.
emotion_labels = {0: 'angry', 1: 'joy', 2: 'sad', 3: 'surprise', 4: 'neutral'}

# Split the dataset into training and testing sets.
# Targets stay as integer class ids: feeding the mapped emotion *names* to the
# network is what raised the ValueError. Names are only used for the output file.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.values
y_test = y_test.values

# Tokenize the text (vocabulary learned from the training split only)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)
vocab_size = len(tokenizer.word_index) + 1

X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

# Pad both splits to the longest training sequence
max_length = max(len(sequence) for sequence in X_train)
X_train = pad_sequences(X_train, maxlen=max_length, padding='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post')

# Define the CNN model
embedding_dim = 100

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(emotion_labels), activation='softmax'))

# Integer targets, hence the sparse variant of categorical cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test))

# Predict the emotions for the test set.
# Sequential.predict_classes no longer exists in current Keras; take the argmax
# of the softmax output instead.
predicted_labels = model.predict(X_test).argmax(axis=1)
predicted_emotions = [emotion_labels[int(label)] for label in predicted_labels]

# Save the predicted emotions in a text file, one per line
with open('predicted_emotions.txt', 'w') as file:
    for emotion in predicted_emotions:
        file.write(emotion + '\n')


Epoch 1/10


ValueError: ignored

## Approach 5


In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
from sklearn.preprocessing import LabelEncoder

!pip install unidecode
!pip install openpyxl

import pandas as pd
from pandas import read_excel
import numpy as np
import re
from re import sub
import multiprocessing
from unidecode import unidecode
import os
from time import time 
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Embedding, Flatten, Conv1D, BatchNormalization
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K
import h5py
import csv
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

In [None]:
# Read the cricket dataset from the "Sheet1" worksheet and preview the first rows
dataset_cricket = pd.read_excel(io="Cricket.xlsx", sheet_name="Sheet1", engine='openpyxl')
dataset_cricket.head()

In [None]:
# Bar chart of how many rows fall into each polarity class.
# Name the column via keywords: recent seaborn releases treat the first
# positional argument as `data`, so a bare Series is no longer read as `x`.
sns.countplot(data=dataset_cricket, x='Polarity')

In [None]:
def text_to_word_list(text):
    """Split `text` on runs of whitespace and return the resulting list of words."""
    return text.split()

def replace_strings(text):
    """Strip emoji/symbol characters and ASCII letters and digits from `text`.

    Two substitutions are applied in order: first every run of characters in
    the Unicode ranges listed below is removed, then every run of ASCII
    letters/digits (matched case-insensitively) is removed.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string; whitespace and all other characters are kept.
    """
    symbol_re = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           u"\u00C0-\u017F"          # latin
                           u"\u2000-\u206F"          # general punctuation

                           "]+", flags=re.UNICODE)
    alnum_re = re.compile('[a-zA-Z0-9]+', flags=re.I)

    # Symbols go first, then whatever ASCII alphanumerics are left
    without_symbols = symbol_re.sub(r'', text)
    return alnum_re.sub(r'', without_symbols)

def remove_punctuations(my_str):
    """Return `my_str` with every character found in the removal set dropped.

    Besides punctuation marks, the removal set also contains ASCII digits,
    Bengali digits, a few currency/symbol characters and the letters
    E, R, O, e, r, o, so those are stripped as well.

    Args:
        my_str: the string to filter.

    Returns:
        A new string made of the characters of `my_str` that are not in the
        removal set, in their original order.
    """
    # Characters to strip. The backslash is written as an explicit `\\` escape:
    # it denotes the same single backslash as before without relying on an
    # invalid escape sequence.
    punctuations = '''````£|¢|Ñ+-*/=EROero৳০১২৩৪৫৬৭৮৯012–34567•89।!()-[]{};:'"“\\’,<>./?@#$%^&*_~‘—॥”‰⚽️✌�￰৷￰'''

    # Filter in a single pass and join once, instead of growing a string
    # character by character.
    return "".join(char for char in my_str if char not in punctuations)



def joining(text):
    """Concatenate the items of `text` into one string separated by single spaces."""
    return ' '.join(text)

def preprocessing(text):
    """Clean `text` by running replace_strings and then remove_punctuations on it."""
    stripped = replace_strings(text)
    return remove_punctuations(stripped)

In [None]:
# Clean every entry of the Text column (values are cast to str first)
dataset_cricket['Text'] = dataset_cricket['Text'].map(lambda raw: preprocessing(str(raw)))
dataset_cricket.reset_index(drop=True, inplace=True)

# Replace the polarity values by integer class ids
enc = LabelEncoder()
dataset_cricket['Polarity'] = enc.fit_transform(dataset_cricket['Polarity'])

In [None]:
# Hold out 20% of the rows for testing
train1, test1 = train_test_split(dataset_cricket,random_state=69, test_size=0.2)

# Training texts (forced to str) and their integer polarity ids
train_sentences_cricket=train1['Text'].values
train_labels_cricket=train1['Polarity'].values
training_sentences_cricket=np.array([str(sentence) for sentence in train_sentences_cricket])

# Same for the test split
test_sentences_cricket=test1['Text'].values
test_labels_cricket=test1['Polarity'].values
testing_sentences_cricket=np.array([str(sentence) for sentence in test_sentences_cricket])

# One-hot encode both label arrays with the SAME width. Without num_classes,
# to_categorical sizes each array from its own maximum label, so a split that
# happens to miss the highest class would come out one column short.
num_polarity_classes = len(enc.classes_)
train_labels_cricket=tf.keras.utils.to_categorical(train_labels_cricket, num_classes=num_polarity_classes)
test_labels_cricket=tf.keras.utils.to_categorical(test_labels_cricket, num_classes=num_polarity_classes)

print("Training Set Length: "+str(len(train1)))
print("Testing Set Length: "+str(len(test1)))
print("training_sentences shape: "+str(training_sentences_cricket.shape))
print("testing_sentences shape: "+str(testing_sentences_cricket.shape))
print("train_labels shape: "+str(train_labels_cricket.shape))
print("test_labels shape: "+str(test_labels_cricket.shape))

In [None]:
# Peek at one training sentence and one one-hot label.
# NOTE(review): the sentence is row 1 but the label is row 0 -- confirm the
# differing indices are intended.
print(training_sentences_cricket[1])
print(train_labels_cricket[0])

In [None]:
# Hyper-parameters for tokenisation, padding and the embedding layer.
# NOTE(review): vocab_size is computed from the number of training sentences,
# not from the number of distinct words -- confirm this is intended.
vocab_size = len(training_sentences_cricket)+1
embedding_dim = 100
max_length = 100
trunc_type='post'
oov_tok = "<OOV>"
print(training_sentences_cricket.shape)
print(train_labels_cricket.shape)

In [None]:
# Fit the tokenizer on the training sentences only. No num_words cap is set:
# the cap used before was derived from the number of sentences, which is
# unrelated to how many distinct words the corpus contains.
tokenizer = Tokenizer(oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences_cricket)
word_index = tokenizer.word_index

# Size the embedding from the actual vocabulary; +1 because word indices start
# at 1 and index 0 is used for padding.
vocab_size = len(word_index) + 1

print(len(word_index))
print("Word index length:"+str(len(tokenizer.word_index)))

# Encode and pad the training sentences
sequences = tokenizer.texts_to_sequences(training_sentences_cricket)
padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)

# Encode and pad the test sentences with the same truncation side as training,
# so over-long sentences lose the same end in both splits.
test_sequences = tokenizer.texts_to_sequences(testing_sentences_cricket)
testing_padded = pad_sequences(test_sequences,maxlen=max_length, truncating=trunc_type)

In [None]:
# Walk one training sentence through the pipeline: raw text -> indices -> padded row
print("Sentence :--> \n")
print(f"{training_sentences_cricket[2]}\n")
print("Sentence Tokenized and Converted into Sequence :--> \n")
print(f"{sequences[2]}\n")
print("After Padding the Sequence with padding length 100 :--> \n")
print(padded[2])

In [None]:
# Shapes of the padded input matrices for both splits
print(f"Padded shape(training): {padded.shape}")
print(f"Padded shape(testing): {testing_padded.shape}")

In [None]:
def precision(y_true, y_pred):
    """Precision over the given tensors, computed with Keras backend ops.

    Values are clipped to [0, 1] and rounded, so a prediction counts as
    positive once it rounds to 1.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor.

    Returns:
        true positives / (predicted positives + K.epsilon()).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # epsilon keeps the ratio finite when nothing was predicted positive
    return hits / (flagged + K.epsilon())

def recall(y_true, y_pred):
    """Recall over the given tensors, computed with Keras backend ops.

    Values are clipped to [0, 1] and rounded before counting.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor.

    Returns:
        true positives / (actual positives + K.epsilon()).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon keeps the ratio finite when there are no positive targets
    return hits / (actual + K.epsilon())

def f1_score(y_true, y_pred):
    """Harmonic mean of `precision` and `recall` for the given tensors.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor.

    Returns:
        2 * precision * recall / (precision + recall + K.epsilon()).
    """
    pr = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    # epsilon guards the 0/0 case (no true positives at all), which would
    # otherwise evaluate to NaN
    return 2 * (pr * rec) / (pr + rec + K.epsilon())

In [None]:
# Display the one-hot encoded training labels
train_labels_cricket #padded

In [None]:
# from tensorflow.keras.layers import GlobalMaxPooling1D

# Number of polarity classes = width of the one-hot label matrix
num_output_classes = train_labels_cricket.shape[1]

detection=Sequential()

detection.add(Embedding(vocab_size, embedding_dim, input_length=max_length))

# Convolutional block 1
detection.add(Conv1D(64 ,kernel_size=3))
detection.add(BatchNormalization())
detection.add(Activation('relu'))
detection.add(Dropout(0.1))

# Convolutional block 2 (no dropout)
detection.add(Conv1D(96,kernel_size=3))
detection.add(BatchNormalization())
detection.add(Activation('relu'))

# Convolutional block 3
detection.add(Conv1D(128,kernel_size=3))
detection.add(BatchNormalization())
detection.add(Activation('relu'))
detection.add(Dropout(0.15))

# Dense head: flatten once, then two fully connected blocks
detection.add(Flatten())
detection.add(Dense(128))
detection.add(BatchNormalization())
detection.add(Activation('relu'))

detection.add(Dense(64))
detection.add(BatchNormalization())
detection.add(Activation('relu'))

# The targets are one-hot rows with exactly one active class, so use a softmax
# output with categorical cross-entropy. Independent sigmoids trained with
# binary cross-entropy score every class separately, and 'accuracy' then
# resolves to per-element binary accuracy, which overstates performance.
detection.add(Dense(num_output_classes,activation='softmax'))
# `learning_rate` is the supported argument name; `lr` is deprecated
optimum=Adam(learning_rate=0.00001)
detection.summary()
detection.compile(optimizer=optimum,loss='categorical_crossentropy',metrics=['accuracy', precision, recall])

In [None]:
from sklearn.model_selection import train_test_split

# Carve a 15% validation set out of the padded training data.
# A fixed random_state makes the shuffle, and therefore every downstream
# metric, reproducible across kernel restarts.
x_train, x_valid, y_train, y_valid = train_test_split(padded, train_labels_cricket, test_size=0.15, shuffle= True, random_state=69) #data, label

In [None]:

# Train the network; the returned object's `.history` holds the per-epoch
# metrics that the plotting cells read.
model_cricket = detection.fit(x_train, y_train,shuffle=True,epochs=30,batch_size=4,validation_data=(x_valid,y_valid))

In [None]:
# Score the model on the held-out test split
detection.evaluate(x=testing_padded,y=test_labels_cricket,verbose=1)
# returns [loss, accuracy, precision, recall], following the metrics order given to compile()

In [None]:
# Training vs. validation loss per epoch
plt.plot(model_cricket.history['loss'], label='loss_train')
plt.plot(model_cricket.history['val_loss'], label='loss_val')
plt.legend()
plt.title('Train Val_Loss in Proposed Neural Network')
# Save BEFORE show(): once show() has displayed the figure, a following
# savefig() writes a new, empty canvas.
plt.savefig('LossVal_Loss')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch
plt.plot(model_cricket.history['accuracy'], label='train acc')
plt.plot(model_cricket.history['val_accuracy'], label='val acc')
plt.legend()
plt.title('TrainVal_Acc in Proposed Neural Network')
# Save BEFORE show(): once show() has displayed the figure, a following
# savefig() writes a new, empty canvas.
plt.savefig('AccVal_Acc')
plt.show()

## Approach 6


In [None]:
#preparing y

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Encode the values of the "category" column as integer ids, then expand them
# to one-hot rows; both intermediate results are printed for inspection.
# NOTE(review): `data` must already hold a frame with a "category" column -- it
# is not loaded in this cell.
le = LabelEncoder()
labelEncode = le.fit_transform(data["category"])
print("LabelEncode")
print(labelEncode)
categorical_y = to_categorical(labelEncode)
print("To_Categorical")
print(categorical_y)

In [None]:
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences

#preparing x for CNN
# Size of the hashing space that one_hot maps words into
MAX_FEATURES = 20001

# Encode every text as a list of hashed word indices
onehot_corpus = [one_hot(text, MAX_FEATURES) for text in textList]

# Take the padding length from the encoded sequences themselves. Measuring it
# with a different tokenizer (word_tokenize) can disagree with how one_hot
# splits the text, so longer encoded sequences would be silently truncated.
maxTextLen = max((len(encoded) for encoded in onehot_corpus), default=0)

print("Max number of words : ",maxTextLen)

# Pad to a rectangular matrix and hold out a third of the rows for testing
padded_corpus=pad_sequences(onehot_corpus,maxlen=maxTextLen,padding='post')
x_train2,x_test2,y_train2,y_test2 = train_test_split(padded_corpus,categorical_y,test_size=0.33,random_state=42)

In [None]:
from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten
def build_cnn_model():
    """Build and compile the CNN text classifier.

    Reads the notebook-level MAX_FEATURES, maxTextLen and y_train2 to size the
    embedding, the input length and the output layer.

    Returns:
        A compiled Sequential model: Embedding -> Conv1D -> MaxPooling1D ->
        Flatten -> Dense(1024) -> Dense(512) -> softmax over the classes.
    """
    model = Sequential()

    model.add(Embedding(MAX_FEATURES, 100, input_length=maxTextLen))

    # 'same' padding keeps the sequence length; pooling then halves it
    model.add(Conv1D(64, 2, padding='same', activation='relu'))
    model.add(MaxPooling1D(2))

    model.add(Flatten())

    model.add(Dense(units=1024,activation="relu"))
    model.add(Dense(units=512,activation="relu"))

    # One output unit per one-hot column of the training labels
    model.add(Dense(units=y_train2.shape[1],activation="softmax"))

    # `learning_rate` is the supported argument name; `lr` is deprecated
    optimizer = Adam(learning_rate=0.000055,beta_1=0.9,beta_2=0.999)

    # The loss is given by name so the function does not depend on a separately
    # imported `categorical_crossentropy` symbol.
    model.compile(optimizer=optimizer,metrics=["accuracy"],loss="categorical_crossentropy")
    return model

In [None]:
# Build the CNN and render its layer graph with tensor shapes
cnn_model = build_cnn_model()
plot_model(cnn_model,show_shapes=True)

In [None]:
# Train on the training split (no validation data is passed), then get the
# softmax outputs for the test split
cnn_history = cnn_model.fit(x_train2,y_train2,epochs=10,batch_size=1280,shuffle=True)
ypred2 = cnn_model.predict(x_test2)

In [None]:
# Reduce one-hot targets and softmax outputs to class ids once
true_ids = y_test2.argmax(axis=-1)
pred_ids = ypred2.argmax(axis=-1)

# Overall accuracy and the confusion matrix (rows: actual, columns: predicted)
cnn_accuracy = accuracy_score(true_ids, pred_ids)
cnn_cn = confusion_matrix(true_ids, pred_ids)

# Heatmap labelled with the original category names
plt.subplots(figsize=(18,14))
sns.heatmap(
    cnn_cn,
    annot=True,
    fmt="1d",
    cbar=False,
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.title("CNN Accuracy: {}".format(cnn_accuracy),fontsize=50)
plt.xlabel("Predicted",fontsize=15)
plt.ylabel("Actual",fontsize=15)
plt.show()

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right
fig3, axe1 = plt.subplots(nrows=1, ncols=2, figsize=(15,5))
acc_ax, loss_ax = axe1[0], axe1[1]

acc_ax.plot(cnn_history.history["accuracy"],label="accuracy",color="blue")
loss_ax.plot(cnn_history.history["loss"],label="loss",color="red")

acc_ax.title.set_text("CNN Accuracy")
loss_ax.title.set_text("CNN Loss")

acc_ax.set_xlabel("Epoch")
loss_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Rate")

plt.show()

In [None]:
def cnn_predict(text):
    """Predict the category of one text with the trained CNN.

    The text is passed through `leadMyWord`, hashed with `one_hot`, padded to
    `maxTextLen` and fed to `cnn_model`.

    Args:
        text: raw input text.

    Returns:
        The entries of `le.classes_` selected by the argmax of the prediction;
        callers take element [0] for the single input.
    """
    cleaned = leadMyWord(text)
    hashed = one_hot(cleaned,MAX_FEATURES)
    model_input = pad_sequences([hashed],maxlen=maxTextLen,padding='post')
    scores = cnn_model.predict(model_input)
    return le.classes_[scores.argmax(axis=1)]
    
# Spot-check ten randomly drawn rows: show the text, its actual category and
# what each model predicts for it
for _ in range(10):
    row = np.random.randint(len(data))
    text = data.text[row]
    print("  Text")
    print("-" * 8)
    print(text)
    print("-" * 20)
    print(f"Actual Category: {data.category[row]}")
    print(f"ANN Predicted Category: {ann_predict(text)[0]}")
    print(f"CNN Predicted Category: {cnn_predict(text)[0]}")
    print("*" * 50)

In [None]:
#Let me try it too
def predict_print(text):
    """Print `text` followed by the ANN and CNN predicted categories for it."""
    short_rule = "-" * 8
    long_rule = "-" * 20
    print("  Text")
    print(short_rule)
    print(text)
    print(long_rule)
    print(f"ANN Predicted Category: {ann_predict(text)[0]}")
    print(f"CNN Predicted Category: {cnn_predict(text)[0]}")
    print("*" * 50)
# Run both models on two hand-written Turkish sentences
for myText in (
    "Yemeğin içinden kıl çıktı, gitmenizi önermiyorum.",  # hair came out of the dish, I don't suggest you go
    "Tuş bozuk.",  # Key Broken
):
    predict_print(myText)

In [None]:
# Local import: this cell calls time.time(), which needs the module itself
# rather than the bare `time` function.
import time

num_epochs = 100
batch_size = 100

# TensorFlow Session
# NOTE(review): writer, writer1, x, y_true, optimizer, accuracy, merged_summary
# and data.train / data.validation are not created in the cells shown here --
# confirm the graph-building cells run before this one.

# Number of full batches per epoch (loop-invariant, so computed once)
num_batches = len(data.train.labels) // batch_size

# tf.compat.v1 exposes the graph-mode Session API on both late TF 1.x and TF 2.x
with tf.compat.v1.Session() as sess:
    # Initialize all variables
    sess.run(tf.compat.v1.global_variables_initializer())

    # Add the model graph to TensorBoard
    writer.add_graph(sess.graph)

    # Loop over number of epochs
    for epoch in range(num_epochs):

        start_time = time.time()
        train_accuracy = 0

        for batch in range(num_batches):

            # Get a batch of images and labels
            x_batch, y_true_batch = data.train.next_batch(batch_size)

            # Put the batch into a dict with the proper names for placeholder variables
            feed_dict_train = {x: x_batch, y_true: y_true_batch}

            # Run the optimizer using this batch of training data.
            sess.run(optimizer, feed_dict=feed_dict_train)

            # Calculate the accuracy on the batch of training data
            train_accuracy += sess.run(accuracy, feed_dict=feed_dict_train)

            # Generate summary with the current batch of data and write to file
            summ = sess.run(merged_summary, feed_dict=feed_dict_train)
            writer.add_summary(summ, epoch * num_batches + batch)

        # Mean accuracy over the epoch's batches; skipped when there were no
        # full batches, to avoid dividing by zero
        if num_batches:
            train_accuracy /= num_batches

        # Generate summary and validate the model on the entire validation set
        summ, vali_accuracy = sess.run([merged_summary, accuracy], feed_dict={x:data.validation.images, y_true:data.validation.labels})
        writer1.add_summary(summ, epoch)

        end_time = time.time()

        print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
        print("\tAccuracy:")
        print ("\t- Training Accuracy:\t{}".format(train_accuracy))
        print ("\t- Validation Accuracy:\t{}".format(vali_accuracy))