<a href="https://colab.research.google.com/github/saatweek/Coronavirus_tweets_NLP_Text_Classification/blob/master/Corona_Tweets_Classification_(1D_Convolutions).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The CSV paths defined further down in this cell point below that directory,
# so this has to run before the data is read.

from google.colab import drive

drive.mount('/content/drive')

#importing all the dependencies

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Locations of the training and test CSV files on the mounted Google Drive.

train_csv_path = "/content/drive/My Drive/Colab Notebooks/Personal Projects/Corona tweets/Corona_NLP_train.csv"
test_csv_path = '/content/drive/My Drive/Colab Notebooks/Personal Projects/Corona tweets/Corona_NLP_test.csv'

# Read both splits with pandas. The training file is decoded as ISO-8859-1;
# the test file is read with the pandas default encoding.
# NOTE(review): confirm the test CSV really needs a different encoding from
# the training CSV.

training_dataframe = pd.read_csv(train_csv_path, encoding = "ISO-8859-1")
testing_dataframe = pd.read_csv(test_csv_path)

# DataFrame.dropna() returns a NEW frame and leaves the original untouched, so
# its result has to be assigned back (a bare `df.dropna()` is a no-op).
# Only the last two columns (sentence and label) are read later in this
# notebook, so rows are dropped only when one of those two is missing; a row
# with a gap in some unused column is still a perfectly usable sample.
training_dataframe = training_dataframe.dropna(subset = list(training_dataframe.columns[-2:]))
testing_dataframe = testing_dataframe.dropna(subset = list(testing_dataframe.columns[-2:]))

# Collect the training and testing data as plain Python lists.
# The second-to-last column of each frame supplies the sentences and the last
# column supplies the labels; every space is stripped out of each label.

training_sentences = list(training_dataframe.iloc[:, -2])
training_labels = [raw_label.replace(' ', '') for raw_label in training_dataframe.iloc[:, -1]]

testing_sentences = list(testing_dataframe.iloc[:, -2])
testing_labels = [raw_label.replace(' ', '') for raw_label in testing_dataframe.iloc[:, -1]]

#Hyperparameters
vocab_size = 10000      # num_words limit for the sentence tokenizer (also the Embedding input size)
oov_tok = '<OOV>'       # placeholder token for words outside the kept vocabulary
max_length = 64         # every sentence is padded / truncated to this many tokens
pad_type = 'post'       # pad at the end of short sentences
trunc_type = 'pre'      # cut from the start of long sentences
num_labels = 5          # number of distinct classes expected in the label column
embedding_dims = 16
num_epochs = 20

sentence_tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)

# The label tokenizer deliberately has NO num_words limit and NO OOV token.
# With `num_words = num_labels` and an OOV token, index 1 is reserved for OOV,
# the labels receive indices 2..num_labels+1, and every index >= num_words is
# rewritten to the OOV index by texts_to_sequences -- which silently merges
# the least frequent labels into a single class.
label_tokenizer = Tokenizer()

sentence_tokenizer.fit_on_texts(training_sentences)
label_tokenizer.fit_on_texts(training_labels)

sentence_word_index = sentence_tokenizer.word_index
label_word_index = label_tokenizer.word_index

# Sanity check: the model's output layer has num_labels units, so the training
# labels must contain exactly that many distinct classes.
assert len(label_word_index) == num_labels, (
    "expected %d distinct labels, found %d: %s"
    % (num_labels, len(label_word_index), sorted(label_word_index))
)

training_sequence = sentence_tokenizer.texts_to_sequences(training_sentences)
testing_sequence = sentence_tokenizer.texts_to_sequences(testing_sentences)
training_label_sequence = label_tokenizer.texts_to_sequences(training_labels)
testing_label_sequence = label_tokenizer.texts_to_sequences(testing_labels)

# Every label must map to exactly one id; a label unseen during fitting would
# yield an empty sequence and produce a ragged array below.
assert all(len(ids) == 1 for ids in training_label_sequence), "a training label did not map to exactly one id"
assert all(len(ids) == 1 for ids in testing_label_sequence), "a testing label did not map to exactly one id"

# pad_sequences already returns NumPy arrays, so no extra np.array() copy is needed.
padded_training = pad_sequences(training_sequence, maxlen = max_length, padding = pad_type, truncating = trunc_type)
padded_testing = pad_sequences(testing_sequence, maxlen = max_length, padding = pad_type, truncating = trunc_type)

# Tokenizer ids start at 1; shift them to 0..num_labels-1, the range that
# sparse_categorical_crossentropy expects for a num_labels-unit softmax.
training_label_sequence = np.array(training_label_sequence) - 1
testing_label_sequence = np.array(testing_label_sequence) - 1

Mounted at /content/drive


In [None]:
# 1D-convolutional text classifier:
#   token ids -> embedding -> Conv1D (128 filters, window of 5 tokens)
#   -> average over the sequence axis -> dense hidden layer -> softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dims),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation = 'relu'),
    # One output unit per class; driven by the num_labels hyperparameter
    # instead of a second hard-coded 5 so the two cannot drift apart.
    tf.keras.layers.Dense(num_labels, activation = 'softmax')
])

# Labels are integer class ids (not one-hot vectors), hence the sparse loss.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# The testing split is passed as validation data, so the val_* metrics
# recorded in `history` are measured on that split.
history = model.fit(padded_training, training_label_sequence, 
                    validation_data = (padded_testing, testing_label_sequence), 
                    epochs = num_epochs)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
import plotly.graph_objects as go

# Plot training and validation accuracy per epoch.
fig = go.Figure()
y1 = history.history['accuracy']
y2 = history.history['val_accuracy']
# One x value per recorded epoch (1-based). Deriving the axis from the history
# keeps x and y the same length regardless of how many epochs were trained.
x = np.arange(1, len(y1) + 1)
fig.add_trace(go.Scatter(x = x, y = y1, name = 'accuracy'))
fig.add_trace(go.Scatter(x = x, y = y2, name = 'val_accuracy'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'accuracy', title = 'Accuracy of Model')
fig.show()