In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### FILE PATH

In [None]:
# CSV locations of the AG News train / test splits on the Kaggle input mount.
TRAIN_FILE_PATH, TEST_FILE_PATH = (
    '/kaggle/input/ag-news-classification-dataset/train.csv',
    '/kaggle/input/ag-news-classification-dataset/test.csv',
)

This is a classification problem: each news article, consisting of a title and a description, must be assigned to one of the following categories: 1-World, 2-Sports, 3-Business, 4-Sci/Tech.
The input is sequential data (text).

This is a sequence-modelling problem.
Since the existing data is already available, we can use a bidirectional LSTM for this classification problem.

## Reading the dataframe from the data set.

In [None]:
# pd1 holds the training split, pd2 the test split (loaded in that order).
pd1, pd2 = (pd.read_csv(csv_path) for csv_path in (TRAIN_FILE_PATH, TEST_FILE_PATH))

In [None]:
# Preview the first rows of the training (pd1) and test (pd2) frames.
# `display` is provided by IPython in notebook kernels and renders each
# DataFrame as a table, instead of the wrapped plain text print() produces.
display(pd1.head(), pd2.head())

In [None]:
# Column index of the training frame, then of the test frame, one per line.
print(pd1.columns, pd2.columns, sep='\n')

In [None]:
# Show the (rows, columns) shape of the training frame.
pd1.shape

## Let's analyze the data

#### Installing Sweetviz library

In [None]:
! pip install sweetviz

In [None]:
import sweetviz as sv

In [None]:
# Build a Sweetviz analysis report for the training frame.
report = sv.analyze(pd1)

In [None]:
# Render the Sweetviz report inline in the notebook output.
report.show_notebook()

In [None]:
# Re-check the training frame's column names before selecting the feature
# and label columns in the pre-processing step below.
pd1.columns

##  Data Pre-Processing 

In [None]:
# Build one text field per article by joining the title and the description
# with a single space. The 'Class Index' column is deliberately left out of
# the features; it is used only for the labels below.
X_train = pd1['Title'] + ' ' + pd1['Description']

# Same separator as the training text, so both splits are built identically.
X_test = pd2['Title'] + ' ' + pd2['Description']

# 'Class Index' is 1-based (1-World ... 4-Sci/Tech). Shift it to 0-based ids,
# which is what sparse_categorical_crossentropy with a 4-unit softmax expects.
y_train = pd1['Class Index'] - 1  # training labels
y_test = pd2['Class Index'] - 1   # test labels



In [None]:
# Longest training text, measured in whitespace-separated words. Used later
# as the fixed sequence length for padding and for the Embedding layer.
words_per_text = X_train.map(str.split).map(len)
max_len = words_per_text.max()

1. Data generator (shuffling the data)
2. Pipeline

## Let's tokenize the text data set.

Tokenization is one of the most important pre-processing steps when modelling a text dataset.

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocabulary_size = 10000  # arbitrary choice; passed to the Tokenizer as num_words
embed_size = 32          # arbitrary choice; embedding dimension used by the models

# Learn the word index from the training texts only.
tok = Tokenizer(num_words=vocabulary_size)
tok.fit_on_texts(X_train.values)

# Replace each text with its sequence of word ids, then pad/truncate every
# sequence to max_len so both splits become fixed-width arrays.
# NOTE: this rebinds X_train / X_test from text to arrays, so the cell must
# not be re-run without first re-running the cell that builds the text.
X_train, X_test = (
    pad_sequences(tok.texts_to_sequences(texts), maxlen=max_len)
    for texts in (X_train, X_test)
)


## MODEL

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GlobalMaxPooling1D, Bidirectional

In [None]:
import pandas as pd
import numpy as np

#Data Visualization
import matplotlib.pyplot as plt

#Text Color
from termcolor import colored

#Train Test Split
from sklearn.model_selection import train_test_split

#Model Evaluation
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
from mlxtend.plotting import plot_confusion_matrix

#Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GlobalMaxPooling1D, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import plot_model

In [None]:
from tensorflow.keras.layers import Embedding

In [None]:
# Same values as in the tokenization cell above, repeated here.
# vocabulary_size must stay equal to the Tokenizer's num_words because it is
# also the Embedding layer's input dimension; embed_size is its output dimension.
vocabulary_size = 10000 # random value
embed_size      = 32  # random value

In [None]:
# Bidirectional-LSTM classifier, declared as a single list of layers.
model = Sequential([
    # Input: maps each word id to an embed_size-dimensional vector.
    Embedding(vocabulary_size, embed_size, input_length=max_len),
    # Two stacked bidirectional LSTMs that keep their per-timestep outputs.
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64, return_sequences=True)),
    # Max over the time axis: collapses each sequence to one fixed-size vector.
    GlobalMaxPooling1D(),
    # Fully connected head; dropout after each hidden layer for regularization.
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.25),
    # Output: one probability per class (4 labels).
    Dense(4, activation='softmax'),
])


In [None]:
# Print the layer-by-layer summary of the bidirectional LSTM model.
model.summary()

In [None]:
# callbacks = [
#     EarlyStopping(     #EarlyStopping is used to stop at the epoch where val_accuracy does not improve significantly
#         monitor='val_accuracy',
#         min_delta=1e-4,
#         patience=4,
#         verbose=1
#     ),
#     ModelCheckpoint(
#         filepath='weights.h5',
#         monitor='val_accuracy', 
#         mode='max', 
#         save_best_only=True,
#         save_weights_only=True,
#         verbose=1
#     )
# ]

In [None]:
# Labels are integer class ids (0-3), hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the bidirectional LSTM for 20 epochs. The EarlyStopping/ModelCheckpoint
# callbacks are commented out above, so no callbacks are passed here.
# NOTE(review): validation metrics are computed on the test split
# (X_test, y_test); there is no separate hold-out set in this notebook.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=20,
    validation_data=(X_test, y_test),
)

## The bidirectional LSTM reaches a maximum validation accuracy of 90.49% and a training accuracy of 98.16%

## Let's evaluate the accuracy of a GRU model

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
# Bidirectional-GRU classifier: same layout as the LSTM model above, with the
# recurrent layers swapped for GRUs. Rebinds `model`.
model = Sequential([
    # Input: maps each word id to an embed_size-dimensional vector.
    Embedding(vocabulary_size, embed_size, input_length=max_len),
    # Two stacked bidirectional GRUs that keep their per-timestep outputs.
    Bidirectional(GRU(128, return_sequences=True)),
    Bidirectional(GRU(64, return_sequences=True)),
    # Max over the time axis: collapses each sequence to one fixed-size vector.
    GlobalMaxPooling1D(),
    # Fully connected head; dropout after each hidden layer for regularization.
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.25),
    # Output: one probability per class (4 labels).
    Dense(4, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the bidirectional GRU model.
model.summary()

In [None]:
# First GRU run: RMSprop optimizer, sparse categorical loss for integer labels.
model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the GRU model for 10 epochs, validating on the test split each epoch.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_data=(X_test, y_test),
)

## The GRU model (RMSprop) reaches a maximum validation accuracy of 91.96% and a training accuracy of 92.88%

In [None]:
# Second GRU run: try the Adam optimizer.
# compile() only swaps the optimizer/loss/metrics; it does not reset the layer
# weights. Compiling the already-fitted model would therefore continue from
# the RMSprop-trained weights instead of giving a fair Adam-vs-RMSprop
# comparison. clone_model() rebuilds the same architecture with newly
# initialized weights, so this run starts from scratch.
model = tf.keras.models.clone_model(model)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs with the optimizer set by the preceding compile cell,
# validating on the test split each epoch.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_data=(X_test, y_test),
)