In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, GRU, LSTM, Bidirectional, Dense, Dropout

In [2]:
dt = pd.read_csv('urdu-sentiment-corpus-v1.tsv', delimiter='\t')
dt.columns = ['Tweet', 'Class']
dt['Class'] = dt['Class'].map({'P': 1, 'N': 0})
dt = dt.dropna()
dt

Unnamed: 0,Tweet,Class
0,میں نے ایٹم بم بنایا ھے ۔۔۔۔او بھائی ایٹم بمب ...,1.0
1,چندے سے انقلاب اور عمران خان وزیر اعظم نہیں بن...,0.0
3,"سرچ انجن گوگل کے نائب صدر نے فضا میں ، 130,000...",1.0
4,ابھی تک اسکی لہریں کبھی کبھی آ جاتی ہیں یار :أْ,1.0
5,گندی زبان اور گٹر جیسے دماغ والے جاهل جیالے ه...,0.0
...,...,...
995,اُس آدمی نے اِس سالار کو کافی معقول ٹپ دی ہے ۔,1.0
996,چچا غالب کی روح سے معذرت کے ساتھہم نے مانا کہ ...,1.0
997,واہ جناب واہ! اچھی رہی۔ جناب خود کو فرشتہ سمجو...,1.0
998,اسلام آباد :پی اے ٹی کا دھرنا ختم، صفائی کے کا...,1.0


In [11]:
X = dt['Tweet'].values
y = dt['Class'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
token = Tokenizer()
token.fit_on_texts(X_train)
X_train_sequence = token.texts_to_sequences(X_train)
X_test_sequence = token.texts_to_sequences(X_test)
max_seq_len = max([len(s) for s in X_train_sequence + X_test_sequence])
vocabulary_size = len(token.word_index) + 1
X_train_padded = pad_sequences(X_train_sequence, maxlen=max_seq_len, padding='post')
X_test_padded = pad_sequences(X_test_sequence, maxlen=max_seq_len, padding='post')

def model_creation(modeltype, layernum, dropoutrate):
    mod = Sequential()
    mod.add(Embedding(vocabulary_size, 100, input_length=max_seq_len))
    if modeltype == 'RNN':
        i = 0
        while i < layernum:
            mod.add(SimpleRNN(64, return_sequences=True))
            i += 1
        mod.add(SimpleRNN(64))
    elif modeltype == 'GRU':
        i = 0
        while i < layernum:
            mod.add(GRU(64, return_sequences=True))
            i += 1
        mod.add(GRU(64))
    elif modeltype == 'LSTM':
        i = 0
        while i < layernum:
            mod.add(LSTM(64, return_sequences=True))
            i += 1
        mod.add(LSTM(64))
    elif modeltype == 'BiLSTM':
        i = 0
        while i < layernum:
            mod.add(Bidirectional(LSTM(64, return_sequences=True)))
            i += 1
        mod.add(Bidirectional(LSTM(64)))
    mod.add(Dropout(dropoutrate))
    mod.add(Dense(1, activation='sigmoid'))
    return mod
res= []

In [21]:
for modeltype in ['RNN', 'GRU', 'LSTM', 'BiLSTM']:
    for layernum in [2, 3]:
        for dropoutrate in [0.3, 0.7]:
            print(f"Training {modeltype} with {layernum} Layers and Dropout {dropoutrate}...")
            mod = model_creation(modeltype, layernum, dropoutrate)
            mod.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
            mod.fit(X_train_padded, y_train, epochs=5, batch_size=64, verbose=0)
            y_predicted = np.round(mod.predict(X_test_padded))
            accuracy = accuracy_score(y_test, y_predicted)
            precision = precision_score(y_test, y_predicted)
            recall = recall_score(y_test, y_predicted)
            f1measure = f1_score(y_test, y_predicted)
            res.append({
                'Model': modeltype,
                'Number of Layers': layernum,
                'Dropout': dropoutrate,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall,
                'F-Score': f1measure
            })

Training RNN with 2 Layers and Dropout 0.3...
Training RNN with 2 Layers and Dropout 0.7...
Training RNN with 3 Layers and Dropout 0.3...
Training RNN with 3 Layers and Dropout 0.7...
Training GRU with 2 Layers and Dropout 0.3...
Training GRU with 2 Layers and Dropout 0.7...
Training GRU with 3 Layers and Dropout 0.3...


  _warn_prf(average, modifier, msg_start, len(result))


Training GRU with 3 Layers and Dropout 0.7...
Training LSTM with 2 Layers and Dropout 0.3...


  _warn_prf(average, modifier, msg_start, len(result))


Training LSTM with 2 Layers and Dropout 0.7...
Training LSTM with 3 Layers and Dropout 0.3...
Training LSTM with 3 Layers and Dropout 0.7...
Training BiLSTM with 2 Layers and Dropout 0.3...
Training BiLSTM with 2 Layers and Dropout 0.7...
Training BiLSTM with 3 Layers and Dropout 0.3...
Training BiLSTM with 3 Layers and Dropout 0.7...


In [22]:
results = pd.DataFrame(res)
print(results)

     Model  Number of Layers  Dropout  Accuracy  Precision    Recall   F-Score
0      RNN                 2      0.3  0.489796   0.474074  0.542373  0.505929
1      RNN                 2      0.7  0.518367   0.500000  0.542373  0.520325
2      RNN                 3      0.3  0.510204   0.491803  0.508475  0.500000
3      RNN                 3      0.7  0.551020   0.534483  0.525424  0.529915
4      GRU                 2      0.3  0.530612   0.600000  0.076271  0.135338
5      GRU                 2      0.7  0.481633   0.481633  1.000000  0.650138
6      GRU                 3      0.3  0.518367   0.000000  0.000000  0.000000
7      GRU                 3      0.7  0.518367   0.000000  0.000000  0.000000
8     LSTM                 2      0.3  0.538776   0.513369  0.813559  0.629508
9     LSTM                 2      0.7  0.587755   0.557047  0.703390  0.621723
10    LSTM                 3      0.3  0.632653   0.618644  0.618644  0.618644
11    LSTM                 3      0.7  0.595918   0.