In [38]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, RNN, LSTM, GRU, SpatialDropout1D


In [4]:
data = pd.read_csv('sentiment140/training.1600000.processed.noemoticon.csv', encoding='latin-1')

In [5]:
data = data.rename(columns={'0':'Sentiment',
                     '1467810369': 'Tweet ID',
                     'Mon Apr 06 22:19:45 PDT 2009':'Date',
                     'NO_QUERY':'Query',
                     '_TheSpecialOne_':'Username',
                     "@switchfoot http://twitpic.com/2y1zl - Awww, that\'s a bummer.  You shoulda got David Carr of Third Day to do it. ;D":'Tweet'})

In [6]:
def Sentiment_remap(x):
    if x == 0:
        return 0
    else:
        return 1

In [7]:
data['Sentiment'] = data['Sentiment'].apply(Sentiment_remap)

In [8]:
def username_removal(string):
    
    for i in range(string.count('@')): 
        username_begin_index = string.find('@')
        username_end_index = string[username_begin_index::].find(' ') + username_begin_index
    
        string = string.replace(string[username_begin_index:username_end_index+1],' ')
    
    return string                    

In [9]:
data['Tweet'] = data['Tweet'].apply(username_removal)

In [10]:
x = data['Tweet']
y = data['Sentiment']

In [174]:
x_train, x_test, y_train, y_test = train_test_split(x,y,train_size=0.03, test_size=0.01, stratify=y, random_state=7)

In [175]:
tokenizer = Tokenizer(num_words=10000)

In [176]:
tokenizer.fit_on_texts(x_train)

In [177]:
x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)

In [178]:
x_train_seq[0]

[20, 1017, 177, 712, 2, 3, 305, 1607, 3, 3675, 23, 946, 12, 38, 38]

In [181]:
non_zero = 0
duzina = len(x_train_seq)

for i in range(len(x_train_seq)):
    non_zero += np.count_nonzero(x_train_seq[i])
max_len = int(non_zero/duzina)
max_len

12

In [182]:
x_train_pad = pad_sequences(x_train_seq, maxlen=max_len, padding='post')
x_test_pad = pad_sequences(x_test_seq, maxlen=max_len, padding='post')

# MODEL 1 - Potpuno povezane neuronske mreze



In [183]:
# import tensorflow as tf
# tf.logging.set_verbosity(tf.logging.ERROR)

In [184]:
from keras.models import Sequential
from keras.layers import Dense

In [185]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=max_len),
    #SpatialDropout1D(0.25),
    SimpleRNN(units=128),
    #LSTM(64),
    Dense(1, activation='sigmoid')
])

In [186]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [187]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

In [188]:
history = model.fit(x_train_pad, y_train, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [189]:
loss, accuracy = model.evaluate(x_test_pad, y_test)



In [None]:
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

# Modeli sa drugim paddingom

In [191]:
max_len = 50
x_train_pad = pad_sequences(x_train_seq, maxlen=max_len, padding='post')
x_test_pad = pad_sequences(x_test_seq, maxlen=max_len, padding='post')

In [192]:
model1 = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=max_len),
    #SpatialDropout1D(0.25),
    SimpleRNN(units=128),
    #LSTM(64),
    Dense(1, activation='sigmoid')
])

In [193]:
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [194]:
history1 = model1.fit(x_train_pad, y_train, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [195]:
loss, accuracy = model1.evaluate(x_test_pad, y_test)

