In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping


In [5]:
data = pd.read_csv('training.1600000.processed.noemoticon.csv', encoding='latin-1')

In [6]:
data = data.rename(columns={'0':'Sentiment',
                     '1467810369': 'Tweet ID',
                     'Mon Apr 06 22:19:45 PDT 2009':'Date',
                     'NO_QUERY':'Query',
                     '_TheSpecialOne_':'Username',
                     "@switchfoot http://twitpic.com/2y1zl - Awww, that\'s a bummer.  You shoulda got David Carr of Third Day to do it. ;D":'Tweet'})

In [7]:
def Sentiment_remap(x):
    if x == 0:
        return 0
    else:
        return 1

In [8]:
data['Sentiment'] = data['Sentiment'].apply(Sentiment_remap)

In [9]:
def username_removal(string):
    
    for i in range(string.count('@')): 
        username_begin_index = string.find('@')
        username_end_index = string[username_begin_index::].find(' ') + username_begin_index
    
        string = string.replace(string[username_begin_index:username_end_index+1],' ')
    
    return string                    

In [10]:
data['Tweet'] = data['Tweet'].apply(username_removal)

In [11]:
x = data['Tweet']
y = data['Sentiment']

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x,y,train_size=0.66, stratify=y, random_state=7)

In [13]:
tokenizer = Tokenizer(num_words=10000)

In [14]:
tokenizer.fit_on_texts(x_train)

In [15]:
x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)

In [16]:
x_train_seq

[[1,
  668,
  1,
  413,
  6,
  156,
  33,
  5,
  1428,
  615,
  695,
  86,
  1,
  237,
  2,
  9239,
  508,
  1,
  642,
  41],
 [49,
  17,
  299,
  2337,
  4351,
  10,
  5,
  339,
  53,
  3,
  1289,
  196,
  3,
  282,
  77,
  40,
  7,
  82,
  2452,
  132,
  197,
  3716,
  1929,
  14],
 [65, 65, 65, 65, 65, 65, 19, 32, 16, 4, 517, 204, 65, 690, 61, 14, 2513, 4],
 [990, 269, 39, 445, 59, 155],
 [57,
  15,
  15,
  128,
  1,
  428,
  196,
  28,
  41,
  1,
  1172,
  28,
  620,
  9804,
  65,
  5,
  3877,
  644,
  1,
  5258,
  78,
  72,
  5332,
  382,
  73,
  71,
  24],
 [1, 25, 1381, 1, 25, 20, 845, 73, 3, 10, 3, 25, 1, 74, 82, 1449, 535, 4],
 [6784,
  7452,
  62,
  1,
  15,
  19,
  1641,
  3299,
  3731,
  3597,
  103,
  21,
  335,
  34,
  4,
  1668,
  7906],
 [2779, 8, 15, 176, 1, 47, 9],
 [940, 406, 6843, 178, 7, 15, 91, 11, 31, 3, 7226, 91, 2490, 835],
 [9,
  519,
  2,
  46,
  11,
  14,
  13,
  3538,
  44,
  725,
  127,
  7,
  540,
  1264,
  14,
  4,
  12,
  54,
  7,
  33,
  86,
  7,
  283

In [17]:
max_len = 100
x_train_pad = pad_sequences(x_train_seq, maxlen=max_len, padding='post')
x_test_pad = pad_sequences(x_test_seq, maxlen=max_len, padding='post')


In [18]:
x_train_pad

array([[  1, 668,   1, ...,   0,   0,   0],
       [ 49,  17, 299, ...,   0,   0,   0],
       [ 65,  65,  65, ...,   0,   0,   0],
       ...,
       [  1, 162,   7, ...,   0,   0,   0],
       [144,   3, 689, ...,   0,   0,   0],
       [ 29, 343,   0, ...,   0,   0,   0]], dtype=int32)

# MODEL 1 - Potpuno povezane neuronske mreze



In [19]:
# import tensorflow as tf
# tf.logging.set_verbosity(tf.logging.ERROR)

In [20]:
from keras.models import Sequential
from keras.layers import Dense

In [26]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=max_len),
    SpatialDropout1D(0.4),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

2024-01-27 21:13:02.974772: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-27 21:13:02.975194: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-27 21:13:02.975484: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

InternalError: cudaSetDevice() on GPU:0 failed. Status: CUDA-capable device(s) is/are busy or unavailable

In [None]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

In [None]:
history = model.fit(x_train_pad, y_train, epochs=10, batch_size=128, validation_split=0.2, callbacks=[early_stopping])

In [None]:
loss, accuracy = model.evaluate(x_test_pad, y_test)

In [None]:
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')