# Sentiment analysis on airline tweets with a Recurrent Neural Network
For this model I used the
__[Twitter US Airline Sentiment Dataset](https://www.kaggle.com/crowdflower/twitter-airline-sentiment)__.

In [13]:
import pandas as pd
import numpy as np
import keras
from keras import layers
from sklearn.preprocessing import LabelEncoder
from utils import train_test_split, to_sequences

In [4]:
# Load the tweet table from disk, then pick out the tweet text (model input)
# and the sentiment label (prediction target).
tweets = pd.read_csv('data/Tweets.csv')
X, y = tweets['text'], tweets['airline_sentiment']

In [5]:
# Encode the string sentiment labels as integer class ids and wrap them in a
# one-column DataFrame (the column is named 0).
# The labels are read straight from `tweets` rather than from `y`, because this
# cell rebinds `y`: reading `y` here would feed the already-encoded frame back
# into the encoder whenever the cell is re-run.
enc = LabelEncoder()
y = pd.DataFrame(enc.fit_transform(tweets['airline_sentiment']))
y.head()

Unnamed: 0,0
0,1
1,2
2,1
3,0
4,0


In [6]:
# Put the tweets and their encoded labels side by side in a single frame so
# that text and label travel together through the train/test split.
data = pd.concat([X, y], axis='columns')
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=13,
)

In [9]:
# Sequence length handed to to_sequences and, later, to the model.
maxlen = 30

# Raw tweet text of each split.
X_train_raw, X_test_raw = train_data['text'], test_data['text']

# Encoded labels of each split as NumPy arrays; the label column is named 0.
y_train, y_test = (np.array(part[0]) for part in (train_data, test_data))

# Tokenize the text and apply padding (to_sequences comes from utils).
# NOTE(review): maxlen is presumably the padded sequence length -- confirm in utils.
X_train, X_test, vocab_size = to_sequences(X_train_raw, X_test_raw, maxlen)


In [10]:
def build_model(vocab_size, maxlen, embedding_dim=256, lstm_units=32,
                recurrent_dropout=0.3, dense_units=64, dropout=0.5,
                n_classes=3):
    """Build the (uncompiled) LSTM classifier.

    The network is a stack of: Embedding -> LSTM -> Dense(relu) -> Dropout ->
    Dense(softmax). With the default arguments it is identical to the
    original fixed architecture.

    Parameters
    ----------
    vocab_size : int
        First argument of the Embedding layer (size of its input vocabulary).
    maxlen : int
        Passed to the Embedding layer as ``input_length``.
    embedding_dim : int, optional
        Output dimension of the Embedding layer.
    lstm_units : int, optional
        Number of units in the LSTM layer.
    recurrent_dropout : float, optional
        ``recurrent_dropout`` rate of the LSTM layer.
    dense_units : int, optional
        Number of units in the hidden ReLU Dense layer.
    dropout : float, optional
        Rate of the Dropout layer placed before the output layer.
    n_classes : int, optional
        Number of units in the softmax output layer.

    Returns
    -------
    keras.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    return keras.Sequential([
        layers.Embedding(vocab_size, embedding_dim, input_length=maxlen),
        layers.LSTM(lstm_units, recurrent_dropout=recurrent_dropout),
        layers.Dense(dense_units, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(n_classes, activation='softmax'),
    ])

In [15]:
# Instantiate the network and attach its training configuration.
# The labels are integer class ids (LabelEncoder output), hence the *sparse*
# categorical cross-entropy loss.
# NOTE(review): `lr` is the older spelling of Adam's learning-rate argument;
# newer Keras releases expect `learning_rate` -- confirm against the installed version.
model = build_model(vocab_size, maxlen)
model.compile(
    optimizer=keras.optimizers.Adam(lr=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Train for 5 epochs with mini-batches of 64 and keep the per-epoch metrics in `history`.
# NOTE(review): the test split is also used as the validation set here, so the
# validation metrics are not an independent estimate from any later test score.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

Train on 11712 samples, validate on 2928 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
