# Sentiment Analysis - Keras and Embedding layer

This notebook contains the training, evaluation, and predictions of a sentiment-analysis classifier built with Keras and an embedding layer.

### Imports

In [None]:
import pandas as pd
import numpy as np

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt


In [None]:
from keras import models
from keras import layers

### Read and plot target

In [None]:
# Load the dataset from the CSV file that sits next to the notebook.
df = pd.read_csv('data.csv')
# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

In [None]:
# Number of rows per distinct value of the Sentiment column (the target).
df['Sentiment'].value_counts()

### Training and experimentation

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, x_test, Y_train, y_test = train_test_split(
    df['Sentence'],
    df['Sentiment'],
    test_size=0.2,
    random_state=13,
)
print(f'Train shapes {X_train.shape}, {Y_train.shape}')
print(f'Test shapes {x_test.shape}, {y_test.shape}')

In [None]:
# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to both splits.
le = LabelEncoder().fit(Y_train)
y_train_le = le.transform(Y_train)
y_test_le = le.transform(y_test)


In [None]:
# One-hot encode the targets: one 0/1 integer column per sentiment label.
Y_train_one = pd.get_dummies(Y_train, dtype=int)
# Encoding the test labels independently can yield fewer (or differently
# positioned) columns when a label is absent from the test split. Align the
# test frame to the training columns so both targets always share the same
# width and column order; labels missing from the test split become all-zero
# columns.
# NOTE(review): a label that appears only in the test split is dropped by the
# reindex (its rows become all zeros) — confirm this cannot happen with the data.
Y_test_one = pd.get_dummies(y_test, dtype=int).reindex(
    columns=Y_train_one.columns, fill_value=0
)

In [None]:
# Text -> integer-id vectorizer:
#   * vocabulary capped at 10000 tokens,
#   * text lower-cased and stripped of punctuation before splitting,
#   * every sentence padded/truncated to exactly 50 token ids.
tk = tf.keras.layers.TextVectorization(
    max_tokens=10000,
    standardize='lower_and_strip_punctuation',
    output_sequence_length=50,
)

In [None]:

# Build the vocabulary from the training sentences only, so no information
# from the test split leaks into the tokenizer.
tk.adapt(X_train)

# Convert both splits to token-id sequences with the fitted vectorizer.
X_train_tokens, x_test_tokens = tk(X_train), tk(x_test)

# To inspect the learned vocabulary, run: tk.get_vocabulary()


In [None]:

# Display the shape of the tokenized training data as a quick sanity check.
X_train_tokens.shape

In [None]:
# Small embedding classifier: token ids -> 8-dimensional embeddings ->
# flattened vector -> 3-way softmax.
emb_model = models.Sequential()
# Declare the input shape with an explicit Input layer. Embedding's
# `input_length` argument is deprecated in Keras 3 (it is ignored with a
# warning), which would leave the model unbuilt when summary() is called.
# An Input layer works on both Keras 2 and Keras 3.
# 50 matches the sequence length produced by the text vectorizer.
emb_model.add(layers.Input(shape=(50,)))
# 10000 is the embedding vocabulary size (same cap as the vectorizer's max_tokens).
emb_model.add(layers.Embedding(10000, 8))
emb_model.add(layers.Flatten())
# NOTE(review): 3 output units assumes three sentiment classes — confirm
# against the one-hot target width.
emb_model.add(layers.Dense(3, activation='softmax'))
emb_model.summary()

In [None]:

# Multi-class setup: softmax outputs trained against one-hot targets with
# categorical cross-entropy, tracking accuracy.
emb_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs in mini-batches of 64; the held-out split is passed as
# validation data so per-epoch validation metrics are recorded in `history`.
history = emb_model.fit(
    x=X_train_tokens,
    y=Y_train_one,
    validation_data=(x_test_tokens, Y_test_one),
    batch_size=64,
    epochs=20,
    verbose=1,
)


### Evaluate model

In [None]:
def eval_metric(history, metric_name):
    '''
    Plot a training metric and its validation counterpart per epoch.

    The number of epochs on the x-axis is taken from the recorded history,
    so the function works for any training length (for example a different
    `epochs` value, or a run that was stopped early).

    Parameters:
        history : model training history; must expose a `history` dict
            mapping metric names to per-epoch lists of values
        metric_name : metric to plot, e.g. 'loss' or 'accuracy'; the key
            'val_' + metric_name must also be present in history.history
    Output:
        line chart with epochs on the x-axis and the metric on the
        y-axis (training values as dots, validation values as a line)
    Raises:
        KeyError : if metric_name or its 'val_' counterpart was not recorded
    '''
    metric = history.history[metric_name]
    val_metric = history.history['val_' + metric_name]

    # Derive the epoch axis from the data instead of assuming exactly 20
    # epochs; a fixed range would not match the series length for any other
    # training length and plotting would fail.
    epochs = range(1, len(metric) + 1)

    plt.plot(epochs, metric, 'bo', label='Train ' + metric_name)
    plt.plot(epochs, val_metric, 'b', label='Validation ' + metric_name)
    plt.legend()
    plt.show()

In [None]:
# Plot training vs. validation accuracy for each epoch of the run above.
eval_metric(history=history, metric_name='accuracy')