## Task 2: Setup and Imports

1. Installing the required packages, including Hugging Face's nlp and datasets
2. Importing libraries

In [None]:
%pip install nlp
%pip install matplotlib
%pip install tensorflow
%pip install datasets
%pip install tf-keras

In [67]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import nlp
import random
from datasets import load_dataset

%matplotlib inline

def show_history(h):
    """Draw training vs. validation accuracy and loss curves side by side.

    Args:
        h: Object exposing a ``history`` mapping with a 'loss' entry and,
            looked up via ``.get``, 'accuracy', 'val_accuracy' and 'val_loss'
            entries (one value per trained epoch).
    """
    metrics = h.history
    # The number of recorded loss values defines the x-axis for every curve.
    epoch_axis = range(0, len(metrics['loss']))
    series = (('', 'Training'), ('val_', 'Validation'))

    plt.figure(figsize=(16, 6))

    # Left panel: accuracy, pinned to the [0, 1] range.
    plt.subplot(1, 2, 1)
    for prefix, legend_name in series:
        plt.plot(epoch_axis, metrics.get(prefix + 'accuracy'), label=legend_name)
    plt.ylim([0., 1.])
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Right panel: loss, with an automatically scaled y-axis.
    plt.subplot(1, 2, 2)
    for prefix, legend_name in series:
        plt.plot(epoch_axis, metrics.get(prefix + 'loss'), label=legend_name)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()


def show_confusion_matrix(y_true, y_pred, classes):
    """Plot a confusion matrix, normalized over the true labels, as a heatmap.

    Args:
        y_true: Ground-truth labels, forwarded to sklearn's confusion_matrix.
        y_pred: Predicted labels, forwarded to sklearn's confusion_matrix.
        classes: Sequence of class names used as tick labels on both axes.
            One tick is drawn per entry, so its length should match the
            number of rows/columns of the confusion matrix.
    """
    # Imported here rather than at the top of the notebook, so scikit-learn
    # is only required when this function is actually called.
    from sklearn.metrics import confusion_matrix

    cm = confusion_matrix(y_true, y_pred, normalize='true')

    plt.figure(figsize=(8, 8))
    sp = plt.subplot(1, 1, 1)
    ctx = sp.matshow(cm)
    # Derive tick positions from the class list instead of assuming 6 classes.
    tick_positions = list(range(len(classes)))
    plt.xticks(tick_positions, labels=classes)
    plt.yticks(tick_positions, labels=classes)
    plt.colorbar(ctx)
    plt.show()


# Report the TensorFlow version the kernel is running.
print('Using TensorFlow version', tf.__version__)

Using TensorFlow version 2.19.0


## Task 3: Importing Data

1. Importing the Tweet Emotion dataset
2. Creating train, validation and test sets
3. Extracting tweets and labels from the examples

In [68]:
# Load the 'emotion' dataset through Hugging Face's `datasets` library.
dataset = load_dataset('emotion')

In [69]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [70]:
# Pull the three splits out of the loaded dataset by name.
train = dataset['train']
val = dataset['validation']
test = dataset['test']

In [71]:
def get_tweet(data):
    """Split dataset examples into parallel lists of tweet texts and emotion names.

    Args:
        data: Iterable of examples, each indexable by 'text' and 'label',
            where 'label' is an integer class id from 0 to 5.

    Returns:
        A ``(tweets, labels)`` tuple of two equal-length lists: the raw tweet
        texts and the emotion name for each tweet, in input order.

    Raises:
        ValueError: If an example carries a label id that has no known name.
            Skipping it instead would leave ``tweets`` and ``labels``
            misaligned.
    """
    # Integer class id -> emotion name. Id 5 ('surprise') must be present,
    # otherwise those examples lose their label and the lists fall out of sync.
    label_names = {
        0: 'sadness',
        1: 'joy',
        2: 'love',
        3: 'anger',
        4: 'fear',
        5: 'surprise',
    }

    tweets = []
    labels = []
    # Single pass keeps every text paired with its own label.
    for example in data:
        label_id = example['label']
        if label_id not in label_names:
            raise ValueError(f'Unknown emotion label id: {label_id!r}')
        tweets.append(example['text'])
        labels.append(label_names[label_id])
    return tweets, labels


In [72]:
# Extract the tweet texts and emotion names from the training split.
tweets, labels = get_tweet(train)

In [73]:
tweets[0], labels[0]

('i didnt feel humiliated', 'sadness')

## Task 4: Tokenizer

1. Tokenizing the tweets

In [74]:
from tf_keras.preprocessing.text import Tokenizer

In [75]:
# Cap the encoding vocabulary via num_words and map out-of-vocabulary words to "<UNK>".
tokenizer = Tokenizer(num_words = 10000, oov_token = "<UNK>")
# Build the word index from the training tweets.
tokenizer.fit_on_texts(tweets)

In [76]:
# Encode the first tweet to check the word-to-index mapping.
tokenizer.texts_to_sequences([tweets[0]])

[[2, 139, 3, 679]]

In [77]:
tweets[0]

'i didnt feel humiliated'

## Task 5: Padding and Truncating Sequences

1. Checking length of the tweets
2. Creating padded sequences

## Task 6: Preparing the Labels

1. Creating classes to index and index to classes dictionaries
2. Converting text labels to numeric labels

## Task 7: Creating the Model

1. Creating the model
2. Compiling the model

## Task 8: Training the Model

1. Preparing a validation set
2. Training the model

## Task 9: Evaluating the Model

1. Visualizing training history
2. Preparing a test set
3. A look at individual predictions on the test set
4. A look at all predictions on the test set