In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import time
import os
import math
import random
# Walk the Kaggle input mount and echo the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import re
import string
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, Activation, Flatten, Input, concatenate, Conv1D, GlobalMaxPooling1D, MaxPooling1D

import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from bs4 import BeautifulSoup

**Text preparation and data loading**

In [None]:
stop = set(stopwords.words('english'))
def clean_data(text, stop_words=None):
    """Normalise a raw sentence into lowercase, space-separated alphabetic tokens.

    Processing steps, in order:
      1. remove URLs (anything starting with ``http`` up to the next whitespace);
      2. replace every non-word character with a space;
      3. drop isolated single ASCII letters, both between words and at the
         very start of the string;
      4. collapse runs of whitespace;
      5. lowercase each token and keep it only if it is purely alphabetic and
         not a stop word.

    Args:
        text: Raw input string.
        stop_words: Optional collection of lowercase words to discard. When
            omitted, the module-level ``stop`` set is used.

    Returns:
        The cleaned text as a single string; empty if no token survives.
    """
    if stop_words is None:
        stop_words = stop

    #text = BeautifulSoup(text, "html.parser").get_text()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
    # Anchor at the start of the string. The previous pattern escaped the caret
    # (r'\^...'), which searches for a literal '^' that step 2 already removed,
    # so a leading single letter was never stripped.
    text = re.sub(r'^[a-zA-Z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)

    tokens = (word.lower() for word in text.split())
    return " ".join(
        token for token in tokens
        if token not in stop_words and token.isalpha()
    )

In [None]:
def load_data(data_dir='../input/emotions-dataset-for-nlp',
              emotions=('sadness', 'anger', 'joy', 'fear'),
              maxlen=256):
    """Load, clean, encode and pad the train/val/test splits.

    Each split is read from ``<data_dir>/<split>.txt`` as ``;``-separated
    ``Text;Emotion`` rows, restricted to the requested emotions, and its text
    is passed through ``clean_data``.

    The label encoder and the tokenizer are fitted on the training split only.
    Fitting the tokenizer on test text as well would leak test vocabulary into
    the model's embedding table and would treat the validation split
    differently from the test split.

    Args:
        data_dir: Directory containing ``train.txt``, ``val.txt`` and ``test.txt``.
        emotions: Emotion labels to keep; rows with any other label are dropped.
        maxlen: Length every token sequence is padded / truncated to.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test, vocabSize)`` where the
        ``X_*`` values are padded integer sequences, the ``y_*`` values are
        one-hot label matrices, and ``vocabSize`` is the number of tokenizer
        entries plus one.
    """
    def _read_split(name):
        # Read one split and return (cleaned text, raw emotion labels).
        frame = pd.read_csv(f'{data_dir}/{name}.txt', names=['Text', 'Emotion'], sep=';')
        frame = frame[frame['Emotion'].isin(list(emotions))]
        return frame['Text'].apply(clean_data), frame['Emotion']

    X_train, y_train = _read_split('train')
    X_val, y_val = _read_split('val')
    X_test, y_test = _read_split('test')

    le = LabelEncoder()
    le.fit(y_train)
    # Fix the one-hot width from the training labels so every split gets the
    # same number of columns even if a split happens to lack the last class.
    num_classes = len(le.classes_)
    y_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    y_val = to_categorical(le.transform(y_val), num_classes=num_classes)
    y_test = to_categorical(le.transform(y_test), num_classes=num_classes)

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(X_train)

    def _encode(texts):
        # Map texts to index sequences and pad/truncate them to `maxlen`.
        return pad_sequences(tokenizer.texts_to_sequences(texts),
                             maxlen=maxlen, truncating='pre')

    X_train = _encode(X_train)
    X_val = _encode(X_val)
    X_test = _encode(X_test)

    vocabSize = len(tokenizer.index_word) + 1
    print(f"Vocabulary size = {vocabSize}")

    return X_train, y_train, X_val, y_val, X_test, y_test, vocabSize

**Compute the F1-score**

In [None]:
def get_f1(y_true, y_pred): #taken from old keras source code
    """Return an F1 score computed from rounded predictions.

    Counts are summed over every element of the tensors passed in, so the
    value is a single scalar for the whole call rather than a per-class score.
    ``K.epsilon()`` is added to each denominator to avoid division by zero.
    """
    def _count_ones(tensor):
        # Clip to [0, 1], round to {0, 1}, and count the ones.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _count_ones(y_true * y_pred)
    actual = _count_ones(y_true)
    predicted = _count_ones(y_pred)

    precision = hits / (predicted + K.epsilon())
    recall = hits / (actual + K.epsilon())
    return 2*(precision*recall)/(precision+recall+K.epsilon())

**Start learning for CNN+bi-LSTM model**

In [None]:
# Train and evaluate the centralised CNN + bidirectional-LSTM classifier.
# The timer covers data loading, model building, training and test evaluation.
start = time.time()
# Data
X_train, y_train, X_val, y_val, X_test, y_test, vocabSize = load_data()
# Embedding
max_features = vocabSize
maxlen = X_train.shape[1]
embedding_size = 256

# Convolution
kernel_size = 5
filters = 128
pool_size = 4

# LSTM
lstm_output_size = 128

print('Build model...')
model = Sequential()
# Use the hyperparameters declared above instead of recomputing them inline.
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Dropout(0.25))
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
model.add(MaxPooling1D(pool_size=pool_size))
#model.add(LSTM(lstm_output_size))
model.add(Bidirectional(LSTM(lstm_output_size)))
model.add(Dense(4))
model.add(Activation('softmax'))
model.summary()

# NOTE(review): 10e-3 is 0.01, not 0.001 - confirm this learning rate is intended.
adam = Adam(learning_rate=10e-3)
# `metrics` is passed as a list, matching the Transformer cell; a bare callable
# is not the documented form.
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=[get_f1])

# Fit model
history = model.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    verbose=1,
                    batch_size=256,
                    epochs=3
                   )

model.evaluate(X_test, y_test, verbose=1)
_time = time.time() - start
print(f"Learning time: {_time}")


In [None]:
# Per-epoch training and validation loss from the `history` object in scope.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Loss Function')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# Per-epoch training and validation F1 from the `history` object in scope.
for curve in ('get_f1', 'val_get_f1'):
    plt.plot(history.history[curve])
plt.title('model F1-score')
plt.xlabel('epoch')
plt.ylabel('F1-score')
plt.legend(['train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# # Classify custom sample
# def predict(sentence):
#     print(sentence)
#     sentence = clean(sentence)
#     sentence = tokenizer.texts_to_sequences([sentence])
#     sentence = pad_sequences(sentence, maxlen=256, truncating='pre')
#     result = le.inverse_transform(np.argmax(model.predict(sentence), axis=-1))[0]
#     proba =  np.max(model.predict(sentence))
    
#     print(f"{result} : {proba}\n\n")

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

**Define Transformer block**

In [None]:
class TransformerBlock(layers.Layer):
    """Single Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalisation.

    Args:
        embed_dim: Size of the token representation; also used as the
            attention ``key_dim`` and as the output width of the feed-forward net.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward net.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``; ``training`` toggles the dropout layers."""
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of positions the position-embedding table holds.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce the layer.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in ``x`` and add the embedding of each position."""
        # Positions are derived from the last axis of the actual input.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "maxlen": self.maxlen,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

**Start learning for Transformer+CNN**

In [None]:
# Train the centralised Transformer + CNN classifier.
# The timer covers data loading, model building and training.
start = time.time()
embed_dim = 64  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 64 # Hidden layer size in feed forward network inside transformer
X_train, y_train, X_val, y_val, X_test, y_test, vocab_size = load_data()
maxlen = X_train.shape[1]
kernel_size = 8
# The convolution previously hard-coded 16 filters while this variable said 32
# and was never read. The value actually trained (16) is kept and now flows
# through the variable.
filters = 16

inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = layers.Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1)(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
# x = layers.Dense(16, activation="relu")(x)
# x = layers.Dropout(0.1)(x)
outputs = layers.Dense(4, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=[get_f1])
model.summary()
history = model.fit(
    X_train, y_train, batch_size=32, epochs=3, validation_data=(X_val, y_val)
)
finish = time.time()
print(f'Finished at {finish-start}')

In [None]:
# Per-epoch training and validation F1 from the `history` object in scope.
for curve in ('get_f1', 'val_get_f1'):
    plt.plot(history.history[curve])
plt.title('model F1-score')
plt.xlabel('epoch')
plt.ylabel('F1-score')
plt.legend(['train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# Per-epoch training and validation loss from the `history` object in scope.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Loss Function')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'Validation'], loc='upper left')
plt.show()

**Define federated client**


In [None]:
!pip install -U flwr["simulation"]

In [None]:
# Make TensorFlow logs less verbose
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import flwr as fl

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping one Keras model and one data partition.

    Args:
        model: Compiled Keras model. ``evaluate`` expects it to return exactly
            two values (loss and one metric).
        x_train, y_train: Training partition for this client.
        x_val, y_val: Validation partition for this client.
    """

    def __init__(self, model, x_train, y_train, x_val, y_val) -> None:
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is optional and unused, so the method can be called both
        with and without a config argument.
        """
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load the given weights, train one local epoch, and return the new weights.

        Returns:
            ``(weights, number of training examples, empty metrics dict)``.
        """
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=1, verbose=1, batch_size=16)
        print(f"Len of self x_train {len(self.x_train)}")
        return self.model.get_weights(), len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load the given weights and evaluate them on the local validation data.

        Returns:
            ``(loss, number of validation examples, {"accuracy": metric})`` where
            ``metric`` is the single metric the model was compiled with.
        """
        self.model.set_weights(parameters)
        loss, acc = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        # Plain Python floats keep the returned values simple scalars.
        loss, acc = float(loss), float(acc)
        print(f"loss {loss} and acc is {acc}")
        return loss, len(self.x_val), {"accuracy": acc}

**Federated client with CNN+bi-LSTM model**

In [None]:
def client_fn(cid: str) -> fl.client.Client:
    """Build the CNN + bi-LSTM Flower client for client id ``cid``.

    The full dataset is loaded and both the training and the validation split
    are cut into ``NUM_CLIENTS`` equal, contiguous partitions; the client gets
    partition number ``int(cid)`` of each. Rows left over after the integer
    division are not assigned to any client.

    Args:
        cid: Client id as a string holding an integer.

    Returns:
        A ``FlowerClient`` holding a freshly compiled model and this client's
        training / validation partitions.
    """
    X_train, y_train, X_val, y_val, X_test, y_test, vocabSize = load_data()

    maxlen = X_train.shape[1]
    embedding_size = 256
    print(f'max length of embedding {maxlen}')
    print(f'max features/vocabSize {vocabSize}')
    print(f'Len of X train is {len(X_train)}')
    print(f'Len of y train is {len(y_train)}')
    print(f'Len of X val is {len(X_val)}')
    print(f'Len of y val is {len(y_val)}')

    # Convolution
    kernel_size = 5
    filters = 128
    pool_size = 4

    # LSTM
    lstm_output_size = 128

    client_index = int(cid)

    def _client_slice(data):
        # Contiguous, equally sized partition of `data` for this client.
        partition_size = len(data) // NUM_CLIENTS
        return data[client_index * partition_size:(client_index + 1) * partition_size]

    #Get train data for particular client
    X_train_cid = _client_slice(X_train)
    y_train_cid = _client_slice(y_train)
    print(f'Len of cid train X: {len(X_train_cid)}')
    print(f'Len of cid train y: {len(y_train_cid)}')

    #Get validation data for particular client
    X_val_cid = _client_slice(X_val)
    y_val_cid = _client_slice(y_val)
    # These two messages previously said "train" although they report the
    # validation partition.
    print(f'Len of cid val X: {len(X_val_cid)}')
    print(f'Len of cid val y: {len(y_val_cid)}')

    # Create model
    model = Sequential(
        [
            Embedding(vocabSize, embedding_size, input_length=maxlen),
            Dropout(0.25),
            Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1),
            MaxPooling1D(pool_size=pool_size),
            Bidirectional(LSTM(lstm_output_size)),
            Dense(4),
            Activation('softmax')
        ]
    )
    model.compile("adam", "categorical_crossentropy", metrics=["accuracy"])

    return FlowerClient(model, X_train_cid, y_train_cid, X_val_cid, y_val_cid)

In [None]:
from typing import List, Tuple, Optional

**Define a federated strategy that can save the aggregated weights and report aggregated metrics**

In [None]:
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg strategy that also prints the example-weighted client accuracy.

    The weight-saving ``aggregate_fit`` override is kept below in commented-out
    form; only evaluation aggregation is customised.
    """
#     def aggregate_fit(
#         self,
#         rnd: int,
#         results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]],
#         failures: List[BaseException],
#     ) -> Optional[fl.common.Weights]:
#         aggregated_weights = super().aggregate_fit(rnd, results, failures)
#         if aggregated_weights is not None:
#             # Save aggregated_weights
#             print(f"Saving round {rnd} aggregated_weights...")
#             np.savez(f"round-{rnd}-weights.npz", *aggregated_weights)
#         return aggregated_weights
    def aggregate_evaluate(
        self,
        rnd: int,
        results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.EvaluateRes]],
        failures: List[BaseException],
    ) -> Optional[float]:
        """Print the weighted mean of the clients' "accuracy" metric, then defer to FedAvg.

        Returns ``None`` when there are no results; otherwise whatever the base
        class's ``aggregate_evaluate`` returns.
        """
        if not results:
            return None

        # Accumulate accuracy * examples and the example count in one pass.
        weighted_accuracy = 0
        total_examples = 0
        for _, client_result in results:
            weighted_accuracy += client_result.metrics["accuracy"] * client_result.num_examples
            total_examples += client_result.num_examples

        accuracy_aggregated = weighted_accuracy / total_examples
        print(f"Round {rnd} accuracy aggregated from client results: {accuracy_aggregated}")

        # The loss aggregation itself is left to the base class (FedAvg).
        return super().aggregate_evaluate(rnd, results, failures)


**Start federated learning with CNN+bi-LSTM model**

In [None]:
# Number of simulated clients; client_fn reads this global to size its partitions.
NUM_CLIENTS = 15

# Run 10 federated rounds with the CNN + bi-LSTM clients.
strategy = SaveModelStrategy(min_fit_clients=2, min_eval_clients=2)
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    num_rounds=10,
    strategy=strategy,
)

**Federated client with Transformer+CNN**

In [None]:
def client_fn_transformer(cid: str) -> fl.client.Client:
    """Build the Transformer + CNN Flower client for client id ``cid``.

    The full dataset is loaded and both the training and the validation split
    are cut into ``NUM_CLIENTS`` equal, contiguous partitions; the client gets
    partition number ``int(cid)`` of each. Rows left over after the integer
    division are not assigned to any client.

    Args:
        cid: Client id as a string holding an integer.

    Returns:
        A ``FlowerClient`` holding a freshly compiled model and this client's
        training / validation partitions.
    """
    embed_dim = 64  # Embedding size for each token
    num_heads = 2  # Number of attention heads
    ff_dim = 64 # Hidden layer size in feed forward network inside transformer

    X_train, y_train, X_val, y_val, X_test, y_test, vocab_size = load_data()

    maxlen = X_train.shape[1]
    kernel_size = 4
    filters = 32

    inputs = layers.Input(shape=(maxlen,))
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = transformer_block(x)
    x = layers.Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(16, activation="relu")(x)
    #x = layers.Dropout(0.1)(x)
    outputs = layers.Dense(4, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    # FlowerClient.evaluate reports the model's single metric under the key
    # "accuracy" and the strategy aggregates it as accuracy, so compile with
    # accuracy (as client_fn does) rather than with the F1 function.
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    client_index = int(cid)

    def _client_slice(data):
        # Contiguous, equally sized partition of `data` for this client.
        partition_size = len(data) // NUM_CLIENTS
        return data[client_index * partition_size:(client_index + 1) * partition_size]

    #Get train data for particular client
    X_train_cid = _client_slice(X_train)
    y_train_cid = _client_slice(y_train)
    print(f'Len of cid train X: {len(X_train_cid)}')
    print(f'Len of cid train y: {len(y_train_cid)}')

    #Get validation data for particular client
    X_val_cid = _client_slice(X_val)
    y_val_cid = _client_slice(y_val)
    # These two messages previously said "train" although they report the
    # validation partition.
    print(f'Len of cid val X: {len(X_val_cid)}')
    print(f'Len of cid val y: {len(y_val_cid)}')

    return FlowerClient(model, X_train_cid, y_train_cid, X_val_cid, y_val_cid)

**Start federated learning with Transformer+CNN**

In [None]:
# Number of simulated clients; client_fn_transformer reads this global to size its partitions.
NUM_CLIENTS = 15

# Run 10 federated rounds with the Transformer + CNN clients.
strategy = SaveModelStrategy(min_fit_clients=2, min_eval_clients=2)
fl.simulation.start_simulation(
    client_fn=client_fn_transformer,
    num_clients=NUM_CLIENTS,
    num_rounds=10,
    strategy=strategy,
)