In [None]:
# Install TensorFlow, pinned to 2.11.0 for the rest of the notebook.
!pip install tensorflow==2.11.0

In [None]:
# TensorFlow Datasets provides `tfds.load`, used below to fetch GoEmotions.
# NOTE(review): unlike TensorFlow this install is not version-pinned —
# consider pinning it so the notebook stays reproducible.
!pip install tensorflow_datasets

In [None]:
# Install Bazel 5.4.0 from the Bazel apt repository.
# `-y` answers apt's confirmation prompt up front; a notebook cell has no
# convenient way to answer it interactively.
!sudo apt install -y curl gnupg

In [None]:
# Register the Bazel release signing key.  `-fsSL` makes curl exit non-zero on
# an HTTP error instead of piping an error page into apt-key.
!curl -fsSL https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -

In [None]:
# Add the Bazel apt repository to the package sources.
!echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list

In [None]:
# Refresh the package index so the new repository is visible.
!sudo apt update

In [None]:
# Install the pinned Bazel release without prompting.
!sudo apt install -y bazel=5.4.0

In [None]:
# Fetch the tensorflow/models repository; the notebook uses the
# research/seq_flow_lite project inside it.
!git clone https://www.github.com/tensorflow/models

In [None]:
# Run the setup script shipped with the seq_flow_lite colab demo.
# NOTE(review): presumably this prepares the Bazel workspace needed by the
# pip build in the next cell — confirm against the script itself.
!models/research/seq_flow_lite/demo/colab/setup_workspace.sh

In [None]:
# Install seq_flow_lite from the cloned source tree.
!pip install models/research/seq_flow_lite

In [None]:
# Remove the op source directories from the checkout before changing into it.
# NOTE(review): presumably this keeps the source folders from shadowing the
# op packages installed by pip once the working directory is the project
# root — confirm.
!rm -rf models/research/seq_flow_lite/tf_ops

In [None]:
# Same clean-up for the TFLite ops; the path must be under `seq_flow_lite`,
# the directory that was cloned and installed above.
!rm -rf models/research/seq_flow_lite/tflite_ops

In [None]:
# Work from the project root so `layers` and `models` import as top-level
# packages in the cells below.
%cd models/research/seq_flow_lite

In [None]:
import tensorflow as tf

In [None]:
import tensorflow_datasets as tfds

In [None]:
ds = tfds.load('goemotions', split='train')

In [None]:
for element in ds.take(5):
  print(element)

In [46]:
# Emotion label names.  The position of a name in this list is the class
# index used for the label tensor, the per-class metrics and the model
# output, so the order must not change.
LABELS = (
    'admiration amusement anger annoyance approval caring confusion '
    'curiosity desire disappointment disapproval disgust embarrassment '
    'excitement fear gratitude grief joy love nervousness optimism pride '
    'realization relief remorse sadness surprise neutral'
).split()


In [None]:
# Training hyper-parameters read by the dataset builder and the training cell.
CONFIG = dict(
    name='models.prado',
    # Batching and training length.
    batch_size=1024,
    train_steps=10000,
    # Exponential learning-rate decay schedule.
    learning_rate=0.0006,
    learning_rate_decay_steps=340,
    learning_rate_decay_rate=0.7,
)

In [None]:
# A tenth of the training steps; the training cell uses this value as the
# number of steps per Keras epoch.
CONFIG.update(save_checkpoints_steps=int(CONFIG['train_steps'] / 10))

In [None]:
# Configuration handed to the projection layer and the PRADO encoder.
MODEL_CONFIG = dict(
    # Task definition: one independent output per label.
    labels=LABELS,
    multilabel=True,
    quantize=False,
    # Text projection settings.
    max_seq_len=128,
    max_seq_len_inference=128,
    exclude_nonalphaspace_unicodes=False,
    split_on_space=True,
    # Encoder sizes and regularisation.
    embedding_regularizer_scale=0.035,
    embedding_size=64,
    bigram_channels=64,
    trigram_channels=64,
    feature_size=512,
    network_regularizer_scale=0.0001,
    keep_prob=0.5,
    # Optional projection features, all switched off here.
    word_novelty_bits=0,
    doc_size_levels=0,
    add_bos_tag=False,
    add_eos_tag=False,
    pre_logits_fc_layers=[],
    text_distortion_probability=0.0,
)

In [None]:
# Nest the model configuration inside the training configuration.
CONFIG.update(model_config=MODEL_CONFIG)

In [None]:
# NOTE(review): this installs the PyPI distribution named `layers`, which is
# not part of seq_flow_lite.  The `from layers import ...` statements below
# appear to target the `layers/` package of the seq_flow_lite project (the
# current working directory) — confirm this install is needed and that it
# cannot shadow or overwrite that package.
!pip install layers

In [None]:
from tensorflow.keras import layers

In [None]:
from layers import base_layers
from layers import projection_layers

def build_dataset(mode, inspect=False):
  """Builds the batched GoEmotions input pipeline for `mode`.

  Args:
    mode: `base_layers.TRAIN` (reads the 'train' split with
      `repeat(count=None)`) or `base_layers.EVAL` (reads the 'test' split
      with `repeat(count=1)`).
    inspect: When True, batches have size 1 and the raw dataset features are
      appended to every element as a third tuple entry.

  Returns:
    A `tf.data.Dataset` whose elements are `(inputs, label)` tuples, or
    `(inputs, label, features)` when `inspect` is True.  `inputs` is a dict
    with the keys 'projection' and 'sequence_length'; `label` is a float32
    tensor reshaped to `[batch_size, len(LABELS)]`.

  Raises:
    ValueError: If `mode` is neither TRAIN nor EVAL.
  """
  if mode not in (base_layers.TRAIN, base_layers.EVAL):
    raise ValueError('mode={}, must be TRAIN or EVAL'.format(mode))

  training = mode == base_layers.TRAIN
  split_name = 'train' if training else 'test'
  repeat_count = None if training else 1
  batch_size = 1 if inspect else CONFIG['batch_size']

  def to_model_inputs(features):
    """Maps one batch of raw dataset features to the model's input format."""
    # Project every comment_text string.  The projection layer returns the
    # projection tensor together with a per-example sequence length, and the
    # model consumes both.
    comments = tf.reshape(features['comment_text'], [batch_size])
    projector = projection_layers.ProjectionLayer(MODEL_CONFIG, mode)
    projection, seq_length = projector(comments)

    # Stack the per-emotion features, in LABELS order, into one indicator
    # tensor per example.
    indicators = tf.stack([features[name] for name in LABELS], axis=-1)
    indicators = tf.reshape(
        tf.cast(indicators, tf.float32), [batch_size, len(LABELS)])

    inputs = {'projection': projection, 'sequence_length': seq_length}
    if inspect:
      return (inputs, indicators, features)
    return (inputs, indicators)

  autotune = tf.data.experimental.AUTOTUNE
  return (
      tfds.load('goemotions', split=split_name)
      .repeat(count=repeat_count)
      .shuffle(buffer_size=batch_size * 2)
      .batch(batch_size, drop_remainder=True)
      .map(to_model_inputs,
           num_parallel_calls=autotune,
           deterministic=False)
      .prefetch(buffer_size=autotune))

# Input pipelines for training and evaluation, plus a batch-size-1 variant of
# the training pipeline that also carries the raw features.
train_dataset = build_dataset(mode=base_layers.TRAIN)
test_dataset = build_dataset(mode=base_layers.EVAL)
inspect_dataset = build_dataset(mode=base_layers.TRAIN, inspect=True)

In [None]:
from models import prado

def build_model(mode):
  """Assembles the Keras PRADO classifier for `mode`.

  In TRAIN and EVAL modes the model takes the pre-computed 'projection' and
  'sequence_length' inputs.  In any other mode it takes raw strings through
  an input named 'input' and runs the projection layer inside the model.

  Args:
    mode: One of the `base_layers` mode constants.

  Returns:
    A `tf.keras.Model` with a single output named 'predictions': a sigmoid
    over the encoder logits when MODEL_CONFIG['multilabel'] is set, a softmax
    otherwise.
  """
  if mode in (base_layers.TRAIN, base_layers.EVAL):
    # Dataset-fed modes: the input pipeline has already projected the text.
    projection = tf.keras.Input(
        shape=(MODEL_CONFIG['max_seq_len'], MODEL_CONFIG['feature_size']),
        name='projection',
        dtype='float32')
    sequence_length = tf.keras.Input(
        shape=(), name='sequence_length', dtype='float32')
    inputs = [projection, sequence_length]
  else:
    # String-fed modes: project the raw text as part of the model.
    text_input = tf.keras.Input(shape=(), name='input', dtype='string')
    projector = projection_layers.ProjectionLayer(MODEL_CONFIG, mode)
    projection, sequence_length = projector(text_input)
    inputs = [text_input]

  # Encoder producing one logit per label.
  logits = prado.Encoder(MODEL_CONFIG, mode)(projection, sequence_length)

  # Final activation turning the logits into scores.
  activation_name = 'sigmoid' if MODEL_CONFIG['multilabel'] else 'softmax'
  predictions = tf.keras.layers.Activation(
      activation_name, name='predictions')(logits)

  return tf.keras.Model(inputs=inputs, outputs=[predictions])

In [None]:
# Clear the directory the weights are written to at the end of this cell.
!rm -rf model

model = build_model(base_layers.TRAIN)

# Adam optimizer driven by a staircase exponential learning-rate decay.
learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=CONFIG['learning_rate'],
    decay_steps=CONFIG['learning_rate_decay_steps'],
    decay_rate=CONFIG['learning_rate_decay_rate'],
    staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# The model already applies its activation, so the loss receives
# probabilities rather than logits.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

model.compile(loss=loss, optimizer=optimizer)

# Split the total step budget into epochs of `save_checkpoints_steps` steps;
# validation runs after every epoch.
steps_per_epoch = CONFIG['save_checkpoints_steps']
epochs = int(CONFIG['train_steps'] / steps_per_epoch)
model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch)

model.save_weights('model/model_checkpoint')

In [None]:
model = build_model(base_layers.EVAL)

# Per-label metrics: for every label a Precision followed by a Recall, both
# at a 0.5 threshold and restricted to that label's class index.
metrics = [
    metric_cls(
        thresholds=[0.5],
        class_id=class_id,
        name=name_format.format(label_name))
    for class_id, label_name in enumerate(LABELS)
    for metric_cls, name_format in (
        (tf.keras.metrics.Precision, 'precision@0.5/{}'),
        (tf.keras.metrics.Recall, 'recall@0.5/{}'),
    )
]

# Task-wide metrics computed over all labels together.
metrics += [
    tf.keras.metrics.Precision(thresholds=[0.5], name='precision@0.5/all'),
    tf.keras.metrics.Recall(thresholds=[0.5], name='recall@0.5/all'),
]

# Evaluate the weights saved by the training cell on the test pipeline.
model.compile(metrics=metrics)
model.load_weights('model/model_checkpoint')
result = model.evaluate(x=test_dataset, return_dict=True)


In [None]:
# Print precision/recall for every label and then for the task-wide 'all'
# aggregate.  An entry is skipped when either of its metrics is missing
# from `result`.
for label in LABELS + ['all']:
  precision_key = 'precision@0.5/{}'.format(label)
  recall_key = 'recall@0.5/{}'.format(label)
  if precision_key not in result or recall_key not in result:
    continue
  print('{}: (precision@0.5: {}, recall@0.5: {})'.format(
      label, result[precision_key], result[recall_key]))

In [None]:
# Display name for each label.  Every label currently maps to itself; edit a
# value here to change how that label is printed in the prediction cell.
EMOTION_MAP = dict(
    admiration='admiration', amusement='amusement',
    anger='anger', annoyance='annoyance',
    approval='approval', caring='caring',
    confusion='confusion', curiosity='curiosity',
    desire='desire', disappointment='disappointment',
    disapproval='disapproval', disgust='disgust',
    embarrassment='embarrassment', excitement='excitement',
    fear='fear', gratitude='gratitude',
    grief='grief', joy='joy',
    love='love', nervousness='nervousness',
    optimism='optimism', pride='pride',
    realization='realization', relief='relief',
    remorse='remorse', sadness='sadness',
    surprise='surprise', neutral='neutral',
)

In [22]:
import pandas as pd

In [None]:
from google.colab import files

In [None]:
import io

In [None]:
# Load the YikYak posts from the CSV in the Colab /content directory.
# NOTE(review): judging by the prediction loop further down, column 0 is used
# as the date and column 1 as the post text — confirm against the CSV.
yikyaks = pd.read_csv('/content/database.csv')

In [None]:
yikyaks

In [None]:
# NOTE(review): this assigns the first column to itself, so the frame is left
# unchanged; presumably a leftover from a date-normalisation step — confirm
# or remove.
yikyaks.iloc[:, 0] = yikyaks.iloc[:, 0]

In [None]:
# Group the posts by the value in the first column.
grouped_data = yikyaks.groupby(yikyaks.iloc[:, 0])

In [None]:
# matplotlib draws the stacked bar charts at the end of the notebook.
!pip install matplotlib

In [None]:
import matplotlib.pyplot as plt

In [None]:
from matplotlib.cm import get_cmap

In [None]:
# When the cells are run top to bottom, `model` is the EVAL-mode model built
# in the evaluation cell at this point.
model.summary()

In [None]:
import numpy as np

In [None]:
# Sample inputs (byte strings) for the prediction demo below.
PREDICT_TEXT = [b'Good for you!', b'Happy birthday!', b'I love you.']

In [None]:
import numpy as np

# Rebuild the model in PREDICT mode, where it takes raw strings, and restore
# the weights saved by the training cell.
model = build_model(base_layers.PREDICT)
model.load_weights('model/model_checkpoint')

# Print the three highest-scoring emotions for every sample text.
for text in PREDICT_TEXT:
  results = model.predict(x=[text])
  print('')
  print('{}:'.format(text))
  scores = results[0]
  # Class indices ordered from highest to lowest score.
  ranked = np.argsort(scores)[::-1]
  for class_index in ranked[:3]:
    label = LABELS[class_index]
    # Use the display name when one is configured; a label with no entry, or
    # an empty entry, in EMOTION_MAP falls back to the raw label name instead
    # of raising KeyError.
    label = EMOTION_MAP.get(label) or label
    print('{}: {}'.format(label, scores[class_index]))

In [None]:
# Classify every YikYak post with the model and chart, per date, how often
# each emotion was a post's dominant prediction.
# Requires `model` to be the PREDICT-mode model (string inputs) built above.
dfs = []

for date, group in grouped_data:
    print('Date: {}'.format(date))

    # Number of posts on this date whose dominant emotion is each label.
    emotion_totals = {emotion: 0.0 for emotion in LABELS}

    for _, row in group.iterrows():
        # The second column is the text handed to the model.  `.iloc` makes
        # the positional lookup explicit; plain `row[1]` on a label-indexed
        # Series relies on a deprecated positional fallback.
        text = row.iloc[1]
        print(text)
        results = model.predict(x=[text])
        print('')
        print('{}:'.format(text))
        scores = results[0]
        # Class indices ordered from highest to lowest score.
        ranked = np.flip(np.argsort(scores))[:3]

        top_labels = [LABELS[class_index] for class_index in ranked]
        for label, class_index in zip(top_labels, ranked):
            print('{}: {}'.format(label, scores[class_index]))

        # A post counts towards its top emotion; when that is 'neutral', the
        # runner-up is counted instead.
        if top_labels[0] == 'neutral':
            dominant = top_labels[1]
        else:
            dominant = top_labels[0]
        emotion_totals[dominant] += 1

    top_emotion = max(emotion_totals, key=emotion_totals.get)
    # Report the top emotion as a share of the posts counted for this date;
    # the `%` format expects a fraction, not a raw count.
    posts_counted = sum(emotion_totals.values())
    top_share = emotion_totals[top_emotion] / posts_counted if posts_counted else 0.0
    print('Top Emotion: {} ({:.2%})'.format(top_emotion, top_share))

    df_plot = pd.DataFrame([emotion_totals], index=[date])
    dfs.append(df_plot)

# pd.concat raises on an empty list, so only plot when there is data.
if dfs:
    df_combined = pd.concat(dfs)

    # One distinct colour per label.  'tab20' alone has only 20 entries, so
    # sampling it at len(LABELS) points repeats colours for neighbouring
    # labels; 'tab20' + 'tab20b' together provide 40 distinct colours.
    palette = list(get_cmap('tab20').colors) + list(get_cmap('tab20b').colors)
    colors = [palette[i % len(palette)] for i in range(len(LABELS))]

    ax = df_combined.plot(kind='bar', stacked=True, color=colors)
    plt.xlabel('Date')
    plt.ylabel('Emotion Frequency')
    plt.title('YikYak Emotion Distribution Per Day')

    # Place the legend outside the axes, in two columns.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels, title='Emotion', bbox_to_anchor=(1.05, 1), loc='upper left', ncol=2)

    plt.show()
else:
    print("No data to plot.")


In [40]:
# Load the labelled posts from the CSV in the Colab /content directory.
# The cells below read its 'Date' and 'Emotion Detected' columns.
# NOTE(review): presumably these labels were assigned by hand as a reference
# for the model's predictions — confirm.
labeled_data = pd.read_csv('/content/LabeledData-dates.csv')

In [None]:
labeled_data

In [43]:
# Group the labelled posts by their 'Date' value.
grouped = labeled_data.groupby('Date')

In [44]:
from matplotlib.cm import get_cmap
import matplotlib.pyplot as plt

In [None]:
import numpy as np

# Tally the 'Emotion Detected' labels per date and draw them as stacked bars.
dfs_yikyaks = []

for date, group in grouped:
    print(date)
    # Skip dates without any rows.
    if group.empty:
        continue

    emotions = {emotion: 0.0 for emotion in LABELS}

    # Only the label column is needed, so iterate over it directly.  A value
    # that is not in LABELS raises KeyError here, which surfaces unexpected
    # labels in the CSV.
    for detected in group['Emotion Detected']:
        emotions[detected] += 1

    print(emotions)

    df_plot = pd.DataFrame([emotions], index=[date])
    dfs_yikyaks.append(df_plot)

print(dfs_yikyaks)

# pd.concat raises on an empty list, so only plot when there is data.
if dfs_yikyaks:
    df_comb = pd.concat(dfs_yikyaks)

    # One distinct colour per label.  'tab20' alone has only 20 entries, so
    # sampling it at len(LABELS) points repeats colours for neighbouring
    # labels; 'tab20' + 'tab20b' together provide 40 distinct colours.
    palette = list(get_cmap('tab20').colors) + list(get_cmap('tab20b').colors)
    colors = [palette[i % len(palette)] for i in range(len(LABELS))]

    ax_2 = df_comb.plot(kind='bar', stacked=True, color=colors, width=0.8)

    plt.xlabel('Date')
    plt.ylabel('Emotion Frequency')
    plt.title('YikYak Emotion Distribution Per Day')

    # Place the legend outside the axes, in two columns.
    handles, labels = ax_2.get_legend_handles_labels()
    ax_2.legend(handles, labels, title='Emotion', bbox_to_anchor=(1.05, 1), loc='upper left', ncol=2)

    plt.show()
else:
    print("No data to plot.")