In [1]:
%%capture
%pip install datasets transformers pandas matplotlib tqdm --upgrade --quiet

In [2]:
# Automatically loads changes in other files in this project
%load_ext autoreload
%autoreload 2

## Importing Libraries


In [3]:
import torch
from torch.utils.data import DataLoader
import pandas as pd
from transformers import pipeline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split


In [8]:
#df = pd.read_csv('../Merged_Cleaned_Dataset_Labeled_API.csv.csv')
data = [
    {
        "text": "love the new collection amazing craftsmanship and attention to detail",
        "brand_labels": ["product quality", "reputation & heritage"],
        "emotion_labels": ["love", "admiration"]
    },
    {
        "text": "horrible customer service never shopping there again",
        "brand_labels": ["customer service"],
        "emotion_labels": ["disgust", "disapproval"]
    },
    {
        "text": "thrilled with the ecofriendly initiatives",
        "brand_labels": ["sustainability"],
        "emotion_labels": ["excitement", "optimism"]
    },
    {
        "text": "very disappointed in the quality for the price paid",
        "brand_labels": ["product quality"],
        "emotion_labels": ["disappointment"]
    },
    {
        "text": "impressed by the brand's commitment to ethical practices",
        "brand_labels": ["ethical practices"],
        "emotion_labels": ["approval", "pride"]
    }
]
df = pd.DataFrame(data)


In [9]:
# Delete later
df['label'] = df['label'].apply(lambda x: [x] if not isinstance(x, list) else x)

KeyError: 'label'

In [10]:
df.head(5)

Unnamed: 0,text,brand_labels,emotion_labels
0,love the new collection amazing craftsmanship ...,"[product quality, reputation & heritage]","[love, admiration]"
1,horrible customer service never shopping there...,[customer service],"[disgust, disapproval]"
2,thrilled with the ecofriendly initiatives,[sustainability],"[excitement, optimism]"
3,very disappointed in the quality for the price...,[product quality],[disappointment]
4,impressed by the brand's commitment to ethical...,[ethical practices],"[approval, pride]"


In [None]:
brand_perception_labels_map = {
        0: 'product quality',
        1: 'reputation & heritage',
        2: 'customer service',
        3: 'social impact',
        4: 'ethical practices',
        5: 'sustainability'
    }

emotion_labels_map = {0: "admiration",
    1: "amusement",
    2: "anger",
    3: "annoyance",
    4: "approval",
    5: "caring",
    6: "confusion",
    7: "curiosity",
    8: "desire",
    9: "disappointment",
    10: "disapproval",
    11: "disgust",
    12: "embarrassment",
    13: "excitement",
    14: "fear",
    15: "gratitude",
    16: "grief",
    17: "joy",
    18: "love",
    19: "nervousness",
    20: "optimism",
    21: "pride",
    22: "realization",
    23: "relief",
    24: "remorse",
    25: "sadness",
    26: "surprise",
    27: "neutral"}

## Create datasets

In [None]:
# Get text from df and put in a list of strings
texts = [item for item in df['text'] if isinstance(item, str) and item.strip() != '']

In [None]:
# Create a list of hot encoded values for brand aspects 
def hot_encode_brand_perception(row):
    labels_map = {
        'product quality': 0,
        'reputation & heritage': 1,
        'customer service': 2,
        'social impact': 3,
        'ethical practices': 4,
        'sustainability': 5
    }
    result = np.zeros(6)
    for label in row['brand_labels']:  # iterate through the list of labels in each row
        if label in labels_map:
            result[labels_map[label]] = 1
    return result

# Apply the function to each row
brand_labels = df.apply(hot_encode_brand_perception, axis=1).tolist()

In [None]:
def hot_encode_emotions(row):
    labels_map = {
    "admiration": 0,
    "amusement": 1,
    "anger": 2,
    "annoyance": 3,
    "approval": 4,
    "caring": 5,
    "confusion": 6,
    "curiosity": 7,
    "desire": 8,
    "disappointment": 9,
    "disapproval": 10,
    "disgust": 11,
    "embarrassment": 12,
    "excitement": 13,
    "fear": 14,
    "gratitude": 15,
    "grief": 16,
    "joy": 17,
    "love": 18,
    "nervousness": 19,
    "optimism": 20,
    "pride": 21,
    "realization": 22,
    "relief": 23,
    "remorse": 24,
    "sadness": 25,
    "surprise": 26,
    "neutral": 27
}

    result = np.zeros(28)
    for label in row['emotion_labels']:  # iterate through the list of labels in each row
        if label in labels_map:
            result[labels_map[label]] = 1
    return result

# Apply the function to each row
emotion_labels = df.apply(hot_encode_emotions, axis=1).tolist()

In [None]:
print(emotion_labels[1])

In [None]:
# Split into validation, test, and train splits

# First, split into train and temp (either test or validation) (change back to 0.2)
texts_train, texts_temp, emotions_train, emotions_temp, brands_train, brands_temp = train_test_split(
    texts, emotion_labels, brand_labels, test_size=0.4, random_state=42)

# Then, split the temp data into validation and test sets
texts_val, texts_test, emotions_val, emotions_test, brands_val, brands_test = train_test_split(
    texts_temp, emotions_temp, brands_temp, test_size=0.5, random_state=42)  # This splits the remaining 20% into two 10% segments


In [None]:
%cd ..

In [None]:
from datasetss.brand_perception_dataset import BrandPerceptionDataset

train_dataset = BrandPerceptionDataset(texts_train, emotions_train, brands_train)
val_dataset = BrandPerceptionDataset(texts_val, emotions_val, brands_val)
test_dataset = BrandPerceptionDataset(texts_test, emotions_test, brands_test)

# Example usage with DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)


In [None]:
#CHANGE EPOCHS BACK TO 100
from modules.BrandPerceptionModel import BrandPerceptionModel
config = {
    'model_name': 'SamLowe/roberta-base-go_emotions',
    'n_labels_bp': 6,
    'batch_size': 128,
    'lr': 1.5e-6,
    'warmup': 0.2, 
    'train_size': len(train_loader),
    'weight_decay': 0.001,
    'n_epochs': 10
}
print("Config:", config)
model = BrandPerceptionModel(config)

In [None]:
import pytorch_lightning as pl
trainer = pl.Trainer(max_epochs=config['n_epochs'], num_sanity_val_steps=5)
trainer.fit(model, train_loader, val_loader)

In [None]:
def make_predictions():
    # Assuming your model is already initialized and loaded with trained weights
    model.eval()  # Set the model to evaluation mode

    # Iterate through your validation or test dataset
    predictions = []
    with torch.no_grad():  # Disable gradient tracking
        for batch in val_loader:  # Assuming val_loader is your validation data loader
            # Forward pass
            outputs = model.predict_step(batch)

            # Post-processing if needed
            emotion_logits = outputs["emotion_logits"]
            brand_logits = outputs["brand_logits"]
            
            # Example post-processing: convert logits to probabilities
            emotion_probs = torch.softmax(emotion_logits, dim=1)
            brand_probs = torch.softmax(brand_logits, dim=1)
            
            # Append the predictions
            predictions.append((emotion_probs, brand_probs))
        return predictions

    # Now predictions contain the predicted probabilities for each batch
    # You can further process these predictions as needed for your task


In [None]:
predictions = make_predictions()

In [None]:
# Only do for a specfic brand
# initialize dataset
# initialize loader 
amiri_dataset = BrandPerceptionDataset()
amiri_loader = DataLoader()

In [None]:
# Only do for a specifc brand
# Initialize sum_tensor

emotion_tensors = predictions[0]
brand_perception_tensors = predictions[1]

In [None]:
def avg(tensors):
    sum_tensor = np.zeros_like(predictions[0])
    for tensor in tensors:
        sum_tensor += tensor
    return sum_tensor / len(tensors)



In [None]:
# Aggregated result across all 
avg_amiri_emotion = avg(emotion_tensors)
avg_amiri_brand_perception = avg(brand_perception_tensors)

In [None]:
# Function to map indices to labels
def map_to_labels(tensor, labels_map):
    labels = []
    for i, value in enumerate(tensor[0]):
        label = labels_map[i] if i in labels_map else "Unknown"
        labels.append((label, value))
    return labels

In [None]:
# Map indices to labels for tensor 1
labels_tensor1 = map_to_labels(avg_amiri_brand_perception, brand_perception_labels_map)
print("Tensor 1:")
for label, value in labels_tensor1:
    print(f"{label}: {value}")

# Map indices to labels for tensor 2
labels_tensor2 = map_to_labels(avg_amiri_emotion, emotion_labels_map)
print("\nTensor 2:")
for label, value in labels_tensor2:
    print(f"{label}: {value}")