In [3]:
!pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torchvision==0.20.1
  Downloading https://download.pytorch.org/whl/cu124/torchvision-0.20.1%2Bcu124-cp39-cp39-win_amd64.whl (6.1 MB)
     ---------------------------------------- 0.0/6.1 MB ? eta -:--:--
     ----------------------------------- ---- 5.5/6.1 MB 30.5 MB/s eta 0:00:01
     ---------------------------------------- 6.1/6.1 MB 26.9 MB/s eta 0:00:00
Collecting torchaudio==2.5.1
  Downloading https://download.pytorch.org/whl/cu124/torchaudio-2.5.1%2Bcu124-cp39-cp39-win_amd64.whl (4.1 MB)
     ---------------------------------------- 0.0/4.1 MB ? eta -:--:--
     ---------------------------------------- 4.1/4.1 MB 31.0 MB/s eta 0:00:00
Collecting torch==2.5.1
  Downloading https://download.pytorch.org/whl/cu124/torch-2.5.1%2Bcu124-cp39-cp39-win_amd64.whl (2510.7 MB)
     ---------------------------------------- 0.0/2.5 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.5 GB 27.9 MB/s eta 0:0

In [1]:
import torch
# Sanity check that the installed PyTorch build can see a CUDA device.
# Kept as the bare last expression so the boolean is shown as the cell's output.
torch.cuda.is_available()

True

In [2]:
from transformers import pipeline

# Build the emotion classifier once; it is reused as a module-level object by later cells.
classifier = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

# Smoke test: the result is a list holding one {'label', 'score'} dict.
classifier("I love this!")


Device set to use cuda:0


[{'label': 'joy', 'score': 0.9771687984466553}]

In [3]:
## Define function to perform emotion classification on text that is 50 characters on either side of keywords

import re
def detect_emotion_with_context(text, keywords, context_size=50):
    """
    Classify the emotion of the text surrounding each keyword occurrence.

    Every whole-word, case-insensitive occurrence of a keyword is located, a
    window of up to ``context_size`` characters on each side of it is cut out
    of ``text`` (clipped at the beginning and end of the text), and that window
    is passed to the module-level ``classifier``.

    Args:
        text (str): The input text.
        keywords (list): Keywords to search for. Each keyword is matched
                         literally (regex metacharacters are escaped).
        context_size (int): The number of characters to include as context
                            on each side of the keyword. Defaults to 50.

    Returns:
        dict: A dictionary whose keys are the keywords that were found
              (keywords without a match are omitted) and whose values are
              lists with one tuple per occurrence. Each tuple contains:
              - The value returned by ``classifier(context)``.
              - The surrounding context of the keyword.
    """
    results = {}
    for keyword in keywords:
        # Whole-word, case-insensitive pattern for this keyword.
        pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE)
        keyword_results = []
        for match in pattern.finditer(text):
            # Honour the caller's window size (previously hard-coded to 50),
            # clipping the window to the bounds of the text.
            start = max(0, match.start() - context_size)
            end = min(len(text), match.end() + context_size)
            context = text[start:end]
            emotion = classifier(context)

            keyword_results.append((emotion, context))
        if keyword_results:
            results[keyword] = keyword_results
    return results


In [40]:
# Demonstration: run the detector on a short sample passage and print every hit.
text = """
As I was walking in the snow in the woods, I tripped and fell and got hurt. 
My parents sent me to a nice family in Le Chambon.  I was kept safe and happy there. 
I was always having a good time with my friends, especially when we would go to the village.
"""
keywords = ["woods", "Le Chambon", "village"]

# Maps each keyword that was found to its list of (emotion, context) tuples.
emotion_data = detect_emotion_with_context(text, keywords)

for keyword, data_list in emotion_data.items():
    print(f"Keyword: {keyword}")
    for emotion, context in data_list:
        # One print call, two lines of output per occurrence.
        print(f"  Emotion: {emotion}", f"  Context: {context}", sep="\n")

Keyword: woods
  Emotion: [{'label': 'fear', 'score': 0.9789318442344666}]
  Context: 
As I was walking in the snow in the woods, I tripped and fell and got hurt. 
My parents sen
Keyword: Le Chambon
  Emotion: [{'label': 'joy', 'score': 0.9053284525871277}]
  Context: got hurt. 
My parents sent me to a nice family in Le Chambon.  I was kept safe and happy there. 
I was always 
Keyword: village
  Emotion: [{'label': 'disgust', 'score': 0.4012473523616791}]
  Context: th my friends, especially when we would go to the village.



In [55]:
# Flatten `emotion_data` ({keyword: [(classifier_output, context), ...]}) into a tidy dataframe
# with one row per keyword occurrence and the columns Location, Context, label, score.
# The rows are built directly from the nested structure, so a keyword that occurs several times
# simply contributes several rows, and an empty `emotion_data` gives an empty frame.
# (The intermediate frames of the earlier apply/json_normalize approach are no longer created.)
# Finally, a label_numeric column is added using the key values from the Hartmann Roberta model;
# the codes are stored as strings, with "Unknown" for any label outside that list.

import pandas as pd
from pandas import json_normalize
import numpy as np

# Emotion labels in code order: the position in this list is the numeric code ("0" .. "6").
EMOTION_LABELS = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

location_records = [
    {
        "Location": keyword,
        "Context": context,
        # The classifier output is a list holding one {'label', 'score'} dict; unpack it.
        "label": emotion[0]["label"],
        "score": emotion[0]["score"],
    }
    for keyword, data_list in emotion_data.items()
    for emotion, context in data_list
]

# Passing `columns` keeps the expected schema even when there are no records.
df_location4 = pd.DataFrame(location_records, columns=["Location", "Context", "label", "score"])

conditions = [df_location4["label"] == label for label in EMOTION_LABELS]
values = [str(code) for code in range(len(EMOTION_LABELS))]
df_location4["label_numeric"] = np.select(conditions, values, default="Unknown")
df_location4

Unnamed: 0,Location,Context,label,score,label_numeric
0,woods,"\nAs I was walking in the snow in the woods, I...",fear,0.978932,2
1,Le Chambon,got hurt. \nMy parents sent me to a nice famil...,joy,0.905328,3
2,village,"th my friends, especially when we would go to ...",disgust,0.401247,1


In [29]:
import pandas as pd

# Sample DataFrame
data = {'group1': ['A', 'A', 'B', 'B', 'A', 'B'],
        'group2': ['X', 'X', 'Y', 'Y', 'X', 'Y'],
        'values': [1, 2, 1, 2, 1, 3]}
df_test = pd.DataFrame(data)

# Collect the distinct 'values' seen in each ('group1', 'group2') group as a sorted list.
# Sorting makes the result reproducible, since iteration order of a set is not guaranteed.
# NOTE(review): this is the set of unique values, not the statistical mode; if the mode
# was intended, aggregate with `lambda x: x.mode().tolist()` instead.
df_test_grouped = df_test.groupby(['group1', 'group2'], as_index=False).agg({'values': lambda x: sorted(set(x))})

# Display the result
print(df_test_grouped)

  group1 group2     values
0      A      X     [1, 2]
1      B      Y  [1, 2, 3]
