In [2]:
import pandas as pd

# Load the synthetic and combined labeled datasets
synthetic_path = "../preprocessing/data_cleaned/synthetic_poetry_dataset.csv"
combined_path = "../preprocessing/data_cleaned/combined_poetry_dataset.csv"

synthetic_df = pd.read_csv(synthetic_path)
combined_df = pd.read_csv(combined_path)

# Keep only rows that carry a usable emotion label.
# A plain `.str.lower() != "unknown"` comparison evaluates to True for missing
# labels (NaN != "unknown"), so unlabeled rows would leak into the training set.
# Normalize case and surrounding whitespace, then require a non-null label.
normalized_emotion = combined_df["emotion"].str.strip().str.lower()
has_known_emotion = normalized_emotion.notna() & normalized_emotion.ne("unknown")
combined_labeled_df = combined_df[has_known_emotion]

# Combine both clean sources, then shuffle with a fixed seed so the row order
# is reproducible across runs
full_training_df = pd.concat([combined_labeled_df, synthetic_df], ignore_index=True)
full_training_df = full_training_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Show distribution of labels (last expression of the cell is displayed)
emotion_distribution = full_training_df["emotion"].value_counts()

emotion_distribution


emotion
sadness        10
joy            10
yearning        5
awe             5
hope            5
empowerment     5
Name: count, dtype: int64

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import joblib
from sklearn.utils import resample

from transformers import pipeline
import pandas as pd

# Load your emotion-labeled poetry dataset
# full_training_df = pd.read_csv("your_data.csv")
# We'll assume the column full_training_df["poem"] contains your texts

# Build the pretrained DistilRoBERTa emotion classifier as a
# Hugging Face text-classification pipeline.
emotion_classifier = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=4,  # number of top predictions requested; 3 would give top-3
)

# Classify a single poem with the pretrained emotion pipeline
def classify_emotion(text):
    """Return the top predicted emotion and its score for one text.

    Args:
        text: The text to classify. Non-string values (for example a NaN
            from a missing cell) are not sent to the classifier.

    Returns:
        A ``(label, score)`` tuple, with the score rounded to 4 decimals.
        ``("error", 0.0)`` is returned when the input is not a string or the
        classifier raises an ordinary exception.
    """
    import warnings

    if not isinstance(text, str):
        # Nothing to tokenize; keep the same sentinel the error path uses.
        return "error", 0.0

    try:
        # Truncate at the tokenizer level. Slicing the string (text[:512])
        # limits characters, not tokens, so token-dense text could still
        # exceed the model's maximum sequence length.
        # The first entry of the first result is taken as the top prediction.
        result = emotion_classifier(text, truncation=True)[0][0]
        return result["label"], round(result["score"], 4)
    except Exception as exc:
        # Best-effort: one bad row must not abort the whole batch, but the
        # failure should be visible. KeyboardInterrupt/SystemExit are not
        # subclasses of Exception and therefore still propagate.
        warnings.warn(f"classify_emotion failed: {exc!r}", RuntimeWarning)
        return "error", 0.0

# Classify every poem, then attach both result columns in one assignment.
# Collecting the (label, score) tuples into a single DataFrame avoids building
# a throwaway pd.Series for every row, and the explicit index/columns keep the
# two-column shape well-defined even when there are no rows to classify.
emotion_predictions = full_training_df["poem"].map(classify_emotion).tolist()
full_training_df[["predicted_emotion", "confidence"]] = pd.DataFrame(
    emotion_predictions,
    index=full_training_df.index,
    columns=["predicted_emotion", "confidence"],
)


from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face Hub identifier of the checkpoint to snapshot locally
model_name = "j-hartmann/emotion-english-distilroberta-base"

# Fetch the tokenizer and the sequence-classification weights.
# NOTE(review): `model` is also the name predict_emotion() calls `.predict()`
# on; running this cell rebinds it to the transformers model — confirm the
# intended execution order or use distinct names.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Persist both artifacts side by side; the tokenizer call stays last so the
# cell displays the tuple of written tokenizer files.
model.save_pretrained("./bert_emotion_model/")
tokenizer.save_pretrained("./bert_emotion_model/")



  from .autonotebook import tqdm as notebook_tqdm
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


('./bert_emotion_model/tokenizer_config.json',
 './bert_emotion_model/special_tokens_map.json',
 './bert_emotion_model/vocab.json',
 './bert_emotion_model/merges.txt',
 './bert_emotion_model/added_tokens.json',
 './bert_emotion_model/tokenizer.json')

In [13]:
# Prediction helper: vectorize -> classify -> decode the label
def predict_emotion(poem: str) -> str:
    """Predict the emotion label for a single poem.

    Uses three notebook-level objects that must already exist in the kernel:
    ``tfidf`` (provides ``transform``), ``model`` (provides ``predict``) and
    ``label_encoder`` (provides ``inverse_transform``). Presumably these are
    the fitted TF-IDF vectorizer, classifier and LabelEncoder from the
    training step — confirm against the cell that defines them.

    Args:
        poem: Text of the poem to classify.

    Returns:
        The decoded label for the poem (first element of the decoded batch).
    """
    return label_encoder.inverse_transform(
        model.predict(
            tfidf.transform([poem])  # single-document batch
        )
    )[0]

# Smoke-test the helper on a short four-line poem
sample_poem = (
    "\n"
    "The skies were clear, the stars were bright,\n"
    "My heart was full in velvet night.\n"
    "You touched my soul, then slipped away,\n"
    "Now memories ache at end of day.\n"
)

predict_emotion(sample_poem)

'joy'

In [14]:
# Spot-check predictions on up to five poems per emotion class.
# NOTE(review): the poems are drawn from full_training_df itself; if that is
# also the data the classifier was fit on, this is not a held-out evaluation.
for emotion in label_encoder.classes_:
    emotion_poems = full_training_df.loc[full_training_df["emotion"] == emotion, "poem"]
    # Cap the sample at the class size so classes with fewer than five poems do
    # not raise, and fix the seed so the printed output is reproducible.
    test_poems = emotion_poems.sample(n=min(5, len(emotion_poems)), random_state=42)
    print(f"--- True Emotion: {emotion} ---")
    for p in test_poems:
        print(p[:60], "->", predict_emotion(p))

--- True Emotion: awe ---
This is a poem about nature,
Where feelings run deep and bri -> awe
This is a poem about nature,
Where feelings run deep and bri -> awe
This is a poem about nature,
Where feelings run deep and bri -> awe
This is a poem about nature,
Where feelings run deep and bri -> awe
This is a poem about nature,
Where feelings run deep and bri -> awe
--- True Emotion: empowerment ---
This is a poem about freedom,
Where feelings run deep and br -> empowerment
This is a poem about freedom,
Where feelings run deep and br -> empowerment
This is a poem about freedom,
Where feelings run deep and br -> empowerment
This is a poem about freedom,
Where feelings run deep and br -> empowerment
This is a poem about freedom,
Where feelings run deep and br -> empowerment
--- True Emotion: hope ---
This is a poem about hope,
Where feelings run deep and brigh -> hope
This is a poem about hope,
Where feelings run deep and brigh -> hope
This is a poem about hope,
Where feelings run deep and 

In [11]:
# Number of poems per emotion label in the training frame
full_training_df.loc[:, "emotion"].value_counts()


emotion
sadness        10
joy            10
yearning        5
awe             5
hope            5
empowerment     5
Name: count, dtype: int64

In [17]:
# Preview the first rows instead of dumping the whole frame into the output
full_training_df.head(10)

Unnamed: 0,title,poem,emotion,author,topic,form,emotion_encoded
0,Synthetic Nature Poem 5,"This is a poem about nature,\nWhere feelings r...",awe,GPT-Generated,nature,free verse,0
1,Synthetic Nature Poem 2,"This is a poem about nature,\nWhere feelings r...",awe,GPT-Generated,nature,free verse,0
2,Synthetic Nature Poem 1,"This is a poem about nature,\nWhere feelings r...",awe,GPT-Generated,nature,free verse,0
3,Synthetic Longing Poem 2,"This is a poem about longing,\nWhere feelings ...",yearning,GPT-Generated,longing,free verse,5
4,Synthetic Love Poem 5,"This is a poem about love,\nWhere feelings run...",joy,GPT-Generated,love,free verse,3
5,Synthetic Hope Poem 3,"This is a poem about hope,\nWhere feelings run...",hope,GPT-Generated,hope,free verse,2
6,Synthetic Joy Poem 3,"This is a poem about joy,\nWhere feelings run ...",joy,GPT-Generated,joy,free verse,3
7,Synthetic Longing Poem 3,"This is a poem about longing,\nWhere feelings ...",yearning,GPT-Generated,longing,free verse,5
8,Synthetic Joy Poem 5,"This is a poem about joy,\nWhere feelings run ...",joy,GPT-Generated,joy,free verse,3
9,Synthetic Loss Poem 2,"This is a poem about loss,\nWhere feelings run...",sadness,GPT-Generated,loss,free verse,4
