In [1]:
import pandas as pd
import ollama
import json
from tqdm import tqdm
import ast


In [2]:
# The ten theme names the classifier is allowed to assign (all lower-case).
LABELS = [
    'husband and wife',
    'infatuation',
    'friendship',
    'romantic love',
    'the desire for vengeance',
    'humanoid robot',
    'father and son',
    'human vs. captivity',
    'time travel',
    'greed for riches',
]


In [3]:
# Load the evaluation set; later cells pass this frame to main() with
# text_column="content".
test = pd.read_csv('test_sum.csv')

In [7]:
def classify_text_with_ollama(text: str, model: str) -> str:
    """Ask an Ollama chat model to classify ``text`` and return its raw reply.

    The prompt is built from the module-level ``PROMPT_TEMPLATE`` by filling in
    ``LABELS`` (joined one per line) and ``text``. The reply is not parsed
    here; it is only lower-cased.

    Args:
        text: The text to classify.
        model: Name of the Ollama model to query.

    Returns:
        The content of the model's reply message, lower-cased.
    """
    prompt = PROMPT_TEMPLATE.format(labels="\n".join(LABELS), text=text)
    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
    # Lower-case so the reply can be matched against the lower-case names in LABELS.
    return response['message']['content'].lower()


def process_content(content, labels=None):
    """Extract the allowed theme labels from a model reply.

    The reply is first parsed as a Python literal. If it is a list (or tuple),
    each string item is matched against the allowed labels: an exact match is
    taken as-is, otherwise any allowed label contained in the item is taken
    (this covers a reply such as ``['label a, label b']`` where several labels
    were packed into one string). Non-string items are ignored. If the reply
    is not a list literal at all, every allowed label that occurs anywhere in
    the reply text is returned.

    Matching is case-sensitive, so the reply should already be lower-cased.

    Args:
        content: Reply text from the model. Anything that is not a string is
            treated as an empty reply.
        labels: Allowed label names; defaults to the module-level ``LABELS``.

    Returns:
        A list of allowed labels found in the reply, without duplicates.
        Empty when nothing matches.
    """
    if labels is None:
        labels = LABELS

    text = content.strip() if isinstance(content, str) else ""

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a valid Python literal (e.g. the model added prose around the list).
        parsed = None

    found = []

    def _add(label):
        # Keep first-seen order while dropping repeats.
        if label not in found:
            found.append(label)

    if isinstance(parsed, (list, tuple)):
        for item in parsed:
            if not isinstance(item, str):
                continue
            item = item.strip()
            if item in labels:
                _add(item)
            else:
                # One string may hold several labels separated by commas.
                for label in labels:
                    if label in item:
                        _add(label)
    else:
        # Fallback: look for the label names anywhere in the reply text.
        for label in labels:
            if label in text:
                _add(label)

    return found

def main(df1: pd.DataFrame, output_csv: str, model: str, text_column: str = 'file_content') -> pd.DataFrame:
    """Classify every row of ``df1`` with an Ollama model and save the predictions.

    A new frame is built with ``df1``'s index and columns, but only the
    ``text_column`` values are carried over; every other original column is
    left empty. One 0/1 column per entry of ``LABELS`` is then set (an existing
    column with the same name is reset to 0), plus ``raw_prediction`` (the
    parsed label list as a string) and ``message`` (the model's reply).

    Args:
        df1: Input frame; must contain ``text_column``.
        output_csv: Path of the CSV file to write. Missing parent directories
            are created before any model call is made.
        model: Name of the Ollama model to query.
        text_column: Name of the column holding the text to classify.

    Returns:
        The frame of predictions that was written to ``output_csv``.
    """
    from pathlib import Path  # local import keeps this function self-contained

    # Create the output folder up front so a bad path fails before the long
    # model loop rather than after it.
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(index=df1.index, columns=df1.columns)
    # Copy the column that is actually classified (previously hard-coded to 'content').
    df[text_column] = df1[text_column]

    # Add binary columns
    for label in LABELS:
        df[label] = 0

    df["raw_prediction"] = ""
    df["message"] = ""

    print(f"Processing {len(df)} rows using model: {model}")
    for idx, text in tqdm(df1[text_column].items(), total=len(df1)):
        content = classify_text_with_ollama(text, model=model)
        predicted_labels = process_content(content)
        df.at[idx, "message"] = content
        df.at[idx, "raw_prediction"] = str(predicted_labels)
        for label in predicted_labels:
            df.at[idx, label] = 1

    df.to_csv(output_csv, index=False)
    # Report before returning; this line used to sit after the return and never ran.
    print(f"✅ Done. Output saved to: {output_csv}")
    return df

In [8]:
# Few-shot prompt sent to the model. It is filled with str.format(), which
# replaces {labels} with the allowed theme names and {text} with the summary to
# classify, so the template must not contain any other curly braces.
# The example outputs use one quoted string per theme, matching the
# "valid Python list of strings" instruction at the end of the prompt.
PROMPT_TEMPLATE = """
You are a theme classifier for TV episode summaries. Based on the summaries provided, identify which of the following 10 EXACT themes are present as **main themes** in the episode. 

You must only choose from the following ten themes: {labels}. Use the definitions to guide your selections.

--- Theme Definitions ---

husband and wife: The relationship between husband and wife is featured.  
infatuation: An intense but (typically) short-lived passion that may peter out or settle into more enduring romantic love.  
friendship: The friendship between two characters is featured.  
romantic love: Featured is that peculiar sort of love between people so often associated with sexual attraction.  
the desire for vengeance: A character seeks retribution over a perceived injury or wrong.  
humanoid robot: An automaton that resembles a human being is featured.
father and son: The relationship between a father and his son is featured.  
human vs. captivity: A struggle between captive and captor is featured.
time travel: Traveling between past and future points in time is featured.
greed for riches: A character exhibits an inordinate desire for wealth such as money, luxuries, and the like.  


--- Example 1 ---

Input Text:  
'\n\n== Plot ==\nA rebellious young girl and her family move into a gated neighborhood whose inhabitants place conformity and well to do behavior above all else. As time goes by, she learns to dress and act like all of the other kids in the neighborhood. However, her curiosity still persists and she befriends a young man who, like her, questions the stringent rules the community has set forth for them and indulges in vices such as cigarettes and liquor. One night she sees him being put into a van and taken away by the neighborhood patrols. As time goes by, she soon discovers that children who go against the teachings of the neighborhood are taken to a factory where they are turned into fertilizer, and do indeed become evergreen. Eventually, the neighborhood council deems Jenna to be a threat to the neighborhood, and she is taken to the plant to become evergreen herself.\n'

Output: 
['human vs. captivity', 'husband and wife']


--- Example 2 ---
Input Text:  
"\n==Plot==\n[[Bender Bending Rodríguez|Bender]] is discovered to be sleepwalking and sleep bending, bending everything in sight, like a vital part of the [[Planet Express Ship]], Hermes' ruler, Amy's college javelin, Zoidberg's prized pet Slinky, in which he reacts to by crying and sobbing, and even the [[Hubert J. Farnsworth|Professor]]. Until Bender can satisfy his lust for bending and overcome his sleepwalking, he is forbidden from returning to [[Planet Express]].\n\nBender finds work at a factory where all the workers are on strike. After securing a job Bender is introduced to his co-workers: an attractive fembot, [[Angleyne]] and [[Flexo]]. Bender flirts with Angelyne who enjoys his advances and falls in love with him. Bender helps Angelyne bend and the two share a kiss before dating, things are fine until he makes a shocking discovery: Flexo and Angelyne are divorced. Naturally Bender perceives Flexo as a threat to his romance and attempts to find out who Angelyne loves by taking her to [[The Hip Joint]] posing as Flexo. After a passionate kiss leads to Bender's disguise being foiled, he takes off to the factory to kill Flexo. Unfortunately, the [[Robot Mafia]] also want to kill Flexo which they attempt by dropping an unbendable girder on him. Angelyne arrives on the scene too late, but she rushes to crippled Flexo's side professing her love for him. Bender, wanting to make Angelyne happy, decides to bend the girder to save Flexo. After succeeding Bender returns to Planet Express, his bend lust satisfied, but gets irate after being bugged to de-bend the Professor, so he simply bends him 180 degrees much to the dismay of his co-workers.\n"

Output:

['humanoid robot']


--- Instructions ---

- Return a valid Python list of strings.  
- Use only the exact theme names listed above.  
- Do NOT include any explanation.  
- Do NOT invent new labels.  



--- New Input ---

Text:  
{text}

Output:
"""


In [9]:
# Classify the test set with the mistral:7b-instruct model and save the result.
models = "mistral:7b-instruct"
# NOTE(review): the file name says "mistral:latest" while the model queried is
# "mistral:7b-instruct" — confirm which name this output should carry.
output = 'prompting_sum/instruct/prompt4/test_mistral:latest.csv'
# main() accepts any model name and output path; the returned frame of
# predictions is kept in df1.
df1 =main(df1=test, output_csv=output, text_column="content", model=models)




Processing 129 rows using model: mistral:7b-instruct


100%|█████████████████████████████████████████| 129/129 [31:52<00:00, 14.82s/it]


In [10]:
# Classify the test set with the gemma3:12b-it-qat model; the returned frame of
# predictions is kept in df4 and also written to the CSV path below.
models = "gemma3:12b-it-qat"
output = 'prompting_sum/instruct/prompt4/test_gemma3:12b-it-qat.csv'
df4 = main(df1=test, output_csv=output, text_column="content", model=models)


Processing 129 rows using model: gemma3:12b-it-qat


100%|███████████████████████████████████████| 129/129 [1:03:56<00:00, 29.74s/it]


In [11]:
# Classify the test set with the llama3.1:8b-instruct-q8_0 model; the returned
# frame of predictions is kept in df5 and also written to the CSV path below.
models = "llama3.1:8b-instruct-q8_0"
output = 'prompting_sum/instruct/prompt4/test_llama3.1:8b-instruct-q8_0.csv'
df5 = main(df1=test, output_csv=output, text_column="content", model=models)

Processing 129 rows using model: llama3.1:8b-instruct-q8_0


100%|█████████████████████████████████████████| 129/129 [26:19<00:00, 12.25s/it]
