In [None]:
import pandas as pd
from groq import Groq
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
!pip install groq

In [None]:
# Shared Groq API client used by every classification request below.
# "API_KEY" is a placeholder; substitute a real key before running.
client = Groq(api_key="API_KEY")

In [None]:
# Load the cleaned, binary-classified corpus from the working directory,
# print its (rows, columns) shape, and display the first rows as cell output.
df = pd.read_csv('FinalClassified_cleaned.csv')
print(df.shape)
df.head()

In [None]:
# Display how many rows carry each BinaryClassification value.
df['BinaryClassification'].value_counts()

In [None]:
# Keep only the rows whose BinaryClassification equals 1, as an independent
# copy so later in-place edits do not touch `df`; then display its shape.
df_copy = df.loc[df['BinaryClassification'].eq(1)].copy()
df_copy.shape

In [None]:
# Remove the two columns in place (mutates df_copy). BinaryClassification is
# constant (== 1) after the filter applied when df_copy was created.
df_copy.drop(columns=['BinaryClassification', 'word_count'], inplace=True)

In [None]:
# Display the first rows of the filtered frame.
df_copy.head()

## Topic Classification with Llama

In [None]:
def get_classification(post, examples, context=""):
    """Ask the Groq-hosted Llama model for the topic category of one transcript.

    Builds a single-message prompt containing the topic definitions, the
    optional few-shot examples, and the transcript, then sends it through the
    module-level ``client``.

    Args:
        post: Transcript text to classify; interpolated into the prompt.
        examples: Pre-formatted labeled examples for few-shot prompting. When
            this is empty (or None) the "Use these examples ..." clause is
            omitted entirely, so the request is genuinely zero-shot instead of
            instructing the model to use an empty example list.
        context: Not used by the prompt; kept (now optional) so existing
            three-argument callers keep working.

    Returns:
        The model's reply with surrounding whitespace stripped, or None when
        the request raises or the reply carries no text content.
    """
    # Few-shot clause, emitted only when examples were actually supplied.
    # With non-empty examples the resulting prompt is byte-identical to the
    # previous implementation.
    examples_section = (
        f'Use these examples of transcripts that were classified: "{examples}"\n\n'
        if examples
        else ""
    )

    # NOTE(review): the "# Prompting Strategy" lines below live inside the
    # string literal, so they are sent to the model verbatim. They are left
    # untouched here so few-shot prompts match earlier runs; confirm whether
    # they were meant to be code comments instead.
    prompt = f"""You're working on a project about the history of Japanese incarceration. You have enough knowledge about this domain and can consider the contexts inside the topic.
Based on your memory and the below definitions for each topic, classify transcripts in each row into the corresponding topic category.
The sentences are from the Densho repository, an extensive digital archive focused on the history of Japanese Americans during World War II.
The repository aims to preserve and share the stories of Japanese Americans who were forcibly relocated and incarcerated in internment camps by the U.S. government following the attack on Pearl Harbor. Return only the topic numbers as outputs. Return only the topic category number as outputs.

###Topic Categories###
0 - Biographic Information of the person
1 - Life before the Incarceration
2 - Life during the Incarceration
3 - Military services
4 - Returning of Japanese Americans after WWII
5 - Movements for peace and justice

Category 0: Biographical Information
Sentences in this category should focus on factual details about individuals, such as their name, birthdate, birthplace (including camps), family members, or religious affiliations. It also includes sentences mainly describing how old they were when they came to the U.S. (e.g., “My father came to the U.S. in 1905 at the age of fifteen with his father”) without a broader context about their lives or experiences in the new country, then they must be labeled here, otherwise in Category 1. If the sentence provides additional context about their life in the U.S., such as challenges, achievements, community involvement, family life, or education, it should be categorized in Category 1 instead. For example, sentences about establishing a business, adapting to a new culture, or interacting with the community belong in Category 1, even if they also include factual details.

Category 1: Life Before the Incarceration
This category includes sentences that describe how Japanese Americans or their families lived before the declaration of incarceration. It encompasses their daily lives, educational experiences, cultural practices, community dynamics, and the challenges they faced, such as racial discrimination or political tensions leading up to World War II. It also includes sentences about family customs, cultural teachings, and social hierarchies passed down within families, including Japanese traditions, values, and practices instilled by parents or elders. For example, reflections on how children were taught to follow Japanese customs, or observe traditional roles in the family fit into this category. Sentences that reflect societal attitudes, media portrayals, or policies (e.g., Alien Registration Act) before Pearl Harbor or the declaration of incarceration also belong here. If sentences primarily focus on factual details (e.g., names, birthdates, birthplaces, or religious affiliations) without additional life context, they should be categorized in Category 0 instead.
Sentences also can be about the story of how they came to the U.S. but if they include factual biographical information (e.g., “My father was 19 when he came to the U.S.”) without a broader life context, it should be categorized in Category 0 instead.
While sentences about pre-war discrimination, anti-Japanese sentiment, or the political environment may refer to the growing threat of war, they are still part of the pre-incarceration context and should remain in this category. Additionally, personal reflections on how societal attitudes evolved leading up to the war, or the ways these attitudes shaped Japanese Americans' lives, should be categorized here.
If sentences discuss specific events or societal struggles after Pearl Harbor or during the incarceration period, such as transportation to camps, FBI investigations, or experiences within the camps, they should be categorized in Category 2.

Category 2: Life During the Incarceration
This category includes sentences describing events and experiences spanning from the day Japan bombed Pearl Harbor (December 7, 1941) to the end of incarceration, encompassing the period leading up to internment. It captures societal struggles, acts of discrimination, and preparations for removal, as well as the daily realities of life in incarceration camps. Stories in this category may include memories of Pearl Harbor and the immediate societal fallout, such as FBI investigations, heightened community tensions, and both acts of kindness and hostility. They also cover personal challenges during the removal process, including packing belongings, selling off property, and bidding farewell to friends and neighbors, as well as experiences during transportation to camps or while awaiting relocation orders, often marked by discrimination and difficulties accessing services. Additionally, this category encompasses life within the camps, including food, work, education, recreation, and the social dynamics of those confined together.
This category also includes legal challenges or defiance of laws during the incarceration period, such as acts of civil disobedience or challenges to curfews, incarceration orders, or other wartime restrictions. Narratives reflecting on these events as they occurred during the incarceration time, whether supportive or critical, also belong here. Furthermore, perspectives from non-Japanese individuals about the removal or treatment of Japanese Americans during this time fit within this category.
Category 2 focuses on events tied directly to incarceration and wartime struggles. If sentences focus primarily on pre-war life, such as stories about businesses, schools, neighborhood life, societal attitudes, or challenges leading up to Pearl Harbor and incarceration, they should be categorized in Category 1 instead.

Category 3: Military services
This category includes sentences that describe decisions about military service, whether individuals chose to join the U.S. Army, resisted conscription, or, in some cases, considered service in the Japanese Army. It also encompasses reflections on the loyalty questionnaire, particularly questions related to allegiance to the U.S. and military service, along with the consequences of their responses. Sentences in this category often explore societal perceptions and judgments of those who served, resisted, or expressed defiance, including terms like ""yes-yes"" or ""no-no"" and their broader implications.
Additionally, this category includes sentences discussing the post-war benefits and opportunities that stemmed from military service, such as gaining employment, accessing education, or receiving recognition through veterans' programs. If a sentence primarily focuses on military service, the loyalty questionnaire, or the societal and personal impacts of military involvement, it should be categorized here rather than in Category 2.

Category 4: Returning of Japanese Americans after WWII
This category must include sentences describing stories after the war ended, focusing on adjustments to life after leaving the camps or surviving in post-war Japan. These stories may consist of challenges like finding employment, gaining access to education, and dealing with societal treatment in the U.S. or Japan. Narratives about the survival strategies of those in Japan, such as participating in illegal markets or navigating post-war chaos, also fall under this category, as do interactions with American soldiers or the impact of post-war policies on civilians.
Additionally, this category includes reflections on the emotional and societal aftermath of the war, such as shock at Japan's surrender, the chaos of rebuilding lives, and how people coped with these changes. Narratives by non-Japanese individuals about their perspectives on welcoming or rejecting Japanese Americans also belong here.

Category 5: Movements for Peace and Justice
This category includes sentences describing redress movements and other activities aimed at seeking justice after the war ended. It focuses on efforts to address the wrongs of incarceration, how individuals and communities advocated for recognition, reparations, or historical acknowledgment, and reflections on the broader lessons learned from the incarceration experience. Sentences reflecting on the impact of these movements or the legacy of the incarceration history during interviews also belong here.
Legal challenges or acts of resistance occurring during the incarceration period, such as defying curfews or incarceration orders, belong in Category 2 unless they are explicitly discussed in the context of post-war justice movements.


# Prompting Strategy:
# If labeled examples are provided in the `examples` field, the model operates in a few-shot setting.
# If no examples are provided, the prompt functions in a zero-shot setting based solely on the topic definitions and instructions.

{examples_section}Transcript: "{post}"
"""

    # The original notebook also kept a commented-out variant of this call
    # with model="Qwen/Qwen2.5-32B-Instruct" and the same sampling parameters.
    try:
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            temperature=0,
            max_tokens=4096,
            top_p=0.95,
            model="llama-3.3-70b-versatile",
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: one failed request must not abort a long
        # batch run, so the failure is reported and signalled with None.
        print(f"Error processing request: {e}")
        return None

    # A reply without text content is reported as None rather than raising.
    if content is None:
        return None
    return content.strip()

In [None]:
# Take the first 92,191 rows (by position) as the set to classify, print the
# resulting shape, and display the first rows.
df_test = df_copy.iloc[:92191]
print(df_test.shape)
df_test.head()

In [None]:
# Few-shot prompting strategy:
# We use all rows from `Memory_dataset_TextClassification.xlsx` as few-shot examples.
# This memory dataset represents 40% of the full labeled corpus and is pre-selected.
# These examples are added to the prompt to guide classification.
# To switch to zero-shot prompting, replace `exmpls` with an empty string.

# This dataset is a 40% memory sample used for few-shot prompting
data_m = pd.read_excel('/content/Memory_dataset_TextClassification.xlsx')

# The few-shot block is identical for every row, so it is built once here
# instead of being re-assembled (with a DataFrame copy and a full pass over
# the memory dataset) on every loop iteration.
exmpls = "\n".join(
    f"Transcript: {r['Transcript']}\nTopic Label: {r['Topic Label']}\n"
    for _, r in data_m.iterrows()
)

all_res = []

for index, row in df_test.iterrows():
    post = row['Sentence']
    print(index)  # progress indicator: index label of the row being classified
    result = get_classification(post, exmpls, "")
    # A None or empty reply is recorded as "Error" so that all_res stays
    # aligned one-to-one with the rows of df_test.
    all_res.append(result if result else "Error")

# Add the results to the DataFrame and save
df_class = df_test.copy()
df_class['Topic_Classification'] = all_res
df_class.to_csv('TopicClassification_results.csv')

In [None]:
# Display how often each returned label (including "Error") occurs.
df_class['Topic_Classification'].value_counts()

In [None]:
# Display the (rows, columns) shape of the results frame.
df_class.shape