In [1]:
# Import the functions
from LabelGenius import *

Demo 1: Single-Category Classification using N24News Dataset
-------------------------------------------------------------

This demo shows how to classify a single news article into one of 24 category
using the N24News dataset. Each article in the dataset includes both textual
and visual information.

Source: https://aclanthology.org/2022.lrec-1.729/


Each article contains the following fields:
- 'section': Ground truth label (one of 24 category)
- 'headline': Title of the article
- 'abstract': Short summary of the article
- 'article': Full text content
- 'article_url': Link to the original article
- 'image': Encoded image or metadata (optional)
- 'caption': Image caption
- 'image_id': Unique image identifier
- 'img_dir': Path to the associated image (e.g., 'N24News/imgs_200_sample1/12345.jpg')
- 'article_id': Unique article identifier

Image file: Multimodal_image

Example category (See prompt_D1 for the complete category):
------------------------
1. Health
2. Science
3. Television
...
24. Global Business

Reference:
----------
Wang, Z., Shan, X., Zhang, X., & Yang, J. (2022).
N24News: A New Dataset for Multimodal News Classification.
In *Proceedings of the Thirteenth Language Resources and Evaluation Conference* (pp. 6768–6775). LREC.


### Demo 1a: Single-Category Text Classification

**Datasets:**
- `D1_1.csv`: Used for initial labeling and fine-tuning.
- `D1_1.csv`: Used for testing the fine-tuned model's performance.


## GPT: third-party API labeling


In [2]:
# Define the list of 24 category labels
category_D1_GPT = [
    "1", "2", "3", "4", "5", "6",
    "7", "8", "9", "10", "11", "12",
    "13", "14", "15", "16", "17", "18",
    "19", "20", "21", "22", "23", "24"
]


# Define the descriptive prompts for each category
prompt_D1_GPT = ["""You are given a short news article. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, Demo_results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the categorty label only (e.g., 5), no extra words."""]



api_key = "XXXXX"

## Price estimation

In [3]:
import os
import time
import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm


# ── CONFIG ────────────────────────────────────────────────────────────────
os.environ["OPENAI_API_KEY"] = api_key
client = OpenAI()

# ── Testing (copied from OpenAI playground) ───────────────────────────────
response = client.responses.create(
  model="o4-mini",
  input=[
    {
      "role": "developer",
      "content": [
        {
          "type": "input_text",
          "text": """You are given a short news article. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, Demo_results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the complete label only (e.g., 1), no extra words."""
            }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "input_text",
          "text": "\"\"\" Our guide to the city's best classical music and opera.\"\"\"\n      "
        }
      ]
    }
  ],
  text={
    "format": {
      "type": "text"
    }
  },
  reasoning={
    "effort": "medium"
  },
  tools=[],
  store=True
)

In [4]:
response

Response(id='resp_68216f03ea648191af9e6b0d33ad604609036a8636a175ad', created_at=1747021571.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='o4-mini-2025-04-16', object='response', output=[ResponseReasoningItem(id='rs_68216f046a948191b3923e0203b4a35009036a8636a175ad', summary=[], type='reasoning', status=None), ResponseOutputMessage(id='msg_68216f0531a4819182252c135981c1d309036a8636a175ad', content=[ResponseOutputText(annotations=[], text='12', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, max_output_tokens=None, previous_response_id=None, reasoning=Reasoning(effort='medium', generate_summary=None, summary=None), status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text')), truncation='disabled', usage=ResponseUsage(input_tokens=439, input_tokens_details=InputTokensDetails(cached_tokens=0), output_tokens=71, output_token

In [5]:
price_estimation (response,
                  num_rows = 1000,
                  input_cost_per_million = 1.10,
                  output_cost_per_million = 4.44,
                  num_votes = 3)

# https://openai.com/api/pricing/


🧮 Estimated Cost for 3,000 calls (1,000 rows × 3 votes)
• Avg prompt tokens/call:     439
• Avg completion tokens/call: 71
• Pricing ($/1M tokens): prompt=$1.1, completion=$4.44
💰 Total: $2.3944    (±10% → $2.1550–$2.6339)



2.39442

## GPT-4o

In [6]:
D1a_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    category = category_D1_GPT,
    prompt = prompt_D1_GPT,
    column_4_labeling=["headline", "article", "abstract"],
    model = "gpt-4o-mini",
    api_key = api_key,
    temperature= 1, #a value range from 0 to 2
    mode = "text",
    output_column_name="D1a_GPT_4o_inital_lableing",
    num_themes = 1,
    num_votes = 3)

D1a_GPT_4o_inital_lableing.to_csv("Demo_result/D1a_GPT_4o_inital_lableing.csv", index=False)

Classifying text_class: 100%|██████████| 200/200 [02:05<00:00,  1.59item/s]


In [7]:
D1a_GPT_4o_inital_lableing

Unnamed: 0,section,headline,article_url,article,abstract,article_id,image,caption,image_id,image_path,section_numeric,text_content,image_data_url,final_input,D1a_GPT_4o_inital_lableing,D1a_GPT_4o_inital_lableing_raw,D1a_GPT_4o_inital_lableing_1
0,Fashion & Style,"On This Runway, Non-Models and Cool Kids",https://www.nytimes.com/2016/09/10/fashion/eck...,"Over Labor Day weekend, a steady stream of hop...",How the Eckhaus Latta designers select fashion...,e9cd6477-5eb6-58b2-8e33-fd2d881bf656,https://static01.nyt.com/images/2016/09/10/fas...,"Mike Eckhaus, center right, and Zoe Latta, cen...",e9cd6477-5eb6-58b2-8e33-fd2d881bf656,Demo_data/D1_imgs/1e9cd6477-5eb6-58b2-8e33-fd2...,19,"On This Runway, Non-Models and Cool Kids Over ...",,"On This Runway, Non-Models and Cool Kids Over ...",19,[19],19
1,Theater,"The New 42nd Street, a Theater Nonprofit, Name...",https://www.nytimes.com/2019/06/11/theater/new...,The New 42nd Street -- the nonprofit organizat...,Russell Granet will be the new president and c...,acdcd1ef-71a9-55f2-9993-7a231d57396f,https://static01.nyt.com/images/2019/06/11/art...,"The New 42nd Street board chairwoman, Fiona Ru...",acdcd1ef-71a9-55f2-9993-7a231d57396f,Demo_data/D1_imgs/1acdcd1ef-71a9-55f2-9993-7a2...,10,"The New 42nd Street, a Theater Nonprofit, Name...",,"The New 42nd Street, a Theater Nonprofit, Name...",10,[10],10
2,Economy,Tax Tactics Threaten Public Funds,https://www.nytimes.com/2014/10/02/business/ec...,When the European Commission charged this week...,If global corporations can continue to evade t...,1de5c156-9cea-5971-9fdf-b6a4ce9bf35c,https://static01.nyt.com/images/2012/02/29/bus...,Eduardo Porter,1de5c156-9cea-5971-9fdf-b6a4ce9bf35c,Demo_data/D1_imgs/11de5c156-9cea-5971-9fdf-b6a...,8,Tax Tactics Threaten Public Funds When the Eur...,,Tax Tactics Threaten Public Funds When the Eur...,24,[24],24
3,Television,Review: 'The Chi' Returns to the South Side of...,https://www.nytimes.com/2019/04/05/arts/televi...,"""The Chi"" was built around a series of shootin...",Lena Waithe's neighborhood drama on Showtime d...,7bb11a9c-2f6d-57f9-bb36-db7f00f59589,https://static01.nyt.com/images/2019/04/05/art...,"Alex Hibbert as Kevin in ""The Chi,"" a sprawlin...",7bb11a9c-2f6d-57f9-bb36-db7f00f59589,Demo_data/D1_imgs/17bb11a9c-2f6d-57f9-bb36-db7...,3,Review: 'The Chi' Returns to the South Side of...,,Review: 'The Chi' Returns to the South Side of...,3,[3],3
4,Opinion,Banks Should Face History and Pay Reparations,https://www.nytimes.com/2020/06/26/opinion/sun...,Ms. Blackwell is founder in residence at Polic...,The financial industry can close the wealth ga...,165426ae-1e7c-5193-8596-ef96357645a8,https://static01.nyt.com/images/2020/06/25/opi...,A Wells Fargo bank in Minneapolis was set on f...,165426ae-1e7c-5193-8596-ef96357645a8,Demo_data/D1_imgs/1165426ae-1e7c-5193-8596-ef9...,11,Banks Should Face History and Pay Reparations ...,,Banks Should Face History and Pay Reparations ...,11,[11],11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Automobiles,Wheelies: The Ram On Edition,https://www.nytimes.com/2014/02/05/automobiles...,A roundup of motoring news from the web:\n\n# ...,Chrysler executives consider opening a third f...,ad722160-9bde-5350-abf3-a636bc1b6c22,https://static01.nyt.com/images/2014/02/04/aut...,The Ram 1500.,ad722160-9bde-5350-abf3-a636bc1b6c22,Demo_data/D1_imgs/1ad722160-9bde-5350-abf3-a63...,23,Wheelies: The Ram On Edition A roundup of moto...,,Wheelies: The Ram On Edition A roundup of moto...,1,[1],1
196,Science,Geminids Meteor Shower 2020: Watch It Peak in ...,https://www.nytimes.com/2020/12/13/science/gem...,All year long as Earth revolves around the sun...,Meteor showers can light up night skies from d...,bd8aca0f-81f3-560c-a1ca-20fbefd4af67,https://static01.nyt.com/images/2017/07/19/sci...,A meteor from the Geminids streaking between t...,bd8aca0f-81f3-560c-a1ca-20fbefd4af67,Demo_data/D1_imgs/1bd8aca0f-81f3-560c-a1ca-20f...,2,Geminids Meteor Shower 2020: Watch It Peak in ...,,Geminids Meteor Shower 2020: Watch It Peak in ...,2,[2],2
197,Opinion,The Justice Department's Shameful Rush to Fede...,https://www.nytimes.com/2020/07/17/opinion/jus...,"Early on Tuesday morning, while much of the co...",The push to impose the federal death penalty n...,5da51505-585c-5c01-8f29-d78297ac0aa4,https://static01.nyt.com/images/2020/07/16/opi...,The execution chamber in the U.S. Penitentiary...,5da51505-585c-5c01-8f29-d78297ac0aa4,Demo_data/D1_imgs/15da51505-585c-5c01-8f29-d78...,11,The Justice Department's Shameful Rush to Fede...,,The Justice Department's Shameful Rush to Fede...,11,[11],11
198,Style,How Do You Dress a Russian Doll?,https://www.nytimes.com/2019/02/14/style/russi...,What would you wear if you could attend your o...,"Glumly, it seems, but that hasn't stopped styl...",8a5113f7-3a30-56c0-99b2-ab36e31c6c1f,https://static01.nyt.com/images/2019/02/14/fas...,Rebecca Henderson and Greta Lee in the Netflix...,8a5113f7-3a30-56c0-99b2-ab36e31c6c1f,Demo_data/D1_imgs/18a5113f7-3a30-56c0-99b2-ab3...,15,How Do You Dress a Russian Doll? What would yo...,,How Do You Dress a Russian Doll? What would yo...,3,[3],3


In [8]:
## Check the accuracy: GPT 4o inital labeling
D1a_GPT_4o_inital_lableing['D1a_GPT_4o_inital_lableing'] = pd.to_numeric(D1a_GPT_4o_inital_lableing['D1a_GPT_4o_inital_lableing'], errors='coerce')
D1a_GPT_4o_inital_lableing['section_numeric'] = pd.to_numeric(D1a_GPT_4o_inital_lableing['section_numeric'], errors='coerce')

auto_verification(
    D1a_GPT_4o_inital_lableing,
    predicted_cols="D1a_GPT_4o_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT
)


== Verification of 'D1a_GPT_4o_inital_lableing' vs. 'section_numeric' ==
Accuracy:   76.50%
Macro F1:   66.79%
Micro  F1:  76.50%

Full classification report:
              precision    recall  f1-score   support

           1       0.48      1.00      0.65        13
           2       1.00      0.91      0.95        11
           3       0.67      1.00      0.80        12
           4       1.00      0.89      0.94         9
           5       0.83      0.83      0.83         6
           6       0.86      0.67      0.75         9
           7       0.67      1.00      0.80         2
           8       0.80      0.80      0.80         5
           9       1.00      0.89      0.94         9
          10       0.59      0.91      0.71        11
          11       1.00      0.57      0.73        14
          12       1.00      0.70      0.82        10
          13       1.00      0.89      0.94         9
          14       0.94      0.83      0.88        18
          15       0.00      

### Note: Fine-Tune GPT models:

Ensure that fine-tuning is performed on snapshot models, meaning the model's state is preserved with the training data up to that point. This approach allows the model to learn from incremental updates effectively.


GPT reasoning models (i.e., o-series) can not be fine-tuned for now

#### Minimum Data Requirement:
Fine-tuning for GPT-4o starts to show effective learning with around ~20 examples each category. Insufficient data may not capture nuanced learning effectively.

#### Bad Perormance Tiny Datasets:

When fine-tuning with very small datasets, the optimizer may converge to simply output the majority token as this is the easiest way to minimize loss.

This behavior is not representative of true learning—it reflects a bias towards frequent labels instead of understanding contextual differences.


In [9]:
# Prepare the data for GPT-4o 
# The JSONL should have three parts: 
# 1. system_prompt: coding instruction
# 2. input_col: The information needed to be labeled
# 3. label_col: the label


generate_GPT_finetune_jsonl(
    D1a_GPT_4o_inital_lableing,
    output_path="Demo_result/D1a_GPT_4o_inital_lableing.jsonl",
    system_prompt=prompt_D1_GPT,
    input_col=["headline", "article", "abstract"],
    label_col=["section_numeric"]
)


# GPT-4o Fine-Tune Hyperparameters

---

##  **1️⃣ Batch Size Considerations**
The batch size determines how many samples are processed at once. Larger batch sizes may speed up training but can lead to instability, especially with limited data.

###  **Recommendations:**
| Dataset Size         | Recommended Batch Size |
|-----------------------|------------------------|
| **< 1,000 samples**  | `batch_size: 4`       |
| **1,000 - 10,000**   | `batch_size: 8` or `16` |
| **> 10,000**         | `batch_size: 32`      |
| **> 100,000**        | `batch_size: 64`      |

###  **Caution:**
- Increasing batch size can lead to overfitting if the data is not diverse.
- If training loss is unstable, consider lowering the batch size.

---

##  **2️⃣ Learning Rate Multiplier**
The learning rate multiplier scales the base learning rate of the model. A value of **0.1** means the effective learning rate is **10%** of the model's default.

###  **Recommendations:**
| Data Type                         | Learning Rate Multiplier |
|-----------------------------------|--------------------------|
| **High variance text** (e.g., news articles, social media posts) | `0.02` to `0.05` |
| **Domain-specific text** (e.g., scientific abstracts, legal documents) | `0.02` to `0.1`  |
| **Noisy or mixed-domain data** (e.g., user-generated content) | `0.01` to `0.02` |
| **Highly structured data** (e.g., technical manuals) | `0.05` to `0.1` |

###  **Caution:**
- If loss suddenly spikes, reduce the learning rate multiplier.
- If the model underfits (low accuracy and low loss), slightly increase the multiplier.
- Avoid setting too high a learning rate (`> 0.1`) unless you have very clean and structured data.

---

In [10]:
# Fine-tune GPT-4o
D1a_GPT_4o_model_finetune = finetune_GPT(
    training_file_path="Demo_result/D1a_GPT_4o_inital_lableing.jsonl",
    model="gpt-4o-mini-2024-07-18",  
    hyperparameters={"batch_size":8, "learning_rate_multiplier":0.01},
    api_key= api_key  
)

Started fine-tune job ftjob-VJfdE7SiU6JSXu1K9EIRIo7Z
[0s] status=validating_files
[15s] status=validating_files
[30s] status=validating_files
[45s] status=validating_files
[60s] status=validating_files
[75s] status=validating_files
[90s] status=validating_files
[105s] status=running
[120s] status=running
[135s] status=running
[150s] status=running
[165s] status=running
[180s] status=running
[195s] status=running
[210s] status=running
[225s] status=running
[240s] status=running
[255s] status=running
[270s] status=running
[285s] status=running
[300s] status=running
[315s] status=running
[330s] status=running
[345s] status=running
[360s] status=running
[375s] status=running
[390s] status=running
[405s] status=running
[420s] status=running
[435s] status=running
[450s] status=succeeded
✅ succeeded: ft:gpt-4o-mini-2024-07-18:jcs-research::BWEYV9Wd


In [12]:
D1a_GPT_4o_model_finetune

'ft:gpt-4o-mini-2024-07-18:jcs-research::BWEYV9Wd'

#### Note. You can wait in the code untill status = finish 
##### Or you will receive an email from OpenAI when the fine tune is done 

#### Example email
Hi JCs‘ research,
Your fine-tuning job ftjob-xxxx has successfully completed, and a new model **ft:gpt-4o-mini-2024-07-18:xxx::xxxx** has been created for your use.

Copy the identifier to the classification_GPT()'s model.

In [13]:
# Classify with fine‑tuned 4o model
D1_GPT_4o_finetune = classification_GPT(
    text_path="Demo_data/D1_2.csv",
    category = category_D1_GPT,
    prompt = prompt_D1_GPT,
    column_4_labeling=["headline", "article", "abstract"],
    model = D1a_GPT_4o_model_finetune,
    api_key = api_key,
    temperature = 1,
    mode = "text",
    output_column_name="D1_GPT_4o_finetune",
    num_themes = 1,
    num_votes = 1)



D1_GPT_4o_finetune.to_csv("Demo_result/D1_GPT_4o_finetune.csv", index=False)

Classifying text_class: 100%|██████████| 200/200 [02:23<00:00,  1.39item/s]


In [14]:
## Check the accuracy: GPT o4 initial labeling
auto_verification(
    D1_GPT_4o_finetune,
    predicted_cols="D1_GPT_4o_finetune",
    true_cols="section_numeric",
    category=category_D1_GPT
);


== Verification of 'D1_GPT_4o_finetune' vs. 'section_numeric' ==
Accuracy:   75.14%
Macro F1:   62.56%
Micro  F1:  75.14%

Full classification report:
              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00         4
         2.0       0.71      0.71      0.71         7
         3.0       0.81      1.00      0.89        17
         4.0       1.00      0.80      0.89         5
         5.0       0.50      0.67      0.57         3
         6.0       0.89      1.00      0.94         8
         7.0       1.00      0.87      0.93        15
         8.0       0.56      0.83      0.67         6
         9.0       0.75      1.00      0.86         6
        10.0       0.87      1.00      0.93        13
        11.0       0.88      0.78      0.82         9
        12.0       0.83      1.00      0.91        10
        13.0       1.00      1.00      1.00        12
        14.0       0.85      1.00      0.92        11
        15.0       0.00      0.00    

#### Note:
The error observed here is expected and occurs because OpenAI did not return a result from the anticipated category. The code is designed to handle this automatically by sending another request (Default = 3). However, if you consistently encounter this issue throughout the output, please verify the prompt settings in the Playground.

## GPT-o4 (reasoning model)


In [16]:
D1a_GPT_o4_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    category = category_D1_GPT,
    prompt = prompt_D1_GPT,
    column_4_labeling=["headline", "article", "abstract"],
    model = "o4-mini",
    api_key = api_key,
    effort = "medium",
    mode = "text",
    output_column_name="D1a_GPT_o4_inital_lableing",
    num_themes = 1,
    num_votes = 1)


D1a_GPT_4o_inital_lableing.to_csv("Demo_result/D1a_GPT_o4_inital_lableing.csv", index=False)

Classifying text_class: 100%|██████████| 200/200 [10:06<00:00,  3.03s/item]


In [21]:
## Check the accuracy: GPT o4 initial labeling
auto_verification(
    D1a_GPT_o4_inital_lableing,
    predicted_cols="D1a_GPT_o4_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT
);


== Verification of 'D1a_GPT_o4_inital_lableing' vs. 'section_numeric' ==
Accuracy:   85.79%
Macro F1:   70.17%
Micro  F1:  85.79%

Full classification report:
              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00         2
         2.0       1.00      1.00      1.00        10
         3.0       0.80      1.00      0.89        12
         4.0       1.00      0.89      0.94         9
         5.0       0.75      1.00      0.86         6
         6.0       0.89      0.89      0.89         9
         7.0       0.50      1.00      0.67         2
         8.0       0.67      0.80      0.73         5
         9.0       1.00      1.00      1.00         9
        10.0       0.91      0.91      0.91        11
        11.0       0.86      1.00      0.92        12
        12.0       1.00      0.90      0.95        10
        13.0       0.89      0.89      0.89         9
        14.0       1.00      0.89      0.94        18
        15.0       1.00      

#### Note: GPT reasnong models (i.e., o-series) can not be finetuned for now

# Demo 1b: Classify single-category image data

**Datasets:**
- `imgs_40`: Used for initial labeling and fine-tuning.
- `imgs_40_2`: Used for testing the fine-tuned model's performance.


In [16]:
# Define the list of 24 category labels
category_D1_GPT = [
    "1", "2", "3", "4", "5", "6",
    "7", "8", "9", "10", "11", "12",
    "13", "14", "15", "16", "17", "18",
    "19", "20", "21", "22", "23", "24"
]


# Define the descriptive prompts for each category
prompt_D1_GPT = ["""You are given an image. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, Demo_results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the categorty label only (e.g., 5), no extra words."""]



api_key = "XXXXX"

## GPT-4o


In [17]:
D1b_GPT_4o_inital_lableing = classification_GPT(
    image_dir="Demo_data/D1_imgs_1",
    text_path="Demo_data/D1_1.csv",
    category = category_D1_GPT,
    prompt = prompt_D1_GPT,
    model = "gpt-4o-mini",
    api_key = api_key,
    temperature = 0.8,
    mode = "image",
    output_column_name="D1b_GPT_4o_inital_lableing",
    num_themes = 1,
    num_votes = 3)


D1b_GPT_4o_inital_lableing.to_csv("Demo_result/D1b_GPT_4o_inital_lableing.csv", index=False)

Classifying image_class: 100%|██████████| 200/200 [07:10<00:00,  2.15s/item]


In [26]:
D1b_GPT_4o_inital_lableing

Unnamed: 0,image_id,image_dir,text_content,image_data_url,final_input,D1b_GPT_4o_inital_lableing,D1b_GPT_4o_inital_lableing_raw,D1b_GPT_4o_inital_lableing_1,section,headline,article_url,article,abstract,article_id,image,caption,image_path,section_numeric
0,9f3e67c0-3aaa-5f36-a88d-14218feab404,Demo_data/D1_imgs_1/9f3e67c0-3aaa-5f36-a88d-14...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASAB...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASAB...",11,[11],11,Opinion,We Are Not Done With Abolition,https://www.nytimes.com/2020/12/15/opinion/abo...,"Mr. Foner is the author of "" The Second Foundi...",The framers of the 13th Amendment did not inte...,9f3e67c0-3aaa-5f36-a88d-14218feab404,https://static01.nyt.com/images/2020/12/15/opi...,Convicts working on a prison farm in 1934.,Demo_data/D1_imgs/19f3e67c0-3aaa-5f36-a88d-142...,11
1,8a5113f7-3a30-56c0-99b2-ab36e31c6c1f,Demo_data/D1_imgs_1/8a5113f7-3a30-56c0-99b2-ab...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",3,[3],3,Style,How Do You Dress a Russian Doll?,https://www.nytimes.com/2019/02/14/style/russi...,What would you wear if you could attend your o...,"Glumly, it seems, but that hasn't stopped styl...",8a5113f7-3a30-56c0-99b2-ab36e31c6c1f,https://static01.nyt.com/images/2019/02/14/fas...,Rebecca Henderson and Greta Lee in the Netflix...,Demo_data/D1_imgs/18a5113f7-3a30-56c0-99b2-ab3...,15
2,d0670a68-a8b2-5b1c-9ff1-00f75f687f5e,Demo_data/D1_imgs_1/d0670a68-a8b2-5b1c-9ff1-00...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",1,[1],1,Technology,The Coder and the Dictator,https://www.nytimes.com/2020/03/20/technology/...,Gabriel Jimenez hated the Venezuelan strongman...,Gabriel Jiménez hated the Venezuelan strongman...,d0670a68-a8b2-5b1c-9ff1-00f75f687f5e,https://static01.nyt.com/images/2020/03/22/bus...,Gabriel Jim&eacute;nez.,Demo_data/D1_imgs/1d0670a68-a8b2-5b1c-9ff1-00f...,20
3,4fd57ef1-a908-5e61-9602-2cfe4fbef7b1,Demo_data/D1_imgs_1/4fd57ef1-a908-5e61-9602-2c...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",16,[16],16,Art & Design,Oprah Earned This Museum Show. And It's a Pote...,https://www.nytimes.com/2018/06/21/arts/design...,WASHINGTON -- Has any American spent more of h...,An exhibition at the Smithsonian's National Mu...,4fd57ef1-a908-5e61-9602-2cfe4fbef7b1,https://static01.nyt.com/images/2018/06/22/art...,"Scenes from a career: clockwise from top left,...",Demo_data/D1_imgs/14fd57ef1-a908-5e61-9602-2cf...,14
4,e05685c9-6cca-5415-94c4-b4977e4fbcea,Demo_data/D1_imgs_1/e05685c9-6cca-5415-94c4-b4...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",18,[18],18,Opinion,Who Bears Witness to a Hidden Epidemic?,https://www.nytimes.com/2020/07/14/opinion/sun...,We know domestic violence is spiking under loc...,We know domestic violence is spiking under loc...,e05685c9-6cca-5415-94c4-b4977e4fbcea,https://static01.nyt.com/images/2020/07/19/opi...,"Latrice Brooks, a crisis interventionist, work...",Demo_data/D1_imgs/1e05685c9-6cca-5415-94c4-b49...,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0458d47a-a77e-5b74-a0ef-f20bac6776f8,Demo_data/D1_imgs_1/0458d47a-a77e-5b74-a0ef-f2...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",3,[3],3,Books,"In 'Want,' a Lost Friendship and the Anxieties...",https://www.nytimes.com/2020/07/07/books/revie...,"Last winter, when we were all still riding the...",Lynn Steger Strong's new novel follows a Brook...,0458d47a-a77e-5b74-a0ef-f20bac6776f8,https://static01.nyt.com/images/2020/07/19/boo...,Lynn Steger Strong,Demo_data/D1_imgs/10458d47a-a77e-5b74-a0ef-f20...,13
196,ab747a06-1499-5241-be7b-92ad2f1fca50,Demo_data/D1_imgs_1/ab747a06-1499-5241-be7b-92...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",3,[3],3,Television,What's on TV Tuesday: 'The Story of God' and '...,https://www.nytimes.com/2019/03/05/arts/televi...,THE DAWN WALL (2018) Stream on Amazon or Netfl...,Season 3 of Morgan Freeman's travelogue series...,ab747a06-1499-5241-be7b-92ad2f1fca50,https://static01.nyt.com/images/2019/03/05/art...,Morgan Freeman in &ldquo;The Story of God.&rdquo;,Demo_data/D1_imgs/1ab747a06-1499-5241-be7b-92a...,3
197,1fa874fc-425e-505a-9b4f-a8ba6d1d4154,Demo_data/D1_imgs_1/1fa874fc-425e-505a-9b4f-a8...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",17,[17],17,Food,For Caviar Tastes on a Craft-Beer Budget,https://www.nytimes.com/2018/01/22/dining/thom...,Reasonably priced does not spring to mind at m...,Regiis Ova is a more affordably priced line of...,1fa874fc-425e-505a-9b4f-a8ba6d1d4154,https://static01.nyt.com/images/2018/01/24/din...,A new line of reasonably priced caviars is now...,Demo_data/D1_imgs/11fa874fc-425e-505a-9b4f-a8b...,17
198,aba6dece-c19b-5fda-95ed-296b8b604b5a,Demo_data/D1_imgs_1/aba6dece-c19b-5fda-95ed-29...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",4,[4],4,Travel,36 Hours in Chattanooga,https://www.nytimes.com/2018/05/24/travel/what...,Nearly geeky in its optimism -- and all the be...,Nearly geeky in its optimism — and all the bet...,aba6dece-c19b-5fda-95ed-296b8b604b5a,https://static01.nyt.com/images/2018/06/03/tra...,A view of the Tennessee River and the Hunter M...,Demo_data/D1_imgs/1aba6dece-c19b-5fda-95ed-296...,4


In [19]:
## Check the accuracy: GPT 4o inital labeling
# Merge the Demo_result of the image with the human label
D1b_CLIP_human = pd.read_csv("Demo_data/D1_1.csv")
D1b_GPT_4o_inital_lableing = pd.merge(D1b_GPT_4o_inital_lableing, D1b_CLIP_human, on="image_id", how="inner")
D1b_GPT_4o_inital_lableing.to_csv("Demo_result/D1b_GPT_4o_inital_lableing.csv", index=False)


auto_verification(
    D1b_GPT_4o_inital_lableing,
    predicted_cols="D1b_GPT_4o_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT
);


== Verification of 'D1b_GPT_4o_inital_lableing' vs. 'section_numeric' ==
Accuracy:   54.64%
Macro F1:   43.72%
Micro  F1:  54.64%

Full classification report:
              precision    recall  f1-score   support

           1       0.33      0.50      0.40         6
           2       0.54      0.78      0.64         9
           3       0.50      0.67      0.57        12
           4       0.50      0.67      0.57         9
           5       0.40      0.33      0.36         6
           6       1.00      0.78      0.88         9
           7       0.00      0.00      0.00         2
           8       1.00      0.25      0.40         4
           9       0.89      0.89      0.89         9
          10       0.67      0.73      0.70        11
          11       0.38      0.30      0.33        10
          12       0.78      0.70      0.74        10
          13       0.25      0.11      0.15         9
          14       0.71      0.67      0.69        18
          15       0.20      

## GPT-o4 (reasoning model)


In [30]:
D1b_GPT_o4_inital_lableing = classification_GPT(
    image_dir="Demo_data/D1_imgs_1",
    text_path="Demo_data/D1_1.csv",
    category = category_D1_GPT,
    prompt = prompt_D1_GPT,
    model = "o4-mini",
    api_key = api_key,
    effort = "medium",
    mode = "image",
    output_column_name="D1b_GPT_o4_inital_lableing",
    num_themes = 1,
    num_votes = 1)


D1b_GPT_o4_inital_lableing.to_csv("Demo_result/D1b_GPT_o4_inital_lableing.csv", index=False)

Classifying image_class: 100%|██████████| 200/200 [14:30<00:00,  4.35s/item]


In [31]:
D1b_GPT_o4_inital_lableing

Unnamed: 0,image_id,image_dir,text_content,image_data_url,final_input,D1b_GPT_o4_inital_lableing,D1b_GPT_o4_inital_lableing_raw,D1b_GPT_o4_inital_lableing_1
0,9f3e67c0-3aaa-5f36-a88d-14218feab404,Demo_data/D1_imgs_1/9f3e67c0-3aaa-5f36-a88d-14...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASAB...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASAB...",7,[7],7
1,8a5113f7-3a30-56c0-99b2-ab36e31c6c1f,Demo_data/D1_imgs_1/8a5113f7-3a30-56c0-99b2-ab...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",3,[3],3
2,d0670a68-a8b2-5b1c-9ff1-00f75f687f5e,Demo_data/D1_imgs_1/d0670a68-a8b2-5b1c-9ff1-00...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",11,[11],11
3,4fd57ef1-a908-5e61-9602-2cfe4fbef7b1,Demo_data/D1_imgs_1/4fd57ef1-a908-5e61-9602-2c...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",3,[3],3
4,e05685c9-6cca-5415-94c4-b4977e4fbcea,Demo_data/D1_imgs_1/e05685c9-6cca-5415-94c4-b4...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",11,[11],11
...,...,...,...,...,...,...,...,...
195,0458d47a-a77e-5b74-a0ef-f20bac6776f8,Demo_data/D1_imgs_1/0458d47a-a77e-5b74-a0ef-f2...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",11,[11],11
196,ab747a06-1499-5241-be7b-92ad2f1fca50,Demo_data/D1_imgs_1/ab747a06-1499-5241-be7b-92...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",5,[5],5
197,1fa874fc-425e-505a-9b4f-a8ba6d1d4154,Demo_data/D1_imgs_1/1fa874fc-425e-505a-9b4f-a8...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",17,[17],17
198,aba6dece-c19b-5fda-95ed-296b8b604b5a,Demo_data/D1_imgs_1/aba6dece-c19b-5fda-95ed-29...,,"data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...","data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...",4,[4],4


In [38]:
## Check the accuracy: GPT o4 initial labeling
D1b_CLIP_human = pd.read_csv("Demo_data/D1_1.csv")
D1b_GPT_o4_inital_lableing = pd.merge(D1b_GPT_o4_inital_lableing, D1b_CLIP_human, on="image_id", how="inner")
D1b_GPT_4o_inital_lableing.to_csv("Demo_result/D1b_GPT_4o_inital_lableing.csv", index=False)


auto_verification(
    D1b_GPT_o4_inital_lableing,
    predicted_cols="D1b_GPT_o4_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT
);


== Verification of 'D1b_GPT_o4_inital_lableing' vs. 'section_numeric' ==
Accuracy:   57.87%
Macro F1:   50.00%
Micro  F1:  57.87%

Full classification report:
              precision    recall  f1-score   support

           2       0.73      0.73      0.73        11
           3       0.44      0.36      0.40        11
           4       0.60      0.67      0.63         9
           5       0.22      0.40      0.29         5
           6       0.89      1.00      0.94         8
           7       0.17      0.50      0.25         2
           8       0.67      0.50      0.57         4
           9       0.89      0.89      0.89         9
          10       0.86      0.55      0.67        11
          11       0.31      0.45      0.37        11
          12       0.90      0.90      0.90        10
          13       0.50      0.11      0.18         9
          14       0.60      0.50      0.55        18
          15       0.33      0.14      0.20         7
          16       0.18      

#### Note: the current GPT models have strict restrictions on fine-tuning images. So it is not used here.

# Demo 1c: Classify single-category text + image data

**Datasets:**
- `nytimes_40.csv`: Used for initial labeling and fine-tuning.
- `imgs_40`: Used for initial labeling and fine-tuning.

- `nytimes_40_2.csv`: Used for testing the fine-tuned model's performance.
- `imgs_40_2`: Used for testing the fine-tuned model's performance.


The text dataset should contain a column `img_dir` to map the images for each row.

## CLIP: local labeling

In [40]:
# Define the list of 24 category labels
category_D1_GPT = [
    "1", "2", "3", "4", "5", "6",
    "7", "8", "9", "10", "11", "12",
    "13", "14", "15", "16", "17", "18",
    "19", "20", "21", "22", "23", "24"
]


# Define the descriptive prompts for each category
prompt_D1_GPT = ["""You are given an article and image. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, Demo_results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the categorty label only (e.g., 5), no extra words."""]



api_key = "xxxxx"

## GPT: third-party API labeling


## GPT-4o


In [41]:
D1c_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    column_4_labeling=["headline", "article", "abstract"],
    model = "gpt-4o-mini",
    api_key = api_key,
    temperature = 1,
    mode = "both",
    output_column_name="D1c_GPT_4o_inital_lableing",
    num_themes = 1,
    num_votes = 1)


D1c_GPT_4o_inital_lableing.to_csv("Demo_result/D1c_GPT_4o_inital_lableing.csv", index=False)

Classifying text_class: 100%|██████████| 200/200 [02:45<00:00,  1.21item/s]
Classifying image_class: 100%|██████████| 200/200 [07:06<00:00,  2.13s/item]
Classifying final_class: 100%|██████████| 200/200 [06:54<00:00,  2.07s/item]


In [None]:
D1c_GPT_4o_inital_lableing

In [43]:
## Check the accuracy: GPT 4o inital labeling
auto_verification(
    D1c_GPT_4o_inital_lableing,
    predicted_cols="D1c_GPT_4o_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT
);


== Verification of 'D1c_GPT_4o_inital_lableing' vs. 'section_numeric' ==
Accuracy:   79.89%
Macro F1:   68.88%
Micro  F1:  79.89%

Full classification report:
              precision    recall  f1-score   support

         2.0       1.00      1.00      1.00        10
         3.0       0.69      1.00      0.81        11
         4.0       1.00      0.89      0.94         9
         5.0       0.86      1.00      0.92         6
         6.0       0.86      0.67      0.75         9
         7.0       0.67      1.00      0.80         2
         8.0       1.00      1.00      1.00         5
         9.0       1.00      1.00      1.00         8
        10.0       0.62      0.91      0.74        11
        11.0       0.86      0.67      0.75         9
        12.0       0.89      0.80      0.84        10
        13.0       1.00      0.78      0.88         9
        14.0       0.89      0.94      0.92        18
        15.0       0.00      0.00      0.00         6
        16.0       0.62      

## GPT-o4 (reasoning model)

##### The structure and usage are identical to the previous example. However, instead of adjusting the temperature parameter to control randomness, the effort parameter is used to influence the model's reasoning depth and complexity.


In [None]:
D1c_GPT_o4_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    column_4_labeling=["headline", "article", "abstract"],
    model = "o4-mini",
    api_key = api_key,
    effort = "medium",
    mode = "both",
    output_column_name="D1c_GPT_o4_inital_lableing",
    num_themes = 1,
    num_votes = 1)


D1c_GPT_o4_inital_lableing.to_csv("Demo_result/D1c_GPT_o4_inital_lableing.csv", index=False)


In [None]:
## Check the accuracy: GPT o4 initial labeling
auto_verification(
    D1c_GPT_o4_inital_lableing,
    predicted_cols="D1c_GPT_o4_inital_lableing",
    true_cols="section_numeric",
    category=category_D1
);

Demo 2: Multi-Label Theme Classification using News Headlines
--------------------------------------------------------------------

This demo shows how to classify U.S. immigration-related news headlines into
one or two dominant thematic category using a multi-label text-only classification task.
The coding scheme is adapted from Guo et al. (2023).

Dataset: Guo et al. (2023) Immigration News Dataset
Source: https://doi.org/10.1177/19401612231204535


**Datasets:**
- `Text_multiple_category_40.xlsx`: Used for initial labeling and fine-tuning.
- `Text_multiple category_40_2.xlsx`: Used for testing the fine-tuned model's performance.


Each sample in the dataset includes:
- A single new **Post_Title**


Themes:
-------
1) Economic consequences  
2) Crime/safety  
3) Family  
4) Immigrant wellbeing  
5) Culture/society  
6) Politics  
7) Legislation/regulation  
8) Public opinion  
99) None of the above


Reference:
----------
Guo, L., Su, C. C., & Chen, H.-T. (2023).
Do News Frames Really Have Some Influence in the Real World?
A Computational Analysis of Cumulative Framing Effects on Emotions and Opinions About Immigration.
The International Journal of Press/Politics. https://doi.org/10.1177/19401612231204535


#### Note: Based on our tests, CLIP did not perform well for multi-category classification. Its embedding-based matching struggles with distinguishing the order of categories and handling missing categories effectively. Therefore, we recommend using GPT models for multi-category classification tasks for improved accuracy and reliability.

## GPT: third-party API labeling


## GPT 4o-mini

In [58]:
category_D2_GPT = [
    "0", "1"
]

prompt_D2_GPT = '''Here's a news article headline. Please label if it belongs to the following theme.
            Return <1> if this headline belongs to these themes and return <0> if it does not belong to the themes.
            Please code for each of the following 8 topics.
            Please identify up to two dominant themes from the headline, which means you can have a max of 2 <1> in the answer you generated.
            You don't have to label two topic if you don't fint it apply. Just enter 0s.
            - Economic consequences: The story is about economic benefits or costs, or the costs involving immigration-related issues, including: Cost of mass deportation; Economic benefits of immigration (more tax revenue, cheap labor; Economic costs of immigration (taking jobs from Americans, immigrants using healthcare and educational services, overcrowding, housing concerns)
            - Crime/safety: The story is about threats to American's safety, including: Immigration described as a major cause of increased rates of crime, gangs, drug trafficking, etc; Immigrants described as law-breakers who deserve punishment; Immigration described as a threat to national security via terrorism
            - Family: The story is about the impact of immigration on families, including: Separating children from parents; Breaking up multi-generational families; Interfering with children's continued schooling
            - Immigrant wellbeing: This story is about the negative impact of the immigration process on immigrants, including: Prejudice and bias toward immigrants; Physical and/or mental health or safety of immigrants; Immigration policies described as violations of immigrants' civil rights and liberties; Immigration policies regarding illegal immigrants described as unfair to immigrants who have waited to become citizens the legal way
            - Culture/society: This story is about societal-wide factors or consequences related to immigration, including:; Immigration as a threat to American cultural identity, way of living, the predominance of English and Christianity, etc.; Immigrants as isolated from the rest of America, unable to assimilate into communities; Immigration as part of the celebrated history of immigration in America / America-as-melting-pot; Immigration policies as exemplars of society's immorality; Impact of immigration on a specific subculture/community in the US
            - Politics:The story is mainly about the political issues around immigration, including: Political campaigns and upcoming elections (e.g., using immigration as a wedge issue or motivating force to get people to the polls); Fighting between the Democratic and Republican parties, or politicians; One political party or one politician’s stance on immigration. Therefore, when the news headline mentions a politician’s name, it often indicates the theme of politics
            - Legislation/regulation: The story is about issues related to regulating immigration through legislation and other institutional measures: New immigration legislation being introduced/argued over; Flaws in current/old legislation; Enforcement of current legislation
            - Public opinion: The study is about the public’s, including a specific community’s, reactions to immigration-related issues, including: Public opinion polls; Protests; Social media backlash; Community outrage; Celebrity responses/protests
            Answer using the following format [0, 0, 0, 0, 0, 0, 0, 0]. Do not provide any other information'''


api_key = "xxxxxx"

In [59]:
D2_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D2_1.xlsx",
    category=["0", "1"],
    prompt=prompt_D2_GPT,          
    column_4_labeling=["Post_Title"],
    model="gpt-4o-mini",
    api_key=api_key,
    temperature=0.8,
    mode="text",
    output_column_name="D2_GPT_4o_initial_labeling",
    num_themes=8,
    num_votes=3,
)



D2_GPT_4o_inital_lableing.to_csv("Demo_result/D2_GPT_4o_inital_lableing.csv", index=False)

Classifying text_class: 100%|██████████| 161/161 [02:35<00:00,  1.04item/s]


In [60]:
D2_GPT_4o_inital_lableing

Unnamed: 0,ID,ID_original,GUID,Date (GMT),URL,Post_Title,Q1,Q2,Q3_1_og,Q3_2_og,...,D2_GPT_4o_initial_labeling,D2_GPT_4o_initial_labeling_raw,D2_GPT_4o_initial_labeling_1,D2_GPT_4o_initial_labeling_2,D2_GPT_4o_initial_labeling_3,D2_GPT_4o_initial_labeling_4,D2_GPT_4o_initial_labeling_5,D2_GPT_4o_initial_labeling_6,D2_GPT_4o_initial_labeling_7,D2_GPT_4o_initial_labeling_8
0,1396,6481,585fc69e-51da-4017-bbfe-e104b8880f52,2018-01-06 09:17:47,https://www.yahoo.com/news/trump-administratio...,Trump administration considers eliminating imm...,1,2,6,7,...,"[0, 0, 0, 0, 0, 1, 1, 0]","[0, 0, 0, 0, 0, 1, 1, 0]",0,0,0,0,0,1,1,0
1,354,1728,9d429eec-01ad-4a6d-aad1-5d961d88baa1,2018-01-05 20:42:02,https://www.newsmax.com/newsfront/us-san-franc...,Immigrant Acquitted of Killing Is Sentenced fo...,1,1,2,99,...,"[0, 1, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0, 0, 0]",0,1,0,0,0,0,0,0
2,1336,6176,d27c43fa-c5e3-41a0-9f89-ef7922db4bf3,2018-01-14 13:05:06,http://www.latimes.com/nation/sns-bc-eu--europ...,Pope: It's a sin if fear makes us hostile to m...,2,99,99,99,...,"[0, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 1]",0,0,0,0,0,0,0,1
3,907,4375,ce5a0f99-1795-4e65-b6f1-6352b753b9c0,2018-01-12 00:14:46,https://www.yahoo.com/news/m/3dfd9bf2-167d-398...,ICE agents descend on dozens of 7-Eleven store...,1,2,2,1,...,"[0, 1, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0, 0, 0]",0,1,0,0,0,0,0,0
4,1291,5956,73c5fca9-683d-4292-9b5a-879f8df0e7b1,2018-01-10 17:20:29,http://www.breitbart.com/big-hollywood/2018/01...,James Woods Warns Trump: 'If You Fold on Immig...,1,2,6,99,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,555,2708,0184f6ac-18d3-4de1-b30d-bc760cc8c912,2018-01-05 10:35:56,http://www.breitbart.com/london/2018/01/05/loc...,Study: Locally-Rooted People Affected by Immig...,2,99,99,99,...,"[0, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 1]",0,0,0,0,0,0,0,1
157,1676,7782,c2387d10-a75b-4c9b-b020-f697f7efb2ed,2018-01-07 00:00:00,https://www.yahoo.com/news/m/7a5c41e4-2df1-3b0...,At least 8 migrants drown after dinghy sinks o...,2,99,99,99,...,"[0, 0, 0, 1, 0, 0, 0, 0]","[0, 0, 0, 1, 0, 0, 0, 0]",0,0,0,1,0,0,0,0
158,584,2882,66a4d708-50fa-4d04-b849-fdf7afa19bec,2018-01-10 23:24:03,http://www.chicagotribune.com/news/opinion/edi...,Editorial: Trump goes looking for an immigrati...,1,2,6,7,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
159,1643,7610,dda8c2df-588a-4c90-a25a-c929a99173c1,2018-01-11 13:03:10,https://www.huffingtonpost.com/entry/immigrati...,Immigration Agents Raid Nearly 100 7-Eleven St...,1,2,1,99,...,"[0, 1, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0, 0, 0]",0,1,0,0,0,0,0,0


In [61]:
auto_verification(
    D2_GPT_4o_inital_lableing,
    predicted_cols=[
        "D2_GPT_4o_initial_labeling_1",
        "D2_GPT_4o_initial_labeling_2",
        "D2_GPT_4o_initial_labeling_3",
        "D2_GPT_4o_initial_labeling_4",
        "D2_GPT_4o_initial_labeling_5",
        "D2_GPT_4o_initial_labeling_6",
        "D2_GPT_4o_initial_labeling_7",
        "D2_GPT_4o_initial_labeling_8"
    ],
    true_cols=[
        "Q3_1", "Q3_2", "Q3_3", "Q3_4",
        "Q3_5", "Q3_6", "Q3_7", "Q3_8"
    ],
    category=category_D2_GPT
)



== Verification of 'D2_GPT_4o_initial_labeling_1' vs. 'Q3_1' ==
Accuracy:   93.17%
Macro F1:   55.91%
Micro  F1:  93.17%

Full classification report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.96       152
           1       0.25      0.11      0.15         9

    accuracy                           0.93       161
   macro avg       0.60      0.55      0.56       161
weighted avg       0.91      0.93      0.92       161


Confusion matrix:
[[149   3]
 [  8   1]]

== Verification of 'D2_GPT_4o_initial_labeling_2' vs. 'Q3_2' ==
Accuracy:   91.30%
Macro F1:   81.57%
Micro  F1:  91.30%

Full classification report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       139
           1       0.68      0.68      0.68        22

    accuracy                           0.91       161
   macro avg       0.82      0.82      0.82       161
weighted avg       0.91      0.91      0.91       161


C

### finetune: GPT 4o

In [62]:
# Prepare the data for GPT-4o 
# The JSONL should have three parts: 
# 1. system_prompt: coding instruction
# 2. input_col: The information needed to be labeled
# 3. label_col: the label

generate_GPT_finetune_jsonl(D2_GPT_4o_inital_lableing, 
                        output_path="Demo_result/D2_GPT_4o_inital_lableing.jsonl", 
                        system_prompt = prompt_D2_GPT,
                        input_col = ["Post_Title"],
                        label_col=["Q3_clean"])


In [63]:
# Fine-tune GPT-4o
D2_GPT_4o_model_finetune = finetune_GPT(
    training_file_path="Demo_result/D2_GPT_4o_inital_lableing.jsonl",
    model="gpt-4o-mini-2024-07-18",  
    hyperparameters={"batch_size":8, "learning_rate_multiplier":0.01},
    api_key= api_key 
)

Started fine-tune job ftjob-JMPqOUy8iOe192Zo17Po19ZG
[0s] status=validating_files
[15s] status=validating_files
[30s] status=validating_files
[45s] status=validating_files
[60s] status=validating_files
[75s] status=validating_files
[90s] status=validating_files
[105s] status=running
[120s] status=running
[135s] status=running
[150s] status=running
[165s] status=running
[180s] status=running
[195s] status=running
[210s] status=running
[225s] status=running
[240s] status=running
[255s] status=running
[270s] status=running
[285s] status=running
[300s] status=running
[315s] status=running
[330s] status=running
[345s] status=succeeded
✅ succeeded: ft:gpt-4o-mini-2024-07-18:jcs-research::BWQFmIyA


In [64]:
D2_GPT_4o_model_finetune

'ft:gpt-4o-mini-2024-07-18:jcs-research::BWQFmIyA'

In [66]:
# test the fineune model to see the example result


response = client.responses.create(
  model=D2_GPT_4o_model_finetune,
  input=[
    {
      "role": "developer",
      "content": [
        {
          "type": "input_text",
          "text": prompt_D2_GPT,

            }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "input_text",
          "text": "\"\"\" Our guide to the city's best classical music and opera.\"\"\"\n      "
        }
      ]
    }
  ],
  text={
    "format": {
      "type": "text"
    }
  },

  tools=[],
  store=True
)

In [67]:
response

Response(id='resp_6822212502fc8191a87dfea4a27d852a091f50e941fefce6', created_at=1747067173.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='ft:gpt-4o-mini-2024-07-18:jcs-research::BWQFmIyA', object='response', output=[ResponseOutputMessage(id='msg_68222125e27c81918ff6e4b202d95579091f50e941fefce6', content=[ResponseOutputText(annotations=[], text='[0, 0, 0, 0, 1, 0, 0, 0]', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, max_output_tokens=None, previous_response_id=None, reasoning=Reasoning(effort=None, generate_summary=None, summary=None), status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text')), truncation='disabled', usage=ResponseUsage(input_tokens=682, input_tokens_details=InputTokensDetails(cached_tokens=0), output_tokens=25, output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=707), user=N

In [71]:
# Classify with fine‑tuned 4o model
D2_GPT_4o_finetune = classification_GPT(
    text_path="Demo_data/D2_2.xlsx",
    category = category_D2_GPT,
    prompt = prompt_D2_GPT,
    column_4_labeling=["Post_Title"],
    model = D2_GPT_4o_model_finetune,
    api_key = api_key,
    temperature = 0.8,
    mode = "text",
    output_column_name="D2_GPT_4o_finetune",
    num_themes = 8,
    num_votes = 1)



D2_GPT_4o_finetune.to_csv("Demo_result/D2_GPT_4o_finetune.csv", index=False)

Classifying text_class: 100%|██████████| 160/160 [03:15<00:00,  1.22s/item]


In [72]:
D2_GPT_4o_finetune

Unnamed: 0,ID,ID_original,GUID,Date (GMT),URL,Post_Title,Q1,Q2,Q3_1_og,Q3_2_og,...,D2_GPT_4o_finetune,D2_GPT_4o_finetune_raw,D2_GPT_4o_finetune_1,D2_GPT_4o_finetune_2,D2_GPT_4o_finetune_3,D2_GPT_4o_finetune_4,D2_GPT_4o_finetune_5,D2_GPT_4o_finetune_6,D2_GPT_4o_finetune_7,D2_GPT_4o_finetune_8
0,989,4719,f10212aa-31cc-4748-a0e8-c0a3db3a1a98,2018-01-08 14:17:04,http://www.newsweek.com/clean-dream-act-hunger...,Jailed Immigrants Launch Hunger Strike until C...,1,2,8,7,...,"[0, 0, 0, 1, 0, 1, 0, 0]","[0, 0, 0, 1, 0, 1, 0, 0]",0,0,0,1,0,1,0,0
1,110,552,acf6c4c2-ce94-4c25-9fe3-aaa8b059786b,2018-01-10 00:40:32,http://thehill.com/podcasts/hillcast/368157-li...,Listen: Trump's unusual immigration meeting,1,2,6,99,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
2,1166,5514,b8c69f81-c592-4d18-ae3d-f3c86706e509,2018-01-10 14:24:58,https://www.yahoo.com/news/m/f154972f-3b1e-399...,Not losing it: Trump lets live TV cover White ...,1,2,6,99,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
3,1084,5146,54f419c7-91a0-4ec5-8370-4cb6e35da226,2018-01-11 15:55:29,https://www.washingtonpost.com/politics/4th-im...,4th immigrant teen in custody says US preventi...,1,1,3,4,...,"[0, 0, 0, 1, 0, 0, 0, 0]","[0, 0, 0, 1, 0, 0, 0, 0]",0,0,0,1,0,0,0,0
4,1467,6801,b4121087-567e-4532-89f3-dcab7b7a9d54,2018-01-09 03:06:09,http://www.breitbart.com/big-government/2018/0...,Jeff Flake Admits GOP Establishment Working Ag...,1,2,6,7,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,1553,7187,b4291bc4-6959-47ff-88f1-c50af55c818f,2018-01-08 22:57:55,https://www.nytimes.com/aponline/2018/01/08/wo...,South Korean Officials Head to Border for Talk...,2,99,99,99,...,"[0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0
156,1570,7266,2ed132c4-cc5d-45fa-9e03-99fc5a1fb83f,2018-01-03 12:57:54,http://www.latimes.com/world/la-fg-korea-commu...,North Korea reopens cross-border communication...,2,99,99,99,...,"[0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0
157,1813,8348,7ff33a43-9cb4-408b-acc1-69851d62e447,2018-01-04 16:25:26,http://www.newsweek.com/sanctuary-city-mayor-s...,Sanctuary City Mayor Stands Up to Immigration ...,1,2,6,99,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0
158,1576,7281,a5a10fc0-e6ff-4bda-ab00-324867aed841,2018-01-10 02:34:25,http://www.theblaze.com/news/2018/01/09/tucker...,Tucker Carlson rips into Trump for trusting De...,1,2,6,7,...,"[0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0]",0,0,0,0,0,1,0,0


In [74]:
auto_verification(
    D2_GPT_4o_finetune,
    predicted_cols=[
        "D2_GPT_4o_finetune_1",
        "D2_GPT_4o_finetune_2",
        "D2_GPT_4o_finetune_3",
        "D2_GPT_4o_finetune_4",
        "D2_GPT_4o_finetune_5",
        "D2_GPT_4o_finetune_6",
        "D2_GPT_4o_finetune_7",
        "D2_GPT_4o_finetune_8"
    ],
    true_cols=[
        "Q3_1", "Q3_2", "Q3_3", "Q3_4",
        "Q3_5", "Q3_6", "Q3_7", "Q3_8"
    ],
    category=category_D2_GPT
)



== Verification of 'D2_GPT_4o_finetune_1' vs. 'Q3_1' ==
Accuracy:   91.25%
Macro F1:   53.95%
Micro  F1:  91.25%

Full classification report:
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       148
           1       0.25      0.08      0.12        12

    accuracy                           0.91       160
   macro avg       0.59      0.53      0.54       160
weighted avg       0.88      0.91      0.89       160


Confusion matrix:
[[145   3]
 [ 11   1]]

== Verification of 'D2_GPT_4o_finetune_2' vs. 'Q3_2' ==
Accuracy:   88.75%
Macro F1:   68.75%
Micro  F1:  88.75%

Full classification report:
              precision    recall  f1-score   support

           0       0.98      0.90      0.94       150
           1       0.32      0.70      0.44        10

    accuracy                           0.89       160
   macro avg       0.65      0.80      0.69       160
weighted avg       0.94      0.89      0.91       160


Confusion matrix:

## o4-mini (reasning model)

##### The structure and usage are identical to the previous example. However, instead of adjusting the temperature parameter to control randomness, the effort parameter is used to influence the model's reasoning depth and complexity.


In [None]:
D2_GPT_o4_initial_labeling = classification_GPT(
    text_path="Text_multiple_category_1.xlsx",
    category=["0", "1"],
    prompt=prompt_D2_GPT,          
    column_4_labeling=["Post_Title"],
    model="gpt-4o-mini",
    api_key=api_key,
    effort="medium",
    mode="text",
    output_column_name="D2_GPT_o4_initial_labeling",
    num_themes=8,
    num_votes=3,
)



D2_GPT_4o_inital_lableing.to_csv("Demo_result/D2_GPT_o4_initial_labeling.csv", index=False)

In [None]:
auto_verification(
    D2_GPT_4o_inital_lableing,
    predicted_cols=[
        "D2_GPT_o4_initial_labeling_1",
        "D2_GPT_o4_initial_labeling_2",
        "D2_GPT_o4_initial_labeling_3",
        "D2_GPT_o4_initial_labeling_4",
        "D2_GPT_o4_initial_labeling_5",
        "D2_GPT_o4_initial_labeling_6",
        "D2_GPT_o4_initial_labeling_7",
        "D2_GPT_o4_initial_labeling_8"
    ],
    true_cols=[
        "Q3_1", "Q3_2", "Q3_3", "Q3_4",
        "Q3_5", "Q3_6", "Q3_7", "Q3_8"
    ],
    category=category_D2_GPT
)


#### Note: GPT reasnong models (i.e., o-series) can not be finetuned for now