In [1]:
# Input result files for the two models being compared. Both are loaded
# further down with pd.read_json(..., lines=True), i.e. one JSON record per line.
clip_file = "zeroshot_40sample_results.jsonl"
vlm_file = "40_results_vlm.jsonl"

In [2]:
import pandas as pd, json, os

In [3]:
# Load the CLIP results (JSON Lines) and rename its columns so they can sit
# next to the VLM columns after the merge without ambiguity.
clip_column_names = {
    "path": "image",
    "label": "clip_label",
    "confidence": "clip_conf",
}
df_clip = pd.read_json(clip_file, lines=True).rename(columns=clip_column_names)


In [4]:
# Load the VLM results (JSON Lines). Each row carries a "result" value that is
# expanded into its own columns (via pd.Series) and placed beside the remaining
# top-level columns; the label/confidence fields are then renamed with a vlm_ prefix.
df_vlm = (
    pd.read_json(vlm_file, lines=True)
    .pipe(
        lambda raw: pd.concat(
            [raw.drop(columns=["result"]), raw["result"].apply(pd.Series)],
            axis=1,
        )
    )
    .rename(columns={"scene_type": "vlm_label", "confidence_0_1": "vlm_conf"})
)



In [6]:
def _image_key(path):
    """Return the file name of `path` without its directory part or extension."""
    file_name = os.path.basename(str(path))
    stem, _ext = os.path.splitext(file_name)
    return stem


# Build the join key on both frames from their "image" column, so rows can be
# matched even when the two result files record different paths or extensions.
df_clip["image_key"] = df_clip["image"].map(_image_key)
df_vlm["image_key"] = df_vlm["image"].map(_image_key)

# Per-model label counts, printed before the frames are merged.
print("CLIP label distribution:\n", df_clip["clip_label"].value_counts(), "\n")
print("VLM label distribution:\n", df_vlm["vlm_label"].value_counts(), "\n")


CLIP label distribution:
 clip_label
unknown             17
MEDIA_PROPAGANDA    12
PROTEST_STREET      11
Name: count, dtype: int64 

VLM label distribution:
 vlm_label
unknown                11
PROTEST_STREET         11
MEDIA_PROPAGANDA       10
CAMPUS_EVENT            5
INSTITUTIONAL_STAGE     3
Name: count, dtype: int64 



In [15]:
# Join the CLIP and VLM predictions on the extension-less file-name key.
# validate="one_to_one" makes pandas raise MergeError when either side holds a
# repeated image_key (possible, since the key discards directory and extension),
# instead of silently multiplying rows in the merged frame.
merged = pd.merge(
    df_clip,
    df_vlm,
    on="image_key",
    how="inner",
    validate="one_to_one",
)

# An inner join drops keys that exist on only one side. Report them so the
# success message below cannot hide images that went missing.
clip_only_keys = set(df_clip["image_key"]) - set(df_vlm["image_key"])
vlm_only_keys = set(df_vlm["image_key"]) - set(df_clip["image_key"])
if clip_only_keys or vlm_only_keys:
    print(
        f"⚠️ Inner join dropped {len(clip_only_keys)} CLIP-only and "
        f"{len(vlm_only_keys)} VLM-only image keys"
    )
print(f"✅ Merged {len(merged)} images successfully")

# Side-by-side view of both models' labels and confidences plus the VLM caption.
merged[["image_key", "clip_label", "vlm_label", "clip_conf", "vlm_conf", "caption_20w"]]


✅ Merged 40 images successfully


Unnamed: 0,image_key,clip_label,vlm_label,clip_conf,vlm_conf,caption_20w
0,2848410766023731083__284529430_340934021319534_2343800145217192904_n,unknown,CAMPUS_EVENT,0.743,0.0,a group of people sitting in chairs in a room
1,2848410766023731083__284645805_787670909264484_7131751097966912843_n,PROTEST_STREET,PROTEST_STREET,0.952,0.9,Group of people protesting for better UCLA transit system
2,2849126906832291056__284356575_713238196573997_2663972127010493414_n,unknown,CAMPUS_EVENT,0.509,0.0,MTSAC MSA 2ND GENERAL MEETING
3,2849656145477240143__284645804_755471129155733_5979343260691737113_n,unknown,MEDIA_PROPAGANDA,0.268,0.9,Statement of Opposition to SB1412
4,2983720566764124471__501685915_1256993622499792_3932324816662002823_n,PROTEST_STREET,PROTEST_STREET,0.946,0.9,People protesting in the street with signs
5,451343914_496616053018135_2586767934785121431_n,MEDIA_PROPAGANDA,unknown,0.948,0.0,woman wearing hijab speaking
6,452842052_1017585399767829_1218373299653330229_n,unknown,MEDIA_PROPAGANDA,0.659,0.8,Woman speaking at CPOC event
7,452856948_815909363643449_8261195655921378787_n,MEDIA_PROPAGANDA,CAMPUS_EVENT,0.803,0.0,diver on platform at Paris 2024 Olympics
8,452857080_7906272059485882_13013324345258530_n,unknown,CAMPUS_EVENT,0.664,0.0,music class in session
9,452859706_502781995554325_267712069011515191_n,PROTEST_STREET,PROTEST_STREET,0.912,0.9,Two women protest on a bridge with a sign criticizing Israel


In [8]:
# Rows where CLIP and VLM both produced the label "unknown".
# The comparison is case-insensitive: labels are lower-cased before matching.
both_unknown = merged.loc[
    merged["clip_label"].str.lower().eq("unknown")
    & merged["vlm_label"].str.lower().eq("unknown")
]

print(f"🟡 Both CLIP and VLM labeled 'unknown': {len(both_unknown)} images")


🟡 Both CLIP and VLM labeled 'unknown': 5 images


In [11]:
# Per-label tally of the images on which both models agree exactly
# (string equality, so this comparison is case-sensitive).
labels_agree = merged["clip_label"] == merged["vlm_label"]
overlap_counts = merged.loc[labels_agree, "clip_label"].value_counts()

print(" Overlapping label counts (same label in both models):", overlap_counts, sep="\n")

 Overlapping label counts (same label in both models):
clip_label
PROTEST_STREET      11
unknown              5
MEDIA_PROPAGANDA     5
Name: count, dtype: int64


In [13]:
# Images the VLM labeled exactly "unknown", with its confidence and caption,
# to inspect what the model saw on the images it could not classify.
caption_columns = ["image_key", "vlm_label", "vlm_conf", "caption_20w"]
unknown_captions = merged.loc[merged["vlm_label"] == "unknown", caption_columns]
print(f"🟡 {len(unknown_captions)} images labeled 'unknown' by VLM\n")

# Lift the column-width limit so long captions are shown in full.
# NOTE: set_option is global, so it also affects every later display in the kernel.
pd.set_option('max_colwidth', None)
display(unknown_captions)

🟡 11 images labeled 'unknown' by VLM



Unnamed: 0,image_key,vlm_label,vlm_conf,caption_20w
5,451343914_496616053018135_2586767934785121431_n,unknown,0.0,woman wearing hijab speaking
12,453572435_7995532140534527_544302348809111960_n,unknown,0.0,
13,453575633_1183387739531482_2309473526383923461_n,unknown,0.0,newborn baby sleeping
14,453583954_1550032105600963_4043760404323128717_n,unknown,0.0,
16,453649574_1672755530191245_5293498686781486983_n,unknown,0.0,
20,453711520_952804193286259_7974871928834171757_n,unknown,0.0,a pile of trash and garbage on the ground
21,453714419_1711852752917390_4947390332304106168_n,unknown,0.0,
28,453813678_404742955425744_3330767030418778740_n,unknown,0.0,
31,453888340_1583134279277204_878036164309545409_n,unknown,0.0,Boy standing against wall with text overlay
36,453963858_1380113316725389_3849273822277756344_n,unknown,0.0,young boy with text overlay
