# Zero-Shot *Vibe* Classification 

In [87]:
import os
import numpy as np
import pandas as pd
from PIL import Image

from transformers import pipeline

In [88]:
# pipeline?

In [89]:
# Initialize the pipeline with multi_label=True
checkpoint = "openai/clip-vit-large-patch14"
detector = pipeline(model=checkpoint, task="zero-shot-image-classification", multi_label=True)




In [90]:
base = '/Users/scampione/MSDS/Spring_24_2/Entrepreneurship/philly_business_photos'
business_ids = os.listdir(base)
business_id_subdirs = [base + '/' + id for id in business_ids if id != '.DS_Store']

business_id_subdirs[0]

'/Users/scampione/MSDS/Spring_24_2/Entrepreneurship/philly_business_photos/9PZxjhTIU7OgPIzuGi89Ew'

In [91]:
# Define candidate labels for zero-shot classification
candidate_labels = ["Coworking Cafe——Coffee shop with people working on laptops, Wi-Fi available", 
                    "Brunch——Outdoor brunch with breakfast and mimosas", 
                    "Romantic Date Night——Romantic restaurant with candlelit tables", 
                    "Upscale Special Occasion——Elegant restaurant with white tablecloths and fine dining", 
                    "Rooftop——Rooftop bar or restaurant with city views", 
                    "Pub——Lively bar with draft beer and sports TV", 
                    "None of the above"]


# Initialize a dictionary to store results
d = {'bid': [], 
     'Coworking Cafe': [], 
     'Brunch': [], 
     'Romantic Date Night': [],
     'Upscale Special Occasion': [], 
     'Rooftop': [], 
     'Pub': [], 
     'None of the above': []}


# Loop through each business directory in the list of subdirectories
for bid in business_id_subdirs:
    d['bid'].append(os.path.basename(bid)) # Extract and append the business ID

    # Initialize lists to collect scores for each category
    coworking = []
    brunch = []
    healthy = []
    romantic = []
    special_occassion = []
    rooftop = []
    pub = []
    none_of_the_above = []

    # Loop through each image file in the current business directory
    for pid in os.listdir(bid):
        if pid.lower().endswith(('.png', '.jpg', '.jpeg')):  # Check if the file is an image
            try:

                image_path = os.path.join(bid, pid)
                image = Image.open(image_path)
                
                # Perform zero-shot classification on the image with specified candidate labels
                score = detector(images=image, 
                                candidate_labels=candidate_labels,
                                num_workers=8
                                )
                
                # Append scores to the appropriate category based on the label
                for x in score:
                    if 'Coworking Cafe' in x['label']:
                        coworking.append(x['score'])
                    elif 'Brunch' in x['label']:
                        brunch.append(x['score'])
                    elif 'Romantic Date Night' in x['label']:
                        romantic.append(x['score'])
                    elif "Upscale Special Occasion" in x['label']:
                        special_occassion.append(x['score'])
                    elif "Rooftop" in x['label']:
                        rooftop.append(x['score'])
                    elif "Pub" in x['label']:
                        pub.append(x['score'])
                    elif "None of the above" in x['label']:
                        none_of_the_above.append(x['score'])

            except Exception as e:
                print(f"Failed to process image {image_path}: {str(e)}")

    # Compute the mean score for each category and store in the dictionary
    d['Coworking Cafe'].append(np.mean(coworking))
    d['Brunch'].append(np.mean(brunch))
    d['Romantic Date Night'].append(np.mean(romantic))
    d['Upscale Special Occasion'].append(np.mean(special_occassion))
    d['Rooftop'].append(np.mean(rooftop))
    d['Pub'].append(np.mean(pub))
    d['None of the above'].append(np.mean(none_of_the_above))


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [92]:
results = pd.DataFrame.from_dict(d)
results.to_csv("zero_shot_scores.csv")

In [93]:
results

Unnamed: 0,bid,Coworking Cafe,Brunch,Romantic Date Night,Upscale Special Occasion,Rooftop,Pub,None of the above
0,9PZxjhTIU7OgPIzuGi89Ew,0.001241,0.040247,0.695568,0.050353,0.037055,0.131736,0.043800
1,dmyPzMRNgTIOBRT11NFcvg,0.521136,0.039084,0.042242,0.000615,0.020861,0.001819,0.374243
2,tcvNBC65Gsa4cH1_gyucpQ,0.084691,0.323881,0.334050,0.227092,0.007418,0.000836,0.022031
3,T2i2ZA2O0I8cp5CuRRO6KA,0.000973,0.048808,0.444152,0.005915,0.017914,0.028210,0.454028
4,K_s-9Wd6vXSfnxYFzhE0Kw,0.009018,0.022997,0.505085,0.323617,0.022227,0.017416,0.099640
...,...,...,...,...,...,...,...,...
1303,khH0QtNyUjcExh9i2CwGfg,0.002614,0.029499,0.492966,0.244986,0.114540,0.068241,0.047155
1304,vuE1iseFrgNPumUEfHIZZQ,0.001837,0.010577,0.816923,0.150016,0.002678,0.002031,0.015938
1305,gEX_kEhYgvLKGJtlY45BmQ,0.003333,0.011433,0.641398,0.218016,0.003322,0.007314,0.115183
1306,0jJf0DNHvYI0itdQSezsgg,0.035239,0.424783,0.007611,0.001710,0.000770,0.000087,0.529800
