# summary

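This notebook classifies every image in the landscape1 dataset: each image is scored against the earcon tags with CLIP, and the top-ranked tags and their scores are saved to a CSV file.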

# imports

This section imports the required libraries, loads the CLIP model, and extracts the unique earcon tags from the earcon dataset.

In [1]:
import torch
import clip
from PIL import Image
import matplotlib.pyplot as plt
import warnings
import pandas as pd

# Hide every UserWarning for the rest of the session so the cell outputs stay readable.
warnings.filterwarnings(action="ignore", category=UserWarning)

In [2]:
# Choose the compute device: the GPU when torch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP checkpoint onto that device; clip.load returns the
# model together with the image preprocessing callable that goes with it.
model, preprocess = clip.load("ViT-B/32", device)

In [None]:
# Load the earcon dataset spreadsheet into a DataFrame.
import os

# Location of the spreadsheet, relative to the notebook's working directory.
earcon_dataset_path = "../dataset/earcon_dataset/earcon_dataset.xlsx"

# Fail right here with a clear message when the file is missing. Silently
# skipping the load would surface as a NameError on the next line, or worse,
# let a stale `earcon_dataset` from an earlier run be reused unnoticed.
if not os.path.isfile(earcon_dataset_path):
    raise FileNotFoundError(
        f"Earcon dataset not found at {earcon_dataset_path!r} "
        f"(working directory: {os.getcwd()!r})"
    )

earcon_dataset = pd.read_excel(earcon_dataset_path)

# Last expression of the cell: displays the first rows as the cell output.
earcon_dataset.head()

In [None]:
# Build `taglist`: the unique, lower-cased tags of the earcon dataset in
# first-seen order. Each cell of the 'tags' column is handled as the text of a
# list, e.g. "['a', 'b']", and is split back into its individual tags.

# Rows with a missing tags cell are dropped: pandas represents them as NaN
# (a float), which has no string methods and would crash the parsing below.
earcon_tags = earcon_dataset['tags'].dropna().tolist()

# Deletes the list punctuation ([, ] and ') from a cell in a single pass.
list_punctuation = str.maketrans("", "", "[]'")

taglist = []        # ordered result; positions are later used to index CLIP's outputs
seen_tags = set()   # constant-time duplicate check instead of scanning taglist
for element in earcon_tags:
    for tag in element.translate(list_punctuation).split(", "):
        tag = tag.strip().lower()
        # An empty list cell ("[]") produces "", which is not a usable label.
        if tag and tag not in seen_tags:
            seen_tags.add(tag)
            taglist.append(tag)

print(f"There are {len(taglist)} unique tags in the dataset")
print(f"Here are the first 10 tags: {taglist[:10]}")

# load paths

This section crawls the landscape1 split folders to build a list of image files; the list is saved to `image_paths.csv` and used by the classification step below.

In [None]:
# Crawl every class folder of every landscape1 split and record one row per
# image file (split, class folder, file name, file path). The table is kept
# in `image_paths` and also written to image_paths.csv.

# Class sub-folders expected inside each split directory.
sub_folders = [
    "Coast",
    "Desert",
    "Forest",
    "Glacier",
    "Mountain"
]

# Short split labels; parallel to `folders` below (same order).
split = [
    "test",
    "train",
    "validation"
]

folders = [
    "../dataset/landscape1/Testing Data/",
    "../dataset/landscape1/Training Data/",
    "../dataset/landscape1/Validation Data/",
]

image_paths_csv = "../dataset/landscape1/csvs/image_paths.csv"

image_paths = []

# `split` and `folders` are parallel lists, so walk them together.
for split_name, split_dir in zip(split, folders):
    for folder in sub_folders:
        # The context manager releases the directory handle as soon as the
        # listing has been read.
        with os.scandir(split_dir + folder) as entries:
            # os.scandir yields entries in arbitrary order; sorting by name
            # makes the resulting CSV reproducible between runs.
            for item in sorted(entries, key=lambda entry: entry.name):
                # Skip anything that is not a regular file (e.g. a nested
                # directory), since every row is later opened as an image.
                if not item.is_file():
                    continue
                image_paths.append({
                    "split": split_name,
                    "folder": folder,
                    "filename": item.name,
                    "filepath": item.path,
                })

# Naming the columns keeps the expected schema even if no file was found.
image_paths = pd.DataFrame(
    image_paths, columns=["split", "folder", "filename", "filepath"]
)

# Create the output directory on first use so to_csv cannot fail on it.
os.makedirs(os.path.dirname(image_paths_csv), exist_ok=True)
image_paths.to_csv(image_paths_csv, index=False)

# Last expression of the cell: displays the first rows as the cell output.
image_paths.head()

In [6]:
# Run every tag through CLIP's tokenizer, then move the resulting tensor to
# the same device the model was loaded on.
text_tokens = clip.tokenize(taglist)
text_tokens = text_tokens.to(device)

In [None]:
# Score every crawled image against all tags with CLIP and save, per image,
# the best-matching tags and their scores to image_classification.csv.

# List to store the image classification results
image_classification = []

splits = image_paths["split"].tolist()
folders = image_paths["folder"].tolist()
filenames = image_paths["filename"].tolist()
paths = image_paths["filepath"].tolist()

# Keep up to 100 predictions per image, capped at the number of tags because
# torch.topk raises when k is larger than the dimension it ranks.
top_k = min(100, len(taglist))
total_images = len(paths)

# The tag prompts are identical for every image, so encode and normalize them
# once here instead of re-running the text encoder on every iteration.
with torch.no_grad():
    text_features = model.encode_text(text_tokens)
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Forward pass for each image
records = zip(splits, folders, filenames, paths)
for count, (split_name, folder_name, filename, path) in enumerate(records, start=1):
    # Load and preprocess the image. The context manager closes the file
    # handle, so a long run does not accumulate open files.
    with Image.open(path) as raw_image:
        image = preprocess(raw_image).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        # Unit-length features make the dot product below a cosine similarity.
        image_features /= image_features.norm(dim=-1, keepdim=True)
        # Scaled cosine similarities, turned into a softmax over all tags.
        similarities = (100.0 * image_features @ text_features.T).softmax(dim=-1)

    # Get the top predictions (tag indices and their scores)
    top_preds = torch.topk(similarities, top_k)
    top_indices = top_preds.indices.squeeze(0).tolist()
    top_scores = top_preds.values.squeeze(0).tolist()

    # Create a dictionary entry for the image classification
    image_classification.append({
        "split": split_name,
        "folder": folder_name,
        "filename": filename,
        "image_path": path,
        "top_tags": [taglist[tag_index] for tag_index in top_indices],
        "similarity_scores": top_scores
    })

    # Progress report every 100 images
    if count % 100 == 0:
        print(f"Processed {count}/{total_images} images")

# Convert the list of dictionaries to a pandas DataFrame
df = pd.DataFrame(image_classification)

# Save to CSV
df.to_csv("../dataset/landscape1/csvs/image_classification.csv", index=False)