In [None]:
# Classification 

from ultralytics import YOLO
import cv2

# Load model
model = YOLO("yolo11n-cls.pt")

# View classes (ImageNet classes)
# A bare `model.names` in the middle of a cell is evaluated and discarded,
# so nothing would be shown. Print the class count and a short preview
# instead of dumping the whole id -> name mapping.
class_names = list(model.names.values())
print(f"{len(class_names)} classes, e.g. {class_names[:5]}")

# Predict on a single image, forcing CPU inference
results = model.predict(
  "scenes_sdf/sdf_009.jpg",
  device='cpu'
  )

In [None]:
from pathlib import Path

# Work with the first (only) result of the single-image prediction
prediction = results[0]

# View predictions
prediction.show()

# Save prediction image
prediction.save("sdf_009_classify.jpg")

# Save predictions to txt
# save_txt appends to an existing file, so remove the output of any previous
# run first; otherwise re-running this cell duplicates every line.
Path("sdf_009.txt").unlink(missing_ok=True)
prediction.save_txt("sdf_009.txt")

In [None]:
import glob, os
import pandas as pd

# Collect every JPEG in the scene folder and classify them in one call (CPU)
imgs = glob.glob('scenes_sdf/*.jpg')
results = model.predict(source=imgs, device='cpu')

In [None]:
# Save prediction images and txt files
out_dir = "scenes_sdf_classify"
os.makedirs(out_dir, exist_ok=True)  # unlike os.mkdir, does not fail when the cell is re-run
for result in results:
  # Build the output path from the file name only, so the result does not
  # depend on "scenes_sdf" or "jpg" appearing elsewhere in the input path.
  newpath = os.path.join(out_dir, os.path.basename(result.path))
  result.save(newpath)
  # Swap only the extension; this also keeps a ".JPG" input from having its
  # text output written on top of the image that was just saved.
  txtpath = os.path.splitext(newpath)[0] + ".txt"
  # save_txt appends to an existing file, so drop output from earlier runs
  if os.path.exists(txtpath):
    os.remove(txtpath)
  result.save_txt(txtpath)

In [None]:
# Create df with top prediction: one table per image, tagged with its path
dfs = []
for res in results:
  frame = res.to_df()
  frame.insert(loc=0, column='file', value=res.path)  # path becomes the first column
  dfs.append(frame)

# Stack the per-image tables, sort by the 'file' column, and write without the index
df = pd.concat(dfs).sort_values(by='file')
df.to_csv("sdf_classify.csv", index=False)

In [None]:
# Object detection

from ultralytics import YOLO
import cv2

# Load model (replaces the `model` used by the earlier cells)
model = YOLO("yolo11n.pt")

# Predict on a single image
results = model.predict(source="scenes_sdf/sdf_014.jpg")
detections = results[0]

# View predictions
detections.show()

# Save image with predictions
detections.save("sdf_014_object.jpg")

In [None]:
# Predict on multiple files
imgs = glob.glob('scenes_sdf/*.jpg')
results = model.predict(imgs)

# Save prediction images
out_dir = "scenes_sdf_object"
os.makedirs(out_dir, exist_ok=True)  # unlike os.mkdir, does not fail when the cell is re-run
for result in results:
  # Use only the file name, so the output path does not depend on
  # "scenes_sdf" appearing elsewhere in the input path.
  newpath = os.path.join(out_dir, os.path.basename(result.path))
  result.save(newpath)

# Save as CSV: one table per image, tagged with the image path
dfs = []
for result in results:
  df_single = result.to_df()
  df_single.insert(0, 'file', result.path)
  dfs.append(df_single)
df = pd.concat(dfs)
df = df.sort_values(by='file')
df.to_csv("objects.csv", index = False)
# each row is a detected object
# each row is a detected object

In [None]:
# Instance segmentation

# Load model (replaces the `model` used by the earlier cells)
model = YOLO("yolo11n-seg.pt")

# Predict on a single image
results = model.predict(source="scenes_sdf/sdf_014.jpg")

# Save image with predictions
segmentation = results[0]
segmentation.save("sdf_014_segment.jpg")

In [None]:
# Calculate area
from shapely.geometry import Polygon

# Get image area in pixels
seg_result = results[0]
dims = seg_result.orig_shape
area_total = dims[0] * dims[1]

# Calculate proportions for all masks in image
# `masks` is None when nothing was detected; treat that as "no masks"
# instead of failing on iteration.
masks = seg_result.masks if seg_result.masks is not None else []
area_props = []
for mask in masks:
  outline = mask.xy[0]
  # A polygon needs at least three vertices; a degenerate outline counts as zero area
  area_pixels = Polygon(outline).area if len(outline) >= 3 else 0.0
  # Share of the image covered by this mask, as a percentage with two decimals
  area_props.append(round(area_pixels * 100 / area_total, 2))

# Add to dataframe and save as CSV
df = seg_result.to_df()
df.insert(0, 'file', seg_result.path)
# Keep 'area_prop' at position 5 when the frame is wide enough, otherwise
# append it at the end (e.g. for a frame with no detections).
df.insert(min(5, df.shape[1]), 'area_prop', area_props)
df.to_csv("segmentation.csv", index = False)

In [None]:
# Finetune classifier
# Assumes subfolders like this:
# data/
# ├── class1/
# │   ├── img1.jpg
# │   ├── img2.jpg
# │   └── ...
# ├── class2/
# │   ├── img3.jpg
# │   ├── img4.jpg
# │   └── ...
# └── class3/
#     ├── img5.jpg
#     ├── img6.jpg
#     └── ...

from fastai.vision.all import *

# Root folder holding one subfolder per class
path = Path('data')

# Create DataLoaders object
dls = ImageDataLoaders.from_folder(
  path,
  valid_pct=0.2,
  seed=42,
  item_tfms=Resize(224),
  batch_tfms=aug_transforms(),
)

# Create CNN learner on model (e.g. ResNet34)
learn = vision_learner(dls, resnet34, metrics=accuracy)

# Finetune
learn.fine_tune(5)

# View and save
learn.show_results()
learn.save('my_classifier')

# Inference on a single image
img = PILImage.create("image.jpg")
prediction, idx, probs = learn.predict(img)
print(f"Prediction: {prediction}, Probability: {probs[idx]:.2f}")

In [None]:
# CLIP
import torch
from transformers import pipeline
import pandas as pd

# Load model
# Use the first CUDA GPU when one is available. Otherwise fall back to the
# CPU (device=-1) with float32 weights, so the cell no longer fails on
# machines without a GPU.
use_gpu = torch.cuda.is_available()
clip = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-base-patch32",
    torch_dtype=torch.bfloat16 if use_gpu else torch.float32,
    device=0 if use_gpu else -1
)

In [None]:
# Candidate captions the image is scored against
labels = ["a photo of a car", "a photo of a horse", "a photo of a dog"]

# Predict (the returned scores are shown as the cell output)
clip(
  "scenes_sdf/sdf_014.jpg",
  candidate_labels=labels,
)

In [None]:
# Candidate captions describing the mood of the image
labels = ["a dramatic image", "a serene image"]

# Predict (the returned scores are shown as the cell output)
clip(
  "scenes_sdf/sdf_014.jpg",
  candidate_labels=labels,
)

In [None]:
# Score an image from another folder against the current `labels`
clip(
  "scenes_is/is_044.jpg",
  candidate_labels=labels,
)

In [None]:
# Pretty print (disabled; expects `preds` to hold the output of a clip(...) call)
# for pred in preds:
#   label = pred['label']
#   score = pred['score']
#   label_words = label.split()
#   keyword = label_words[1]
#   print(f"{keyword}: {round(score, 3)}")

In [None]:
# Bulk process 

# Define paths and labels
imgs = glob.glob('scenes_sdf/*.jpg')
labels = ["a dramatic image", "a serene image"]

# Loop over paths, recording the first prediction for each image
# (preds[0] is taken as the best match; the pipeline is expected to return
# the labels ordered by score)
rows = []
for img in imgs:
  preds = clip(img, candidate_labels=labels)
  top = preds[0]
  label = top['label']
  rows.append({
      'file': img,
      'label': label,
      'keyword': label.split()[1],  # second word of the label, e.g. "dramatic"
      'conf': round(top['score'], 3)
    })

# Build the table once and save to CSV
# Explicit columns keep the sort and the CSV header working even when no
# images were found, where pd.concat on an empty list would raise.
df = pd.DataFrame(rows, columns=['file', 'label', 'keyword', 'conf'])
df = df.sort_values(by='file')
df.to_csv("clip.csv", index=False)