In [3]:
!pip install transformers torch Pillow

Collecting transformers
  Using cached transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)
  Downloading huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Using cached transformers-4.47.1-py3-none-any.whl (10.1 MB)
Downloading huggingface_hub-0.27.1-py3-none-any.whl (450 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m450.7/450.7 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached regex-2024.11.6-cp311-

In [1]:
%reset -f

In [6]:
from transformers import AutoImageProcessor, ResNetForImageClassification
from PIL import Image
from torchvision import models, transforms
import torch
import os

In [7]:
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

In [14]:
def classify_image(image_path):#, top_k=1): # model and processor already loaded
    
    # Open and process the image
    image = Image.open(image_path)
    inputs = processor(image, return_tensors="pt")

    # Run inference
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # Get top-k predictions
    #topk_values, topk_indices = torch.topk(logits, k=top_k)
    #topk_labels = [model.config.id2label[idx.item()] for idx in topk_indices[0]]
    #topk_probs = torch.nn.functional.softmax(topk_values, dim=-1)[0]
    predicted_label = logits.argmax(-1).item()

    # Return a list of (class, probability) tuples
    #return list(zip(topk_labels, topk_probs.tolist()))
    return (model.config.id2label[predicted_label])

# Testing one image

In [11]:
# Example usage:
image_path = "/exchange/dspro01/group3/data/test/coyote/TAG-TC29_11_17_2015_TAG-TC29_0002602.JPG"
results = classify_image(image_path)#, top_k=3)
print(f'results: {results}')
"""for class_name, probability in results:
    print(f"{class_name}: {probability:.2%}")
"""

results: grey fox, gray fox, Urocyon cinereoargenteus


'for class_name, probability in results:\n    print(f"{class_name}: {probability:.2%}")\n'

In [12]:
results

'grey fox, gray fox, Urocyon cinereoargenteus'

# Test set

In [27]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Values of ImageNet  
])

In [16]:
# Lists to store labels and predictions
all_labels = []
all_predictions_no_training = []

# Define the path
base_path = "/exchange/dspro01/group3/data/test/"

# Process each folder and classify images
for folder_name in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder_name)
    if os.path.isdir(folder_path):
        print(f"Processing folder: {folder_name}")
        image_count = 0 
        for image_name in os.listdir(folder_path):
            image_path = os.path.join(folder_path, image_name)
            if image_name.lower().endswith(('png', 'jpg', 'jpeg')):

                if image_count >= 50:
                    # If we've already processed 50 images, stop further processing in this folder
                    break
                    
                try:
                    result = classify_image(image_path)# classify_image(image_path, top_k=1)
                    all_labels.append(folder_name)
                    all_predictions_no_training.append(result)  # result[0][0]vStore predicted label
                    
                    print(f"Image: {image_name}, Predicted Class: {result}")

                    image_count += 1
                    
                except Exception as e:
                    print(f"Error processing {image_path}: {e}")


Processing folder: american black bear
Image: 2015_Unit102_SWTB031_img0348.jpg, Predicted Class: wombat
Image: CA-42_08_05_2016_CA-42_0020300.JPG, Predicted Class: chimpanzee, chimp, Pan troglodytes
Image: CA-39_10_16_2015_CA-39_0010011.jpg, Predicted Class: armadillo
Image: CA-20_09_07_2016_CA-20_0024990.JPG, Predicted Class: American black bear, black bear, Ursus americanus, Euarctos americanus
Image: CA-17_05_10_2016_CA-17_0031958.JPG, Predicted Class: gorilla, Gorilla gorilla
Image: 2016_Unit058_Ivan074_img0915.jpg, Predicted Class: suspension bridge
Image: CA-24_08_12_2015_CA-24_0008537.jpg, Predicted Class: llama
Image: 2014_Unit6_Ivan134_img1015.jpg, Predicted Class: brown bear, bruin, Ursus arctos
Image: CA-42_11_14_2015_CA-42_0012066.jpg, Predicted Class: capuchin, ringtail, Cebus capucinus
Image: CA-13_10_05_2016_CA-13_0012359.JPG, Predicted Class: chimpanzee, chimp, Pan troglodytes
Image: CA-08_08_04_2015_CA-08_0002523.jpg, Predicted Class: American black bear, black bear, U

In [17]:
import pandas as pd
df = pd.DataFrame({
    'Label': all_labels,
    'Prediction': all_predictions_no_training
})

# Save the DataFrame to a CSV file
filename = f'predictions_model_no_training.csv'
df.to_csv(filename, index=False)

In [3]:
import pandas as pd
predictions_filename = 'predictions_model_no_training.csv'
df = pd.read_csv(predictions_filename)
df.head()

Unnamed: 0,Label,Prediction
0,american black bear,wombat
1,american black bear,"chimpanzee, chimp, Pan troglodytes"
2,american black bear,armadillo
3,american black bear,"American black bear, black bear, Ursus america..."
4,american black bear,"gorilla, Gorilla gorilla"


In [10]:
df['Prediction_short'] = df['Prediction'].str.split(',', n=1).str.get(0)

In [30]:
df['Comparison'] = df['Label'].str.lower() == df['Prediction_short'].str.lower()

# Count true matches

In [41]:
true_counts = []
for i in range(0, len(df), 50):
    chunk = df['Comparison'].iloc[i:i + 50]
    true_counts.append(chunk.sum())

In [37]:
true_counts

[16, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 18]

# Count less specific animal 
Example: does not contain "American black bear" but contains "bear"

Bear

In [58]:
count_bear = 0 
for i in range(50):
    if 'bear' in str(df['Prediction_short'].iloc[i]).lower():  # Use .iloc to access individual entries
        count_bear += 1

print(f'Count "bear": {count_bear}')
print(f'Without true counts: {count_bear - true_counts[0]}')

Count "bear": 20
Without true counts: 4


In [63]:
count_squirrel = 0 
for i in range(50,100):
    if 'squirrel' in str(df['Prediction_short'].iloc[i]).lower():  # Use .iloc to access individual entries
        count_bear += 1
print(f'Count "squirrel": {count_squirrel}')
print(f'Without true counts: {count_squirrel - true_counts[2]}')


Count "squirrel": 0
Without true counts: 0


In [65]:
count_skunk = 0 
for i in range(450,500):
    if 'squirrel' in str(df['Prediction_short'].iloc[i]).lower():  # Use .iloc to access individual entries
        count_bear += 1
print(f'Count "squirrel": {count_skunk}')
print(f'Without true counts: {count_skunk - true_counts[-3]}')


Count "squirrel": 0
Without true counts: 0


# Count unique labels per category

In [44]:
chunk_size = 50
unique_labels_per_category = []

for i in range(0, len(df), chunk_size):
    chunk = df['Prediction_short'].iloc[i:i + chunk_size]
    unique_labels = chunk.unique() 
    chunk_label = df['Label'].iloc[i]
    unique_labels_per_category.append((chunk_label, unique_labels))


In [50]:
unique_labels_count = []
for i in unique_labels_per_category:
    unique_labels_count.append(len(i[1]))

In [51]:
unique_labels_count

[26, 20, 28, 22, 25, 32, 9, 21, 29, 10, 20, 24]

In [61]:
class_names = sorted(['American Black Bear', 'California Ground Squirrel', 'Elk', 'Gray Fox', 'Red Deer', 'Unidentified Deer', 'Bobcat', 'Coyote', 'Empty', 'Mule Deer', 'Striped Skunk', 'Wild Boar'])