In [1]:
import os
import numpy as np
import pandas as pd
import torch
from transformers import Trainer,TrainingArguments,AutoModelForImageClassification,AutoImageProcessor
from datasets import load_dataset
from sklearn.metrics import accuracy_score,precision_score




In [2]:
model_path = r'C:\Users\Aum Thaker\Desktop\VSC\checkpoint-6200'

In [4]:
model_id = 'google/vit-base-patch16-224'

In [5]:
# instantiate the image tookenizer
feature_extractor = AutoImageProcessor.from_pretrained(model_id,trust_remote_code=True)

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [6]:
# this block is for preprocessing task 

def preprocess(batch):
    img = [x.convert('RGB') for x in batch['image']]
    inps = feature_extractor(img,return_tensors='pt')
    batch['pixel_values'] = inps['pixel_values']
    # batch['label'] = batch['label']
    return batch

In [7]:
dataset = load_dataset('aum27/mars-terrain-test',split='train')

In [8]:
dataset = dataset.map(preprocess,batched=True)

In [9]:
dataset.set_format(type='torch',columns=['pixel_values'])

In [10]:
id2label = {
    0: 'bright dune',
    1: 'crater',
    2: 'dark dune',
    3: 'impact ejecta',
    4: 'other',
    5: 'slope streak',
    6: 'spider',
    7: 'swiss cheese'
}


In [11]:
label2id = {
    'bright dune': 0,
    'crater': 1,
    'dark dune': 2,
    'impact ejecta': 3,
    'other': 4,
    'slope streak': 5,
    'spider': 6,
    'swiss cheese': 7
}


In [12]:
#instantiate the model 
model = AutoModelForImageClassification.from_pretrained(model_path,num_labels=8,id2label=id2label,label2id=label2id,ignore_mismatched_sizes=True,trust_remote_code=True)

In [13]:
device = torch.device('cuda')
device

device(type='cuda')

In [14]:
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [15]:
training_args = TrainingArguments(
    output_dir='./output_results_vit',
    per_device_eval_batch_size=8,
) 

In [16]:
def compute_metrics(preds):
    labels = preds.label_ids
    preds = np.argmax(preds.predictions,axis=1)
    acc = accuracy_score(labels,preds)

    prec = precision_score(labels,preds,average='weighted')

    return {'accuracy':acc,'precision':prec}

In [17]:
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=dataset,
    compute_metrics=compute_metrics,
)

In [18]:
preds = trainer.predict(test_dataset=dataset)

  context_layer = torch.nn.functional.scaled_dot_product_attention(


  0%|          | 0/250 [00:00<?, ?it/s]

In [22]:
np.array(preds[0]).shape

(2000, 8)

In [23]:
logits = preds.predictions

In [25]:
probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

In [26]:
predicted_labels = np.argmax(probabilities, axis=1)

In [57]:
output = []

In [58]:
for i in range(len(predicted_labels)):
    output.append(id2label[predicted_labels[i]])

In [59]:
id = []

In [60]:
for j in range(len(output)):
    id.append(str(str(j+1)+'.jpg'))

In [61]:
ids_ = pd.DataFrame(id)

In [62]:
df_final_1 = pd.DataFrame(output)

In [63]:
df_final__1_f = pd.concat([ids_,df_final_1],axis=1)

In [65]:
df_final__1_f.head()

Unnamed: 0,0,0.1
0,1.jpg,slope streak
1,2.jpg,crater
2,3.jpg,crater
3,4.jpg,other
4,5.jpg,other


In [67]:
df_final__1_f.to_csv('predictions_final.csv')