In [1]:
from datasets import DatasetDict, load_dataset, load_from_disk, ClassLabel, load_metric
from sklearn.preprocessing import LabelEncoder
from transformers import ViTFeatureExtractor
import torch
import numpy as np

In [2]:
# Load the previously saved dataset from the "Data" directory
# (relative path, so it resolves against the current working directory).
data = load_from_disk("Data")

In [3]:
# Raw (not yet encoded) label columns of both splits.
# NOTE: this cell overwrites data['train'] / data['test'] below, so re-running
# it without reloading `data` would fit the encoder on already-encoded ids.
train_labels = data['train']['Label']
test_labels = data['test']['Label']

# Fit the encoder exactly once, on the training labels only. Encoding the test
# labels up front also surfaces labels missing from the training split early
# (LabelEncoder.transform raises ValueError on unseen labels).
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)


def _encode_label_batch(batch):
    """Return the batch's 'Label' values encoded with the already-fitted encoder."""
    # Use transform, not fit_transform: re-fitting inside map would rebuild the
    # class -> id mapping from whichever batch it happens to receive.
    return {'Label': label_encoder.transform(batch['Label'])}


# Replace the 'Label' column of each split with its encoded ids, using the
# same fitted mapping for train and test.
for split_name in ('train', 'test'):
    data[split_name] = data[split_name].map(_encode_label_batch, batched=True, batch_size=-1)

# Print the modified data
print(data)


Loading cached processed dataset at c:\College\Research Papers\Spectrogram_based_phonological_analysis\Data\train\cache-4204523ad44b5baa.arrow
Loading cached processed dataset at c:\College\Research Papers\Spectrogram_based_phonological_analysis\Data\test\cache-c96d53d89b247754.arrow


DatasetDict({
    train: Dataset({
        features: ['Image Path', 'Image', 'Label'],
        num_rows: 18340
    })
    test: Dataset({
        features: ['Image Path', 'Image', 'Label'],
        num_rows: 6114
    })
})


In [4]:
# Checkpoint identifier handed to from_pretrained; the feature extractor built
# from it is what the preprocessing functions in this notebook call on images.
model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)



In [5]:
def process_example(example):
    """Preprocess a single dataset row.

    Runs the module-level ``feature_extractor`` on ``example['Image']``
    (requesting PyTorch tensors) and copies the row's ``'Label'`` value into
    the result under the same ``'Label'`` key.

    Args:
        example: Mapping with at least the keys ``'Image'`` and ``'Label'``.

    Returns:
        The feature extractor's output with an added ``'Label'`` entry.
    """
    encoded = feature_extractor(example['Image'], return_tensors='pt')
    encoded['Label'] = example['Label']
    return encoded

In [6]:
# Sanity check: preprocess one training row (index 400) and display the result.
process_example(data['train'][400])

{'pixel_values': tensor([[[[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],

         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],

         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]]]]), 'Label': 15}

In [7]:
def transform(example_batch):
    """Preprocess a batch of dataset rows.

    Args:
        example_batch: Mapping whose ``'Image'`` entry is an iterable of
            images and whose ``'Label'`` entry holds the matching labels.

    Returns:
        The feature extractor's output (PyTorch tensors requested) with the
        batch's labels attached under the ``'Label'`` key.
    """
    # Materialise the images as a plain list before handing them over.
    images = list(example_batch['Image'])
    batch_inputs = feature_extractor(images, return_tensors='pt')

    # Carry the labels along so they stay paired with the pixel values.
    batch_inputs['Label'] = example_batch['Label']
    return batch_inputs

In [8]:
# Attach `transform` to the dataset via with_transform. The result is bound to
# a new name, so `data` itself is not reassigned here.
prepared_data = data.with_transform(transform)

In [9]:
def collate_fn(batch):
    """Collate a list of preprocessed samples into one batch of tensors.

    Args:
        batch: List of per-sample mappings. Each must contain a
            ``'pixel_values'`` tensor and a label stored under ``'Label'``
            (the key written by ``transform``) or under ``'labels'``.

    Returns:
        Dict with ``'pixel_values'`` (the samples' tensors stacked along a new
        leading dimension) and ``'labels'`` (a tensor of the samples' labels).

    Raises:
        KeyError: If a sample has neither a ``'labels'`` nor a ``'Label'`` entry.
    """
    def _label_of(sample):
        # `transform` stores targets under 'Label'; still honour 'labels' so
        # samples that already use that key keep working.
        return sample['labels'] if 'labels' in sample else sample['Label']

    return {
        'pixel_values': torch.stack([sample['pixel_values'] for sample in batch]),
        'labels': torch.tensor([_label_of(sample) for sample in batch]),
    }

In [10]:
# NOTE(review): the cell output echoes this line, which looks like a warning
# about load_metric -- confirm whether it should be migrated.
metric = load_metric("accuracy")


def compute_metrics(p):
    """Score predictions with the module-level accuracy ``metric``.

    Args:
        p: Object exposing ``predictions`` (scores reduced with argmax over
            axis 1) and ``label_ids`` (the reference labels).

    Returns:
        Whatever ``metric.compute`` returns for the predicted class ids
        against the references.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_classes, references=p.label_ids)

  metric = load_metric("accuracy")
