In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import torch
import yaml

from utils import SensorDataset

%matplotlib inline

# Load the notebook configuration from config.yaml.
# A malformed file is allowed to raise yaml.YAMLError here: catching and
# printing it would leave `config` unbound (or stale from an earlier run in a
# live kernel), and the failure would only surface later as an unrelated error.
with open("config.yaml", encoding="utf-8") as stream:
    config = yaml.safe_load(stream)

# yaml.safe_load returns None for an empty document; fail with a clear message
# instead of a "'NoneType' object is not subscriptable" TypeError below.
if not isinstance(config, dict):
    raise ValueError("config.yaml must contain a top-level mapping")

# Sample rate read from the `mic` section; used by the audio cells below.
sr = config['mic']['sample_rate']

In [None]:
# Build the dataset from the mic parquet directory and wrap it in a DataLoader
# that yields shuffled batches of 8 using 2 worker processes.
mic_train_dir = "datasets/BrushlessMotor/train/parquet/mic/"
loader_options = {"batch_size": 8, "shuffle": True, "num_workers": 2}

dataset = SensorDataset(mic_train_dir)
dataloader = torch.utils.data.DataLoader(dataset, **loader_options)

In [None]:
# Pull a single batch to inspect its shape and first element.
# next(iter(...), None) replaces the enumerate/break loop: there is no unused
# loop index, and an empty loader is reported explicitly instead of leaving
# `data` unbound (or stale from a previous run).
data = next(iter(dataloader), None)
if data is None:
    raise RuntimeError("dataloader produced no batches; check the dataset path")

print(data.shape)
print(data[0])

In [None]:
# Mel spectrogram of the first element of the batch, converted to decibels.
# NOTE(review): assumes data[0] is a CPU tensor holding a mono waveform sampled
# at `sr` - confirm against SensorDataset.
waveform = data[0].numpy()
mel_spectrogram = librosa.feature.melspectrogram(y=waveform, sr=sr)
mel_log_spectrogram = librosa.power_to_db(mel_spectrogram)

# Draw on an explicit Axes and pass the returned mappable to the colorbar, so
# the figure does not depend on pyplot's implicit "current image" state.
# librosa.display is a submodule and needs its own import on librosa versions
# without lazy loading.
fig, ax = plt.subplots()
mesh = librosa.display.specshow(
    mel_log_spectrogram, x_axis='time', y_axis='mel', sr=sr, cmap='plasma', ax=ax
)
fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
ax.set_title('Mel-frequency spectrogram')
plt.show()

In [None]:
from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationLearned
import requests
from PIL import Image

# Classify one sample image with the pretrained "vision-perceiver-learned"
# checkpoint and print the predicted label.

# Use the GPU when one is present; otherwise run on the CPU rather than
# crashing on a hard-coded .cuda() call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-learned")
model = PerceiverForImageClassificationLearned.from_pretrained("deepmind/vision-perceiver-learned").to(device)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# A timeout keeps the cell from hanging forever; raise_for_status() surfaces an
# HTTP error directly instead of passing an error page to PIL.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
display(image)

# prepare input
encoding = feature_extractor(image, return_tensors="pt")
inputs = encoding.pixel_values.to(device)
# forward pass (inference only, so no autograd graph is needed)
with torch.no_grad():
    outputs = model(inputs)
logits = outputs.logits
print("Predicted class:", model.config.id2label[logits.argmax(-1).item()])
