<a href="https://colab.research.google.com/github/singhmihir8/MusicDetection/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -Uqq fastai

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torchaudio
from fastai.vision.all import *

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import os

# Define the input folder containing the nested folders of images
input_folder = '/content/drive/MyDrive/mihirs music images'

# Define the custom dataset class
class CustomDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.file_list = self._get_file_list()
        self.classes = self._get_classes()

    def _get_file_list(self):
        file_list = []
        for root, dirs, files in os.walk(self.folder_path):
            for file in files:
                file_list.append(os.path.join(root, file))
        return file_list
    
    def _get_classes(self):
        classes = set()
        for file in self.file_list:
            label = file.split('/')[-2]
            classes.add(label)
        return list(classes)

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        image_path = self.file_list[index]
        image = Image.open(image_path).convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        label = image_path.split('/')[-2]  # Extract label from folder name
        label_tensor = torch.tensor(self.classes.index(label))  # Convert label to tensor

        return image, label_tensor

# Define the data transformations
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create the custom dataset
dataset = CustomDataset(input_folder, transform=data_transforms)

# Create the data loader
batch_size = 8
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Load a pre-trained model (e.g., ResNet)
model = models.resnet18(pretrained=True)

# Modify the last layer to match the number of classes in your dataset
num_classes = len(dataset.classes)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}")

print("Training completed.")




Epoch 1/100 - Loss: 0.9762
Epoch 2/100 - Loss: 0.5949
Epoch 3/100 - Loss: 0.3515
Epoch 4/100 - Loss: 0.5923
Epoch 5/100 - Loss: 0.4502
Epoch 6/100 - Loss: 0.3695
Epoch 7/100 - Loss: 0.4325
Epoch 8/100 - Loss: 0.2230
Epoch 9/100 - Loss: 0.2229
Epoch 10/100 - Loss: 0.1385
Epoch 11/100 - Loss: 0.1608
Epoch 12/100 - Loss: 0.2002
Epoch 13/100 - Loss: 0.1029
Epoch 14/100 - Loss: 0.0813
Epoch 15/100 - Loss: 0.0877
Epoch 16/100 - Loss: 0.1499
Epoch 17/100 - Loss: 0.2297
Epoch 18/100 - Loss: 0.3700
Epoch 19/100 - Loss: 0.3926
Epoch 20/100 - Loss: 0.1197
Epoch 21/100 - Loss: 0.0977
Epoch 22/100 - Loss: 0.0582
Epoch 23/100 - Loss: 0.1230
Epoch 24/100 - Loss: 0.1558
Epoch 25/100 - Loss: 0.1639
Epoch 26/100 - Loss: 0.0313
Epoch 27/100 - Loss: 0.0496
Epoch 28/100 - Loss: 0.0184
Epoch 29/100 - Loss: 0.0198
Epoch 30/100 - Loss: 0.0100
Epoch 31/100 - Loss: 0.0923
Epoch 32/100 - Loss: 0.0719
Epoch 33/100 - Loss: 0.0613
Epoch 34/100 - Loss: 0.0727
Epoch 35/100 - Loss: 0.0124
Epoch 36/100 - Loss: 0.0165
E

In [13]:
from torchvision.transforms.functional import normalize

# Define the function for testing a single image
def test_single_image(image_path, transform):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    image = image.unsqueeze(0)  # Add a batch dimension

    # Move the image to the device
    image = image.to(device)

    # Set the model to evaluation mode
    model.eval()

    # Disable gradient calculation
    with torch.no_grad():
        # Forward pass
        output = model(image)

        # Get the predicted label
        _, predicted_label = torch.max(output.data, 1)

        # Get the predicted class name
        predicted_class = dataset.classes[predicted_label.item()]

    return predicted_class

# Test a single image
image_path = '/content/drive/MyDrive/mihirs music images/AI Images/spectrogram_Drake - She Knows.jpg'
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
predicted_class = test_single_image(image_path, test_transforms)
print(f"Predicted class: {predicted_class}")


Predicted class: AI Images


In [6]:
!pip install -Uqq fastai
!pip install librosa matplotlib numpy
from fastai.vision.all import *
from fastai.vision.learner import cnn_learner
from fastai.vision.augment import Resize
import os
import glob
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Folder path containing subfolders with audio files
audio_folder_path = '/content/drive/MyDrive/MusicDetectionDatabase'

# New folder path for storing the spectrogram images
image_folder_path = '/content/drive/MyDrive/mihirs music images'

# Create subfolders for storing AI and Real spectrogram images
ai_folder = os.path.join(image_folder_path, 'AI Images')
real_folder = os.path.join(image_folder_path, 'Real Images')

os.makedirs(ai_folder, exist_ok=True)
os.makedirs(real_folder, exist_ok=True)

# Parameters
image_size = (224, 224)  # Size of the resized spectrogram image

# Helper function to resize the image
def resize_image(image, size):
    pil_image = Image.fromarray(image)
    resized_image = pil_image.resize(size)
    return np.array(resized_image)

# Iterate through subfolders in the audio folder
for subfolder in os.listdir(audio_folder_path):
    subfolder_path = os.path.join(audio_folder_path, subfolder)
    
    if os.path.isdir(subfolder_path):
        # Determine the target folder for storing spectrogram images
        if subfolder == 'AI Music':
            target_folder = ai_folder
        elif subfolder == 'Real Music':
            target_folder = real_folder
        else:
            continue  # Skip subfolders not labeled as AI Music or Real Music
        
        # Process audio files in the subfolder
        audio_files = glob.glob(os.path.join(subfolder_path, '*.mp3'))
        
        for audio_file in audio_files:
            # Load the audio file
            audio, sr = librosa.load(audio_file, duration=30.0)  # Specify duration of 30 seconds

            # Create a spectrogram image
            D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
            plt.figure(figsize=(10, 4))
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
            plt.colorbar(format='%+2.0f dB')
            plt.title('Spectrogram - {}'.format(audio_file))
            
            # Extract the audio file name without the extension
            audio_filename = os.path.splitext(os.path.basename(audio_file))[0]
            
            # Save the spectrogram image directly in the target folder
            spectrogram_save_path = os.path.join(target_folder, 'spectrogram_{}.jpg'.format(audio_filename))
            plt.savefig(spectrogram_save_path)
            plt.close()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=np.max)
  D = librosa.amplitude_to_db(librosa.stft(audio), ref=