In [6]:
# One-time setup: uncomment and run to create Application Default Credentials for Google Cloud.
# !gcloud auth application-default login

In [4]:
import os
from google.cloud import storage

# Locations inside the GCS bucket (object-name prefixes, not local paths)
DATASET_PATH = "FakeMusicCaps"
SUNOCAPS_PATH = "FakeMusicCaps/SunoCaps"
BUCKET_NAME = "music-caps"

# Shared GCS client used by the cells below
storage_client = storage.Client()

In [7]:
# Handle to the bucket that holds the dataset
bucket = storage_client.bucket(BUCKET_NAME)

# Number of example object names printed per folder
PREVIEW_COUNT = 5


def list_gcs_files(prefix, max_results=None):
    """Return the names of the objects in ``bucket`` whose name starts with ``prefix``.

    Args:
        prefix: Object-name prefix to match, e.g. ``"FakeMusicCaps/MusicCaps/"``.
            Matching is purely textual, so end the prefix with ``/`` to avoid
            also matching sibling folders that share the same leading characters.
        max_results: Optional cap on how many names are fetched. ``None``
            (the default) lists every matching object, as before.

    Returns:
        A list of object names (full paths inside the bucket).
    """
    blobs = bucket.list_blobs(prefix=prefix, max_results=max_results)
    return [blob.name for blob in blobs]


# List available models/folders
print("Listing contents of bucket...")
models_names = ['MusicCaps', 'MusicGen_medium', 'musicldm', 'audioldm2', 'stable_audio_open', 'mustango']

# Print structure of data
for name in models_names:
    print(f"\nFiles in {name}:")
    # Only a short preview is printed, so ask GCS for just that many names
    # instead of paging through the whole folder and discarding the rest.
    model_files = list_gcs_files(f"{DATASET_PATH}/{name}/", max_results=PREVIEW_COUNT)
    for file_path in model_files:
        print(f"  - {file_path}")


Listing contents of bucket...

Files in MusicCaps:
  - FakeMusicCaps/MusicCaps/-0Gj8-vB1q4.wav
  - FakeMusicCaps/MusicCaps/-0SdAVK79lg.wav
  - FakeMusicCaps/MusicCaps/-0vPFx-wRRI.wav
  - FakeMusicCaps/MusicCaps/-0xzrMun0Rs.wav
  - FakeMusicCaps/MusicCaps/-1LrH01Ei1w.wav

Files in MusicGen_medium:
  - FakeMusicCaps/MusicGen_medium/-0Gj8-vB1q4.wav
  - FakeMusicCaps/MusicGen_medium/-0SdAVK79lg.wav
  - FakeMusicCaps/MusicGen_medium/-0vPFx-wRRI.wav
  - FakeMusicCaps/MusicGen_medium/-0xzrMun0Rs.wav
  - FakeMusicCaps/MusicGen_medium/-1LrH01Ei1w.wav

Files in musicldm:
  - FakeMusicCaps/musicldm/-0Gj8-vB1q4.wav
  - FakeMusicCaps/musicldm/-0SdAVK79lg.wav
  - FakeMusicCaps/musicldm/-0vPFx-wRRI.wav
  - FakeMusicCaps/musicldm/-0xzrMun0Rs.wav
  - FakeMusicCaps/musicldm/-1LrH01Ei1w.wav

Files in audioldm2:
  - FakeMusicCaps/audioldm2/-0Gj8-vB1q4.wav
  - FakeMusicCaps/audioldm2/-0SdAVK79lg.wav
  - FakeMusicCaps/audioldm2/-0vPFx-wRRI.wav
  - FakeMusicCaps/audioldm2/-0xzrMun0Rs.wav
  - FakeMusicCaps/au

In [None]:
import io
import torch
import torchaudio
import matplotlib.pyplot as plt
import numpy as np
from google.cloud import storage

# Bucket holding the dataset, and the single clip to inspect from it
BUCKET_NAME = "music-caps"
file_path = "FakeMusicCaps/MusicGen_medium/-0Gj8-vB1q4.wav"

def visualize_gcs_audio(bucket_name, file_path):
    """Download one audio object from GCS, plot its waveform and print basic stats.

    Args:
        bucket_name: Name of the GCS bucket that holds the object.
        file_path: Object name (path inside the bucket) of the audio file.

    Returns:
        None. The waveform is shown with matplotlib, and the sample rate,
        duration and peak amplitude are printed.
    """
    # Fetch the whole object into memory
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_path)
    audio_bytes = blob.download_as_bytes()

    # Decode from the in-memory bytes. torchaudio.load returns the samples as
    # (channels, frames) by default, together with the sample rate.
    audio_tensor, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))

    # Keep the channel axis explicit instead of squeezing it away: squeezing
    # only works for mono and makes both the plot and len()-based duration
    # wrong for multi-channel audio.
    audio_data = np.atleast_2d(audio_tensor.numpy())
    num_channels, num_frames = audio_data.shape

    # Plot one line per channel
    _, ax = plt.subplots(figsize=(15, 5))
    for channel_index, channel in enumerate(audio_data):
        ax.plot(channel, label=f"Channel {channel_index}")
    ax.set_title(f"Waveform: {file_path.split('/')[-1]}")
    ax.set_xlabel("Sample")
    ax.set_ylabel("Amplitude")
    if num_channels > 1:
        ax.legend()
    plt.show()

    # np.max raises on an empty array, so report 0 for a clip with no samples
    peak = float(np.max(np.abs(audio_data))) if audio_data.size else 0.0

    # Print audio info
    print(f"Sample rate: {sample_rate} Hz")
    print(f"Duration: {num_frames / sample_rate:.2f} seconds")
    print(f"Max amplitude: {peak:.2f}")

# Render the waveform and stats for the clip selected above
visualize_gcs_audio(bucket_name=BUCKET_NAME, file_path=file_path)

ModuleNotFoundError: No module named 'torch'