In [1]:
!pip install torch torchaudio



In [2]:
import os
import torch
import glob
import torchaudio
import torch.nn as nn

In [3]:
from network_utils import *

In [4]:
#define model
# FXencoder that extracts audio effects from music recordings trained with a contrastive objective
class FXencoder(nn.Module):
    def __init__(self, 
                channels= [16, 32, 64, 128, 256, 256, 512, 512, 1024, 1024, 2048, 2048],
                kernels =  [25, 25, 15, 15, 10, 10, 10, 10, 5, 5, 5, 5],
                strides =  [4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1],
                dilation = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                bias =  True,
                norm = 'batch',
                conv_block= 'res',
                activation= "relu"):
        super(FXencoder, self).__init__()
        # input is stereo channeled audio
        channels.insert(0, 2)

        # encoder layers
        encoder = []
        for i in range(len(kernels)):
            if conv_block=='res':
                encoder.append(Res_ConvBlock(dimension=1, \
                                                in_channels=channels[i], \
                                                out_channels=channels[i+1], \
                                                kernel_size=kernels[i], \
                                                stride=strides[i], \
                                                padding="SAME", \
                                                dilation=dilation[i], \
                                                norm=norm, \
                                                activation=activation, \
                                                last_activation=activation))
            elif conv_block=='conv':
                encoder.append(ConvBlock(dimension=1, \
                                            layer_num=1, \
                                            in_channels=channels[i], \
                                            out_channels=channels[i+1], \
                                            kernel_size=kernels[i], \
                                            stride=strides[i], \
                                            padding="VALID", \
                                            dilation=dilation[i], \
                                            norm=norm, \
                                            activation=activation, \
                                            last_activation=activation, \
                                            mode='conv'))
        self.encoder = nn.Sequential(*encoder)

        # pooling method
        self.glob_pool = nn.AdaptiveAvgPool1d(1)

    # network forward operation
    def forward(self, input):
        enc_output = self.encoder(input)
        glob_pooled = self.glob_pool(enc_output).squeeze(-1)

        # outputs c feature
        return glob_pooled

In [5]:
checkpoint_path = '/Users/svanka/Codes/cambridge-mt_scrapper/fx_embeddings/FXencoder_ps.pt'

def reload_weights(self, ckpt_path, ddp=True):
    checkpoint = torch.load(ckpt_path, map_location=self.device)

    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in checkpoint["model"].items():
        # remove `module.` if the model was trained with DDP
        name = k[7:] if ddp else k
        new_state_dict[name] = v

    # load params
    FXencoder.load_state_dict(new_state_dict)

    print(f"---reloaded checkpoint weights from {ckpt_path}---")



In [6]:
#load the checkpoint
checkpoint_path = '/Users/svanka/Codes/cambridge-mt_scrapper/fx_embeddings/FXencoder_ps.pt'
checkpoint = torch.load(checkpoint_path)
model = FXencoder()
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in checkpoint["model"].items():
    # remove `module.` if the model was trained with DDP
    name = k[7:] if ddp else k
    new_state_dict[name] = v

# load params
self.models[cur_model_name].load_state_dict(new_state_dict)

    print(f"---reloaded checkpoint weights : {cur_model_name} ---")
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()





IndentationError: unexpected indent (581678629.py, line 15)

In [None]:
#load the audio file
audio_path = "/Users/svanka/Codes/cambridge-mt_scrapper/audio/6(rock_metal)_data"
songs = os.listdir(audio_path)
for song in songs:
    song_path = os.path.join(audio_path, song)
    if os.path.isdir(song_path):
        mp3_files = glob.glob(f"{song_path}/*.mp3")
        for mp3_file in mp3_files:
            sound = AudioSegment.from_mp3(mp3_file)
            wav_file = os.path.splitext(mp3_file)[0] + ".wav"
            sound.export(wav_file, format="wav")
            print(f"Converted {mp3_file} to {wav_file}")
    break

