In [147]:
import numpy as np
from scipy import signal
from scipy.io import wavfile
from scipy.spatial import distance
import matplotlib
import matplotlib.pyplot as plt
import os
from pydub import AudioSegment
import pandas as pd
import numpy as np

In [148]:
def log(text):
    print(str(text))

In [149]:
def spectrogram(pathfile):
    """read a wav file and return its spectrogram"""
    if not pathfile.endswith(".wav"):
        log("audio file must be in wav format")
    else:
        framerate, series = wavfile.read(pathfile)
        log("wav file processed")
        # series[:,0] -> left channel
        # series[:,1] -> right channel
        # take mean to get one-channel series
        series = np.mean(series, axis=1)
        log("series converted to one-channel")

        f, t, spect = signal.spectrogram(
            series,
            fs=framerate,
            nperseg=10*framerate,
            noverlap=(10-1)*framerate,
            window="hamming"
        )
        log("spectrogram computed")

        return framerate, f, t, spect
    
def fingerprint(f, spect):
    """compute fingerprint (ver.1) from spectrogram

    Option 4 in the instruction:
    find a list of (positive) frequencies f (scaled to [0, 1])
    at which the local periodogram has a peak
    """
    max_f = max(f)
    peaks = np.argmax(spect, axis=0)
    fingerprints = f[peaks] / max_f

    log("fingerprint (ver.1) computed")

    return np.array(fingerprints)

def fingerprint2(f, spect, framerate):
    """compute fingerprint (ver.2) from spectrogram

    Option 5 in the instruction:
    find the maximum power per octave in local periodograms
    """
    # m = number of octaves
    # must have m>5 to cover middleC
    # larger m -> better precision
    m = 8
    min_f = int((2**-(m+1))*(framerate/2))
    fingerprints = []

    log("start to iterate through the spectrogram")

    # iterate through all octaves
    for k in range(m):
        start = min_f*(2**k)*10
        end = min_f*(2**(k+1))*10
        # take subset of spectrogram, slice each octave
        sub_f = f[start:end]
        sub_spect = spect[start:end]
        # compute fingerprint of each subset
        sub_fingerprint = fingerprint(sub_f, sub_spect)
        fingerprints.append(sub_fingerprint)
    # transpose to get fingerprint for each window
    fingerprints = np.array(fingerprints).T

    log("fingerprint (ver.2) computed")

    return fingerprints

def match(f1, f2):
    """compare two fingerprints (ver.1) and see if they match

    Params
    + f1 (num) - fingerprint stored in database, duration=10s
    + f2 (num) - fingerprint of a snippet, duration>=10s

    Return
    + boolean, True if match, False otherwise
    """
    # small tolerance -> better precision
    tolerance = 10**(-100)
    dist = (f1-f2)**2

    return dist < tolerance

In [150]:
def select_max_song_id(df):
    """ Select the maximum song_id (to loop over all songs). """
    if 'filename' in df:
        max_song_id = df.index.max() + 1
    else:
        max_song_id = 0
    return max_song_id

def select_fingerprint1(df, filename):
    """ Select all fingerprints (ver.1) of a song. """
    if 'filename' in df:
        fingerprints1 = df.loc[df['filename'] == filename, 'fingerprints1'].tolist()
        return fingerprints1
    else:
        return None

def select_fingerprint2(df, filename):
    """ Select all fingerprints (ver.2) of a song. """
    if 'filename' in df:
        fingerprints2 = df.loc[df['filename'] == filename, 'fingerprints2'].tolist()
        return fingerprints2
    else:
        return None

def list_all_songs(df):
    """ List all song titles in DataFrame. """
    if 'filename' in df:
        song_filenames = df['filename'].tolist()
        return song_filenames
    else:
        return []
    
def select_title(df, filename):
    """ Select song title by filename. """
    if 'filename' in df and 'title' in df:
        title = df.loc[df['filename'] == filename, 'title'].iloc[0]
        return title
    else:
        return None

In [151]:
def identify1(pathfile, df):
    """ Identify a snippet (wav) with fingerprint ver.1 """
    # Compute spectrogram and fingerprints
    framerate, f, t, spect = spectrogram(pathfile)
    fingerprints1 = fingerprint(f, spect)
    fingerprints2 = fingerprint2(f, spect, framerate)
    log('Spectrogram and fingerprints computed.')
    
    # Create a dictionary to hold song information
    song_info = {
        'filename': pathfile,
        'framerate': framerate,
        'spectrogram': spect,
        'fingerprints1': fingerprints1,
        'fingerprints2': fingerprints2
    }
    
    # Calculate similarity with each song in the DataFrame
    similarities = []
    for index, row in df.iterrows():
        similarity_score = compare_fingerprints(fingerprints1, row['fingerprints1'])
        similarities.append((row['filename'], similarity_score))
    
    # Find the song with the highest similarity
    best_match = max(similarities, key=lambda x: x[1])
    best_match_filename = best_match[0]
    similarity_score = best_match[1]
    
    return best_match_filename, similarity_score

def compare_fingerprints(fp1, fp2):
    """ Compare two sets of fingerprints and return similarity score """
    # Pad the shorter fingerprint with zeros to match the length of the longer one
    max_length = max(len(fp1), len(fp2))
    fp1_padded = np.pad(fp1, (0, max_length - len(fp1)))
    fp2_padded = np.pad(fp2, (0, max_length - len(fp2)))
    
    # Calculate similarity score using dot product and normalization
    similarity_score = np.dot(fp1_padded, fp2_padded) / (np.linalg.norm(fp1_padded) * np.linalg.norm(fp2_padded))
    return similarity_score



In [152]:
def convert(infile):
    """convert mp3 to wav
    Param: infile(str): a mp3 file, like "music.mp3"
    Export: outfile: a wav file with the same name, like "music.wav"
    """
    try:
        # format outfile name
        filename = os.path.basename(infile)
        outfile = "music/" + filename[:-3] + "wav"
        # export wav
        sound = AudioSegment.from_mp3(infile)
        sound.export(outfile, format="wav")
    except OSError:
        log("expected an mp3 file in the directory")

## df

In [153]:
import os
import pandas as pd

def process_audio_files_to_df(folder_path):
    """Process all audio files in the specified folder and store data in a DataFrame."""
    data = []
    for file in os.listdir(folder_path):
        if file.endswith(".mp3"):
            mp3_path = os.path.join(folder_path, file)
            # Convert mp3 to wav
            convert(mp3_path)
            log('Audio converted to wav.')
            # Удаляем mp3-версию файла после конвертации в wav
            os.remove(mp3_path)

    for file in os.listdir(folder_path):
        if file.endswith(".wav"):
            # Read the wav from local directory
            pathwav = os.path.join(folder_path, file)
            
            # Compute spectrogram and fingerprints
            framerate, f, t, spect = spectrogram(pathwav)
            fingerprints1 = fingerprint(f, spect)
            fingerprints2 = fingerprint2(f, spect, framerate)
            log('Spectrogram and fingerprints computed.')
            
            # Add song info to the data list
            data.append({
                'filename': file,
                'framerate': framerate,
                'spectrogram': spect,
                'fingerprints1': fingerprints1,
                'fingerprints2': fingerprints2
            })
    
    # Convert data list to DataFrame
    df = pd.DataFrame(data)
    return df

# Путь к папке с аудиофайлами
folder_path = "music"

# Обработка всех аудиофайлов в папке и создание DataFrame с звуковыми отпечатками
song_df = process_audio_files_to_df(folder_path)
print(song_df)


wav file processed
series converted to one-channel
spectrogram computed
fingerprint (ver.1) computed
start to iterate through the spectrogram
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.2) computed
Spectrogram and fingerprints computed.
wav file processed
series converted to one-channel
spectrogram computed
fingerprint (ver.1) computed
start to iterate through the spectrogram
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.2) computed
Spectrogram and fingerprints computed.
wav file processed
series converted to one-channel
spectrogram computed
fingerprint (ver.1) computed
start to iterat

In [155]:
def identify_audio_file(folder_path, df):
    """Identify audio file in the specified directory and pass its path to the identification function."""
    files = os.listdir(folder_path)
    
    # Ensure there is exactly one file in the directory
    if len(files) == 1:
        filepath = os.path.join(folder_path, files[0])
        
        # Check if the file is an mp3
        if filepath.endswith(".mp3"):
            # Convert mp3 to wav
            try:
                # Format output file name
                filename = os.path.basename(filepath)
                outfile = os.path.join(folder_path, filename[:-3] + "wav")
                # Export wav
                sound = AudioSegment.from_mp3(filepath)
                sound.export(outfile, format="wav")
                log('Audio converted to wav.')
                # Remove the mp3 version of the file after conversion to wav
                os.remove(filepath)
                filepath = outfile  # Update filepath to point to the wav file
            except OSError:
                log("Expected an mp3 file in the directory.")
        elif not filepath.endswith(".wav"):
            log("File format not supported. Supported formats: mp3, wav.")
            return None
        
        # Pass the filepath to the identification function
        if filepath:
            return identify1(filepath, df)
    else:
        print("There should be exactly one file in the directory.")
        return None

# Usage
folder_path = 'test_music'
print(identify_audio_file(folder_path, song_df))

Audio converted to wav.
wav file processed
series converted to one-channel
spectrogram computed
fingerprint (ver.1) computed
start to iterate through the spectrogram
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.1) computed
fingerprint (ver.2) computed
Spectrogram and fingerprints computed.
('Apologize.wav', 0.9473537393642835)
