In [1]:
import librosa
import opensmile
import numpy as np
import pandas as pd

'''
function block
'''

def match_audio_length(path_1:str,path_2:str)->tuple:
    """Load two audio files and time-stretch the second to the first's length.

    Both files are read with ``librosa.load`` without an explicit ``sr``, so
    librosa's default resampling applies to both signals. The second signal is
    then stretched or compressed so it has as many samples as the first.

    Args:
        path_1: Path of the reference audio file; its signal is returned as loaded.
        path_2: Path of the audio file whose duration is adjusted.

    Returns:
        ``(y1, y2)``: the reference signal and the length-matched second signal.
    """
    y1,_ = librosa.load(path_1)
    y2,_ = librosa.load(path_2)

    # rate > 1 shortens and rate < 1 lengthens, so len(y2)/len(y1) maps y2 onto
    # y1's duration. `rate` is passed by keyword because recent librosa
    # releases declare it keyword-only and reject the positional form.
    stretch_rate = y2.shape[0]/y1.shape[0]
    y2 = librosa.effects.time_stretch(y2,rate=stretch_rate)

    # Guard against an off-by-a-few-samples result from rounding inside
    # time_stretch: pad or trim so both signals have exactly the same length.
    y2 = librosa.util.fix_length(y2,size=y1.shape[0])
    return y1,y2

def get_df(signal_1,signal_2,sampling_rate:int=22050):
    """Extract eGeMAPSv02 low-level descriptors from two raw audio signals.

    Args:
        signal_1: First audio signal, passed to ``Smile.process_signal``.
        signal_2: Second audio signal, passed to ``Smile.process_signal``.
        sampling_rate: Sampling rate in Hz of both signals. Defaults to 22050,
            the value that was previously hard-coded here.

    Returns:
        ``(df1, df2)``: the feature tables returned by ``process_signal`` for
        ``signal_1`` and ``signal_2`` respectively.
    """
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=opensmile.FeatureLevel.LowLevelDescriptors
    )
    # Both signals go through the same extractor with the same sampling rate.
    df1 = smile.process_signal(signal_1,sampling_rate=sampling_rate)
    df2 = smile.process_signal(signal_2,sampling_rate=sampling_rate)
    return df1,df2

def cos_similarity(df1,df2):
    """Return the mean row-wise cosine similarity between two feature tables.

    Row ``i`` of ``df1`` is compared with row ``i`` of ``df2``. If the tables
    have different numbers of rows, only the rows present in both are compared
    (the extra rows of the longer table are ignored).

    Args:
        df1: 2-D table (e.g. a pandas DataFrame) of numeric values.
        df2: 2-D table with the same number of columns as ``df1``.

    Returns:
        The average cosine similarity over the compared rows, or ``nan`` when
        there are no rows to compare.

    Raises:
        ValueError: If either input is not 2-D or the column counts differ.
    """
    a = np.asarray(df1,dtype=float)
    b = np.asarray(df2,dtype=float)
    if a.ndim != 2 or b.ndim != 2:
        raise ValueError("cos_similarity expects two 2-D inputs")
    if a.shape[1] != b.shape[1]:
        raise ValueError(
            f"row vectors differ in length: {a.shape[1]} vs {b.shape[1]}"
        )

    # Compare only the rows both tables have. Sizing the result from df1 alone
    # would leave unfilled entries in the average when df2 is shorter.
    n_rows = min(a.shape[0],b.shape[0])
    if n_rows == 0:
        return np.nan
    a,b = a[:n_rows],b[:n_rows]

    dots = np.einsum('ij,ij->i',a,b)
    norms = np.linalg.norm(a,axis=1)*np.linalg.norm(b,axis=1)
    # The small epsilon keeps all-zero rows from dividing by zero.
    return np.mean(dots/(norms+1e-9))




In [2]:
import plotly.express as px
import plotly.graph_objects as go

# Feature extractor configured for the eGeMAPSv02 low-level descriptors.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors
)

# Sample folders to load. range(1,5) covers v1..v4 only.
# NOTE(review): the original comment said "v1_01 to v5_01"; if a fifth sample
# exists and should be plotted, change this to range(1,6).
SAMPLE_IDS = range(1,5)

# y[k] holds the feature table of the k-th entry of SAMPLE_IDS.
y = [smile.process_file(f'./v{i}/v{i}_01.mp3') for i in SAMPLE_IDS]

DURATION = 0.02 #duration of each window (not used below; kept for reference)
# The x axis assumes 100 feature rows per second, as the original `j/100` did.
# TODO confirm against the opensmile configuration.
FRAMES_PER_SECOND = 100

max_time = max(features.shape[0] for features in y)
time_list = [j/FRAMES_PER_SECOND for j in range(max_time)]

#building figure
fig = go.Figure()
for sample_id,features in zip(SAMPLE_IDS,y):
    fig.add_trace(
        go.Scatter(
            # Each trace gets an x axis of its own length, so x and y always match.
            x = time_list[:features.shape[0]],
            y = features['Loudness_sma3'],
            mode = 'lines',
            name = f'Loudness of Sample {sample_id}'
        )
    )
fig.update_layout(
    height=600,
    width=1000,
    xaxis_title='Time (s)',
    yaxis_title='Loudness_sma3',
)

fig.show()

In [3]:
# Stretch sample 2 to the length of sample 1, then extract features from both.
y1,y2 = match_audio_length('./v1/v1_01.mp3','./v2/v2_01.mp3')
df1,df2 = get_df(y1,y2)

# Time axis in seconds, long enough for whichever table has more rows.
# Assumes 100 feature rows per second, as the original `j/100` did -- TODO confirm.
n_frames = max(df1.shape[0],df2.shape[0])
time_axis = [j/100 for j in range(n_frames)]

# One named trace per sample so the legend identifies the curves.
fig = go.Figure()
for label,features in (('Sample 1',df1),('Sample 2 (time-stretched)',df2)):
    fig.add_trace(
        go.Scatter(
            x = time_axis[:features.shape[0]],
            y = features['F1frequency_sma3nz'],
            mode = 'lines',
            name = f'F1 frequency of {label}'
        )
    )
fig.update_layout(
    xaxis_title='Time (s)',
    yaxis_title='F1frequency_sma3nz',
)

fig.show()


PySoundFile failed. Trying audioread instead.


PySoundFile failed. Trying audioread instead.



In [4]:
# After the transpose each row is one feature column of the original table, so
# cos_similarity compares every feature's sequence of values between the two
# samples and averages the per-feature scores. Both tables need the same
# number of frames for the row vectors to line up.
cos_similarity(df1.T,df2.T)

0.6178860244699764