# Import and analyze audio files

### Before you extract features from your music collection, make sure that all files
- are stereo files
- have a sample rate of 44100 Hz
- are normalized to a peak value of 0 dB

### Manually set the "path" to your mp3-/wav-/aiff-/m4a-collection and then run all cells

In [10]:
#pip install numpy

In [8]:
import os
import numpy as np
from matplotlib import pyplot as plt #for data analysis and visualization
import IPython.display as ipd
import librosa, librosa.display #to analyse audio files
import pandas as pd #for data analysis and visualization
import math
import ffmpeg #for audio file conversion
import pydub #for audio file conversion
import csv #writing of csv files
import sklearn #for data analysis and visualization
import sounddevice as sd#to play back sounds
from pydub import AudioSegment
from scipy import signal#for fast correlation (using fast convolution)
from scipy.signal import butter, lfilter, freqz, sosfreqz, sosfilt #for butter bandworth filter

#defining the bandpass filter
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter as second-order sections.

    lowcut, highcut -- lower and upper band edge, in the same unit as fs
    fs              -- sampling rate; both edges are divided by fs/2 (Nyquist)
    order           -- order handed to scipy.signal.butter (default 5)

    Returns the 'sos' array produced by scipy.signal.butter.
    """
    nyquist = 0.5 * fs
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band', output='sos')

def butter_bandpass_filter(data, lowcut, highcut, sr, order=5):
    """Band-pass filter data between lowcut and highcut.

    data            -- signal to filter (passed to scipy.signal.sosfilt)
    lowcut, highcut -- band edges, in the same unit as sr
    sr              -- sampling rate of data
    order           -- filter order forwarded to butter_bandpass (default 5)

    Returns the filtered signal.
    """
    sections = butter_bandpass(lowcut, highcut, sr, order=order)
    return sosfilt(sections, data)

In [9]:
# Extraction of all recording studio features
def recordingstudio(cut, fs):
    """Compute frame-wise "recording studio" features of a two-channel signal.

    cut -- sequence of two equally long sample sequences; cut[0] is used as
           the left and cut[1] as the right channel
    fs  -- frame length in samples (frames do not overlap); trailing samples
           that do not fill a whole frame are ignored

    Returns six lists with one entry per frame:
    line   -- frame index
    bc     -- number of distinct (floor(left*20), floor(right*20)) cells
              visited in the frame, divided by 400
    corr   -- smallest entry of the 2x2 correlation matrix of left and right
              (NaN when a channel is constant within the frame)
    peaks  -- peak level of the left channel in dB (floored at 1e-6)
    rmss   -- RMS level of the left channel in dB (floored at 2e-7)
    crests -- peaks minus rmss

    NOTE(review): peak and RMS use the left channel only, as the original
    code did by name -- confirm this is intended.
    NOTE(review): with samples in [-1, 1] up to 41*41 cells exist, so bc can
    exceed 1 -- confirm the divisor 400.
    """
    # Separate arrays per channel: the previous version aliased both channels
    # to one list, so the right channel silently replaced the left one.
    left_channel = np.asarray(cut[0], dtype=float)
    right_channel = np.asarray(cut[1], dtype=float)
    bc = []
    peaks = []
    rmss = []
    crests = []
    corr = []
    line = []
    for frame in range(int(len(left_channel) / fs)):
        start = frame * fs
        left = left_channel[start:start + fs]
        right = right_channel[start:start + fs]

        # Count distinct (left, right) cells; axis=0 keeps each pair together
        # instead of flattening it into separate scalars.
        cells = np.column_stack((np.floor(left * 20), np.floor(right * 20))).astype(int)
        bc.append(len(np.unique(cells, axis=0)) / 400)

        corr.append(np.min(np.corrcoef(left, right)))

        # Floors keep the logarithms finite for silent frames.
        peak = max(float(np.max(np.abs(left))), 0.000001)
        # Mean over ALL squared samples of the frame (the previous version
        # only used the last sample).
        rms = max(float(np.sqrt(np.mean(left ** 2))), 0.0000002)
        peakdB = 20 * math.log10(peak)
        rmsdB = 20 * math.log10(rms)

        peaks.append(peakdB)
        rmss.append(rmsdB)
        crests.append(peakdB - rmsdB)
        line.append(frame)
    print('all frames done')
    return line, bc, corr, peaks, rmss, crests
#print(type(line))

In [10]:
# File extensions treated as audio. Everything else is skipped, in particular
# the "<audio file>.csv" feature tables this notebook writes next to the
# audio files, which would otherwise be picked up on a second run.
AUDIO_EXTENSIONS = ('.mp3', '.wav', '.aiff', '.aif', '.m4a')


def list_audio_files(folder, extensions=AUDIO_EXTENSIONS):
    """Return the sorted paths of all audio files below folder (recursive).

    folder     -- directory that is walked with os.walk
    extensions -- tuple of lower-case file name endings to accept
    """
    found = []
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.lower().endswith(extensions):
                found.append(os.path.join(root, file))
    # os.walk gives no ordering guarantee; sort for a reproducible order
    return sorted(found)


path = r'C:\Users\Justin\Documents\MA\Files\YouTube\GER'#file path that contains all files to be analyzed
mp3list = list_audio_files(path)
fs = 2048 #frame size in samples (2048/44100 s, i.e. about 46 ms)
ss = fs #step size equals frame size (i.e., no overlap between frames)

In [11]:
#transposing of data

from typing import List, Any

def all_value(value: Any, arr: List[float]) -> bool:
    """Return True when every element of arr equals value (True for empty arr)."""
    return all(element == value for element in arr)

def transpose_array(arr: List[List[float]]) -> List[List[float]]:
    """Swap rows and columns of arr; the result is truncated to the shortest row."""
    return [list(column) for column in zip(*arr)]


def strip_array(value: Any, arr: List[List[float]]) -> List[List[float]]:
    """Drop every row and every column of arr that consists only of value.

    Rows are removed first, then columns of what remains; the result is a
    list of row lists.
    """
    def has_other_values(sequence):
        return not all_value(value, sequence)

    # rows that contain at least one element different from value
    kept_rows = list(filter(has_other_values, arr))

    # same test on the columns, done on the transposed data
    kept_columns = list(filter(has_other_values, transpose_array(kept_rows)))

    # back to row-major layout
    return transpose_array(kept_columns)

In [12]:
#extraction of audio features for the first file in mp3list
n=0
array, sr=librosa.load(mp3list[n],sr=44100,mono=False) #loading the file at 44100 Hz without down-mixing
print(mp3list[n])

#peak normalisation; a completely silent file is left as it is instead of being divided by zero
peak_value = np.max(np.abs(array))
if peak_value > 0:
    array = array/peak_value

#keep only whole frames of fs samples per channel
#(the former strip_array(0, array) call was removed: its result was overwritten
# before use and it converted every sample into Python lists)
cutter = int(np.floor(len(array[0])/fs))
print(cutter)
cut = [array[0][0:cutter*fs],array[1][0:cutter*fs]]
line, bc, corr, peak, rms, crest = recordingstudio(cut, fs) #extracting the audio features using the function recordingstudio
print('here')

#band-pass filtering and feature extraction for the three frequency bands
cutlow = [butter_bandpass_filter(cut[0],20, 150 ,sr, order=5),butter_bandpass_filter(cut[1],20, 150 ,sr, order=5)]
line, bclow, corrlow, peaklow, rmslow, crestlow = recordingstudio(cutlow, fs)
cutmid = [butter_bandpass_filter(cut[0],150, 2000 ,sr, order=5),butter_bandpass_filter(cut[1],150, 2000 ,sr, order=5)]
line, bcmid, corrmid, peakmid, rmsmid, crestmid = recordingstudio(cutmid, fs)
cuthigh = [butter_bandpass_filter(cut[0],2000, 10000 ,sr, order=5),butter_bandpass_filter(cut[1],2000, 10000 ,sr, order=5)]
line, bchigh, corrhigh, peakhigh, rmshigh, cresthigh = recordingstudio(cuthigh, fs)
print(mp3list[n])

#collects all extracted features, one array per csv column (same order as Details)
features=[np.transpose(column) for column in (
    line, bc, corr, peak, rms, crest,
    bclow, corrlow, peaklow, rmslow, crestlow,
    bcmid, corrmid, peakmid, rmsmid, crestmid,
    bchigh, corrhigh, peakhigh, rmshigh, cresthigh,
)]
#column headers; the first (empty) header belongs to the frame index
Details = ["", 'PhaseSpace', 'ChannelCorrelation', 'PeakMeter', 'RMS', 'CrestFactor',
           'PhaseSpaceLow', 'ChannelCorrelationLow', 'PeakMeterLow', 'RMSLow', 'CrestFactorLow',
           'PhaseSpaceMid', 'ChannelCorrelationMid', 'PeakMeterMid', 'RMSMid', 'CrestFactorMid',
           'PhaseSpaceHigh', 'ChannelCorrelationHigh', 'PeakMeterHigh', 'RMSHigh', 'CrestFactorHigh']

#writes a csv file next to the audio file (one row per frame)
with open(mp3list[n]+'.csv', 'w', newline='') as f:
    write = csv.writer(f)
    write.writerow(Details)
    write.writerows(np.transpose(features))

C:\Users\Justin\Documents\MA\Files\YouTube\GER\Chakuza_Solltenalleuntergehen_2007-05-18.wav
5265
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Chakuza_Solltenalleuntergehen_2007-05-18.wav


In [13]:
#second single-file run: same steps as the first extraction, again for mp3list[0]
n=0
array, sr=librosa.load(mp3list[n],sr=44100,mono=False) #loading the file at 44100 Hz without down-mixing
print(mp3list[n])

#peak normalisation; a completely silent file is left as it is instead of being divided by zero
peak_value = np.max(np.abs(array))
if peak_value > 0:
    array = array/peak_value

#keep only whole frames of fs samples per channel
#(the former strip_array(0, array) call was removed: its result was overwritten
# before use and it converted every sample into Python lists)
cutter = int(np.floor(len(array[0])/fs))
cut = [array[0][0:cutter*fs],array[1][0:cutter*fs]]
line, bc, corr, peak, rms, crest = recordingstudio(cut, fs)
print('here')

#band-pass filtering and feature extraction for the three frequency bands
cutlow = [butter_bandpass_filter(cut[0],20, 150 ,sr, order=5),butter_bandpass_filter(cut[1],20, 150 ,sr, order=5)]
line, bclow, corrlow, peaklow, rmslow, crestlow = recordingstudio(cutlow, fs)
cutmid = [butter_bandpass_filter(cut[0],150, 2000 ,sr, order=5),butter_bandpass_filter(cut[1],150, 2000 ,sr, order=5)]
line, bcmid, corrmid, peakmid, rmsmid, crestmid = recordingstudio(cutmid, fs)
cuthigh = [butter_bandpass_filter(cut[0],2000, 10000 ,sr, order=5),butter_bandpass_filter(cut[1],2000, 10000 ,sr, order=5)]
line, bchigh, corrhigh, peakhigh, rmshigh, cresthigh = recordingstudio(cuthigh, fs)

#collects all extracted features, one array per csv column (same order as Details)
features=[np.transpose(column) for column in (
    line, bc, corr, peak, rms, crest,
    bclow, corrlow, peaklow, rmslow, crestlow,
    bcmid, corrmid, peakmid, rmsmid, crestmid,
    bchigh, corrhigh, peakhigh, rmshigh, cresthigh,
)]
#column headers; the first (empty) header belongs to the frame index
Details = ["", 'PhaseSpace', 'ChannelCorrelation', 'PeakMeter', 'RMS', 'CrestFactor',
           'PhaseSpaceLow', 'ChannelCorrelationLow', 'PeakMeterLow', 'RMSLow', 'CrestFactorLow',
           'PhaseSpaceMid', 'ChannelCorrelationMid', 'PeakMeterMid', 'RMSMid', 'CrestFactorMid',
           'PhaseSpaceHigh', 'ChannelCorrelationHigh', 'PeakMeterHigh', 'RMSHigh', 'CrestFactorHigh']

#writes a csv file next to the audio file (one row per frame)
with open(mp3list[n]+'.csv', 'w', newline='') as f:
    write = csv.writer(f)
    write.writerow(Details)
    write.writerows(np.transpose(features))

C:\Users\Justin\Documents\MA\Files\YouTube\GER\Chakuza_Solltenalleuntergehen_2007-05-18.wav
all frames done
here
all frames done
all frames done
all frames done


In [14]:
#batch run: the same extraction for every file in mp3list

for n in range(len(mp3list)):
    # One attempt per file. The former "while True" wrapper always left the
    # loop after a single pass, so a plain try/except is equivalent.
    try:
        array, sr=librosa.load(mp3list[n],sr=44100,mono=False) #loading the file at 44100 Hz without down-mixing
        print(mp3list[n])

        #peak normalisation; a completely silent file is left as it is instead of being divided by zero
        peak_value = np.max(np.abs(array))
        if peak_value > 0:
            array = array/peak_value

        #keep only whole frames of fs samples per channel
        #(the former strip_array(0, array) call was removed: its result was
        # overwritten before use and it converted every sample into Python lists)
        cutter = int(np.floor(len(array[0])/fs))
        cut = [array[0][0:cutter*fs],array[1][0:cutter*fs]]
        line, bc, corr, peak, rms, crest = recordingstudio(cut, fs)
        print('here')

        #band-pass filtering and feature extraction for the three frequency bands
        cutlow = [butter_bandpass_filter(cut[0],20, 150 ,sr, order=5),butter_bandpass_filter(cut[1],20, 150 ,sr, order=5)]
        line, bclow, corrlow, peaklow, rmslow, crestlow = recordingstudio(cutlow, fs)
        cutmid = [butter_bandpass_filter(cut[0],150, 2000 ,sr, order=5),butter_bandpass_filter(cut[1],150, 2000 ,sr, order=5)]
        line, bcmid, corrmid, peakmid, rmsmid, crestmid = recordingstudio(cutmid, fs)
        cuthigh = [butter_bandpass_filter(cut[0],2000, 10000 ,sr, order=5),butter_bandpass_filter(cut[1],2000, 10000 ,sr, order=5)]
        line, bchigh, corrhigh, peakhigh, rmshigh, cresthigh = recordingstudio(cuthigh, fs)

        #collects all extracted features, one array per csv column (same order as Details)
        features=[np.transpose(column) for column in (
            line, bc, corr, peak, rms, crest,
            bclow, corrlow, peaklow, rmslow, crestlow,
            bcmid, corrmid, peakmid, rmsmid, crestmid,
            bchigh, corrhigh, peakhigh, rmshigh, cresthigh,
        )]
        #column headers; the first (empty) header belongs to the frame index
        Details = ["", 'PhaseSpace', 'ChannelCorrelation', 'PeakMeter', 'RMS', 'CrestFactor',
                   'PhaseSpaceLow', 'ChannelCorrelationLow', 'PeakMeterLow', 'RMSLow', 'CrestFactorLow',
                   'PhaseSpaceMid', 'ChannelCorrelationMid', 'PeakMeterMid', 'RMSMid', 'CrestFactorMid',
                   'PhaseSpaceHigh', 'ChannelCorrelationHigh', 'PeakMeterHigh', 'RMSHigh', 'CrestFactorHigh']

        #writes a csv file next to the audio file (one row per frame)
        with open(mp3list[n]+'.csv', 'w', newline='') as f:
            write = csv.writer(f)
            write.writerow(Details)
            write.writerows(np.transpose(features))
    except Exception as err:
        # Best effort: report which file failed and why, then go on with the
        # next one. KeyboardInterrupt is no longer swallowed, so the batch can
        # be stopped by hand.
        print('failed to process', mp3list[n], '-', repr(err))

C:\Users\Justin\Documents\MA\Files\YouTube\GER\Chakuza_Solltenalleuntergehen_2007-05-18.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_312Minuten_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_CheckMalDieRhetorikAb_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_DasErsteMal_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_EndlichNichtschwimmer_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_ErSoIchSo_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann

  c /= stddev[None, :]


all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_Inhalation_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_KommtZeitDrehtRad_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_Lalalabernich_2006-00-00.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_MetapherThanLeather_2010-04-09.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_Nesthocker_2010-04-09.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann_ORobota_2010_04-09.wav
all frames done
here
all frames done
all frames done
all frames done
C:\Users\Justin\Documents\MA\Files\YouTube\GER\Dendemann