In [1]:
import librosa as rosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from warnings import warn
import pickle
import os
import IPython.display as ipd

# Constants

In [2]:
# Directory that holds the input recordings and their label files.
dataDirPath="../data/"
# Identifiers of the recordings to process. The commented-out one-element
# variant is presumably kept for quick single-file runs.
#namesMainParts=["1"]
namesMainParts=["1","2","3"]
# An audio file path is built as dataDirPath + audioFilePrefix + <name> + audioFileSufix.
audioFilePrefix="audio"
audioFileSufix=".wav"
# A label file path is built as dataDirPath + labelFilePrefix + <name> + labelFileSufix.
labelFilePrefix="labels"
labelFileSufix=".txt"

In [3]:
# Root directory for the pickled subfragment spectrograms (one sub-folder per type).
subfragmentsDir="../subfragments/"
# Length of a single subfragment, in seconds.
subfragmentLengthInSeconds=1
# Ratio used to calculate the number of subfragments to generate from a fragment:
# if L is the length of the fragment in seconds and S is the length of a
# subfragment in seconds, then int(L/S*subfragToFragLengthRatio+1) subfragments
# are generated from that fragment.
subfragToFragLengthRatio=0.5

In [4]:
# Short codes of the fragment types; each one gets its own sub-folder of subfragmentsDir.
# As assigned by parseLabelFile: m = music, s = speech, j = music labelled "jingle",
# sm = speech labelled "musicinthebackground", pc = speech labelled "phonecall".
typeList=["m","s","j","sm","pc"]

In [5]:
# Build the output tree up front: the root folder first (empty suffix),
# then one sub-folder per fragment type.
for typ in [""]+typeList:
    os.makedirs(subfragmentsDir+typ, exist_ok=True)

In [15]:
# Directory where the generated dataset pickles are written; created if missing.
datasetDir="../dataset/"
os.makedirs(datasetDir, exist_ok=True)

# First experiments with librosa

In [39]:
%%time
# Load the first recording; the call yields the samples and their sampling rate.
audio1, sr=rosa.load("../data/audio1.wav")

CPU times: user 6.93 s, sys: 4.99 s, total: 11.9 s
Wall time: 18.4 s


In [5]:
audio1

array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 7.6293945e-05,
       6.1035156e-05, 4.5776367e-05], dtype=float32)

In [45]:
%%time
# Short-time Fourier transform of the first 22050 samples
# (one second of audio if the sampling rate is 22050 Hz).
audioFFT=rosa.stft(audio1[:22050*1])

CPU times: user 2.4 ms, sys: 2.95 ms, total: 5.35 ms
Wall time: 4.22 ms


In [46]:
audioFFT.shape

(1025, 44)

# Parsing labels

In [6]:
def parseLabelFile(pathToFile):
    """Read a label file and collect fragment start times grouped by type.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is split on commas into
    ``<timestamp>,<type>[,<subtype>]`` (one trailing empty field is ignored).
    The timestamp is ``H:M:S``; shorter forms (``M:S`` or ``S``) are accepted
    too, and only the last three colon-separated parts are used.

    Parameters
    ----------
    pathToFile : str
        Path of the text file with the labels.

    Returns
    -------
    dict
        Maps "m" (music), "s" (speech), "j" (music/jingle), "sm"
        (speech/musicinthebackground) and "pc" (speech/phonecall) to the list
        of start times in whole seconds, in file order. Lines of type
        ``silence`` are not recorded.

    Warns
    -----
    RuntimeWarning
        For a line without a type field (the line is skipped), for an unknown
        type (the line is skipped) and for an unknown subtype (the line is
        then counted as plain music / speech).

    Raises
    ------
    ValueError
        If a timestamp part is not an integer.
    """
    startsOfFragments={"m":[], "s":[], "j":[], "sm":[], "pc":[]}
    with open(pathToFile, "r") as labelFile:
        # The first line is a header, not a label.
        labelFile.readline()
        for line in labelFile:
            # Remove only the line terminator: the last line of a file may come
            # without one, so cutting the final character blindly would eat data.
            line=line.rstrip("\r\n")
            if line.strip()=="":
                # Blank lines (e.g. at the end of the file) carry no label.
                continue
            listedLine=line.split(",")
            if listedLine[-1]=="":
                listedLine=listedLine[:-1]
            if len(listedLine)<2:
                warn(RuntimeWarning("Line has no type argument: "+line))
                continue
            # Fold H:M:S (or M:S, or S) into seconds, most significant part first.
            timeInSeconds=0
            for timePart in listedLine[0].split(":")[-3:]:
                timeInSeconds=timeInSeconds*60+int(timePart)
            if listedLine[1]=="silence":
                continue
            elif listedLine[1]=="music":
                if len(listedLine)>2:
                    if listedLine[2]=="jingle":
                        startsOfFragments["j"].append(timeInSeconds)
                        continue
                    else:
                        warn(RuntimeWarning("Second argument isn't recognized: "+listedLine[2]))
                # Music without a (recognized) subtype counts as plain music.
                startsOfFragments["m"].append(timeInSeconds)
            elif listedLine[1]=="speech":
                if len(listedLine)>2:
                    if listedLine[2]=="musicinthebackground":
                        startsOfFragments["sm"].append(timeInSeconds)
                        continue
                    elif listedLine[2]=="phonecall":
                        startsOfFragments["pc"].append(timeInSeconds)
                        continue
                    else:
                        warn(RuntimeWarning("Second argument isn't recognized: "+listedLine[2]))
                # Speech without a (recognized) subtype counts as plain speech.
                startsOfFragments['s'].append(timeInSeconds)
            else:
                warn(RuntimeWarning("Type argument isn't recognized: "+listedLine[1]))
    return startsOfFragments

In [7]:
class AudioFragment:
    """A contiguous, labelled piece of a recording held as a sample sequence."""
    def __init__(self, typ, audio, startPointInSec, fileName,sr):
        """Store the fragment data.

        typ             -- type code of the fragment
        audio           -- sequence of samples of this fragment
        startPointInSec -- start of the fragment, kept as ``startPoint``
        fileName        -- name/path of the file the fragment was taken from
        sr              -- sampling rate (samples per second) of ``audio``
        """
        self.typ=typ
        self.audio=audio
        self.startPoint=startPointInSec
        self.fileName=fileName
        self.sr=sr
    def drawSubfragment(self, lengthInSeconds):
        """Return a copy of a randomly positioned slice of the fragment.

        lengthInSeconds may be fractional; the slice length is
        ``sr*lengthInSeconds`` rounded to a whole number of samples.
        Raises RuntimeError when the fragment is shorter than that.
        """
        # Slice bounds must be integers, so a length such as 0.5 s has to be
        # turned into a whole sample count first.
        lengthInProbes=int(round(self.sr*lengthInSeconds))
        if len(self.audio)<lengthInProbes:
            raise RuntimeError("Fragment is to short to generate requested length.")
        # The upper bound is exclusive, hence +1 to allow the last valid start.
        index=np.random.randint(0,len(self.audio)-lengthInProbes+1)
        return self.audio[index:index+lengthInProbes].copy()
    def getLengthInSeconds(self):
        """Return the duration of the fragment: number of samples divided by ``sr``."""
        return len(self.audio)/self.sr

In [8]:
class SubfragmentIndex:
    """Registry of stored subfragment paths, grouped by fragment type."""
    def __init__(self):
        # Total number of registered subfragments, over all types.
        self.numberOfSubfragments=0
        # Type code -> list of registered subfragment paths.
        self.subfragmentsByType={"m":[], "s":[], "j":[], "sm":[], "pc":[]}
    def getNextNumberOfSubfragment(self):
        """Return the 1-based number the next registered subfragment will get."""
        return self.numberOfSubfragments+1
    def addNewSubfragmentToIndex(self, subfragmentPath, subfragmentTyp):
        """Register ``subfragmentPath`` under the type ``subfragmentTyp``."""
        self.numberOfSubfragments+=1
        self.subfragmentsByType[subfragmentTyp].append(subfragmentPath)
        return
    def drawRandomSubfragment(self):
        """Return the path of a subfragment drawn uniformly from all types.

        Raises ValueError when the index is empty.
        """
        if self.numberOfSubfragments<=0:
            raise ValueError("Cannot draw a subfragment from an empty index.")
        r=np.random.randint(0, self.numberOfSubfragments)
        # Walk the per-type lists as if they were one concatenated list.
        for typ in self.subfragmentsByType:
            paths=self.subfragmentsByType[typ]
            # Valid positions are 0..len-1, so r == len already belongs to the
            # next list (">" here would index one past the end).
            if r>=len(paths):
                r-=len(paths)
            else:
                return paths[r]
        # Only reachable when numberOfSubfragments disagrees with the lists.
        raise RuntimeError("Coś poszło bardzo nie tak.")
    def drawRandomByType(self, typ):
        """Return the path of a random subfragment of the type ``typ``.

        Raises ValueError when no subfragment of that type is registered.
        """
        paths=self.subfragmentsByType[typ]
        if not paths:
            raise ValueError("No subfragments registered for type: "+str(typ))
        r=np.random.randint(0, len(paths))
        return paths[r]

In [9]:
def generateSubfragments():
    """Cut the labelled fragments of every configured recording into random
    fixed-length subfragments and store their magnitude spectrograms.

    For each name in ``namesMainParts`` the audio file and its label file are
    loaded. Every labelled start opens a fragment that lasts until the next
    labelled start (or the end of the recording). From each fragment
    ``int(L/S*subfragToFragLengthRatio+1)`` random subfragments of
    ``subfragmentLengthInSeconds`` seconds are drawn (L = fragment length,
    S = subfragment length); ``abs(stft)`` of each one is pickled to
    ``subfragmentsDir/<type>/<number>.pickle`` and registered in the index.
    Fragments shorter than one subfragment yield nothing.

    Returns
    -------
    SubfragmentIndex
        Index of all pickle files written by this call.
    """
    subfInd=SubfragmentIndex()
    for name in namesMainParts:
        audioFilePath=dataDirPath+audioFilePrefix+name+audioFileSufix
        labelFilePath=dataDirPath+labelFilePrefix+name+labelFileSufix
        audio, sr=rosa.load(audioFilePath)
        if (sr!=22050):
            warn("Sapling rate of file: "+audioFilePath+" is diffrent than 22050.")
        startOfFragmentsDict=parseLabelFile(labelFilePath)

        sortedListOfStarts=[]
        for typ in startOfFragmentsDict:
            sortedListOfStarts.extend(startOfFragmentsDict[typ])
        sortedListOfStarts.sort()

        # Resolve the end of every fragment once, instead of a linear
        # list.index() search per fragment. For a repeated start time the first
        # occurrence decides, exactly as list.index() would.
        # NOTE(review): parseLabelFile does not report "silence" starts, so a
        # fragment followed by silence seems to extend over that silence up to
        # the next non-silence label - confirm whether that is intended.
        endOfRecording=int(len(audio)/sr)
        endByStart={}
        for position, start in enumerate(sortedListOfStarts):
            if start in endByStart:
                continue
            if position+1==len(sortedListOfStarts):
                endByStart[start]=endOfRecording
            else:
                endByStart[start]=sortedListOfStarts[position+1]

        for typ in startOfFragmentsDict:
            for start in startOfFragmentsDict[typ]:
                end=endByStart[start]
                af=(AudioFragment(typ, audio[start*sr:end*sr], start, audioFilePath, sr))
                numberOfSubfragmentsToGenerate=int(af.getLengthInSeconds()/subfragmentLengthInSeconds*\
                    subfragToFragLengthRatio+1)
                for _ in range(numberOfSubfragmentsToGenerate):
                    try:
                        subfragment=af.drawSubfragment(subfragmentLengthInSeconds)
                    except RuntimeError:
                        # The fragment is shorter than one subfragment; skip it.
                        # Only the draw is guarded, so failures while computing
                        # or saving a spectrogram are no longer hidden.
                        break
                    subfragmentFFT=np.abs(rosa.stft(subfragment, hop_length=1024))
                    picklePath=subfragmentsDir+typ+"/"+str(subfInd.getNextNumberOfSubfragment())+".pickle"
                    with open(picklePath,"wb") as pickleFile:
                        pickle.dump(subfragmentFFT, pickleFile)
                    subfInd.addNewSubfragmentToIndex(picklePath, typ)
    return subfInd

In [10]:
# Build the subfragment index. This loads every configured recording and
# writes the spectrogram pickles under subfragmentsDir.
SI=generateSubfragments()

# Creating test sets

In [13]:
def createTestSet(subfragIndex, testSetSize, typeWeights, savePath):
    """Draw a random set of stored subfragments and pickle it as one list.

    subfragIndex -- object with ``subfragmentsByType`` and ``drawRandomByType``
    testSetSize  -- number of samples to draw
    typeWeights  -- dict: type code -> probability of drawing that type
    savePath     -- path of the pickle file that receives the list of samples

    A warning is issued for every type that has fewer indexed subfragments
    than ``weight*testSetSize``. Samples are drawn independently, so the same
    subfragment may appear more than once.
    """
    typeNames=list(typeWeights.keys())
    for typeName in typeNames:
        wanted=typeWeights[typeName]*testSetSize
        available=len(subfragIndex.subfragmentsByType[typeName])
        if available<wanted:
            warn("There is less subfragments for type: "+typeName+" than expected number of samples in dataset.")

    # One draw decides the type of every sample (as a position in typeNames).
    drawnPositions=np.random.choice(len(typeNames), size=testSetSize, p=list(typeWeights.values()))

    def loadRandomSubfragment(typeName):
        # Pick a stored subfragment of the given type and unpickle it.
        with open(subfragIndex.drawRandomByType(typeName), "rb") as source:
            return pickle.load(source)

    samples=[loadRandomSubfragment(typeNames[position]) for position in drawnPositions]
    with open(savePath, "wb") as target:
        pickle.dump(samples, target)

In [20]:
def generateTestsSets(numberOfSets, sizes, typeWeights=None, subfragIndex=None, outputDir=None):
    """Create ``numberOfSets`` datasets for every size in ``sizes``.

    numberOfSets -- how many datasets to create per size
    sizes        -- iterable of dataset sizes (number of samples)
    typeWeights  -- dict: type code -> probability; defaults to
                    {"m":0.5, "s":0.25, "j":0, "sm":0.125, "pc":0.125}
    subfragIndex -- index to draw from; defaults to the global ``SI``
    outputDir    -- directory prefix of the output files; defaults to the
                    global ``datasetDir``

    Each dataset is written by createTestSet to
    ``outputDir + "dataset<size>-<i>.pickle"`` with i in 0..numberOfSets-1.
    """
    if typeWeights is None:
        typeWeights={"m":0.5, "s":0.25, "j":0, "sm":0.125, "pc":0.125}
    # The globals are looked up at call time, so they only need to exist when
    # the corresponding argument is omitted.
    if subfragIndex is None:
        subfragIndex=SI
    if outputDir is None:
        outputDir=datasetDir
    for s in sizes:
        for i in range(numberOfSets):
            createTestSet(subfragIndex, s, typeWeights,
                          outputDir+"dataset{}-{}.pickle".format(s,i))

In [None]:
# Three datasets each of 50, 100 and 500 samples.
generateTestsSets(3, [50, 100, 500])

In [22]:
# Three datasets of 1000 samples each.
generateTestsSets(3, [1000])