In [1]:
import pymongo
import pandas as pd
from pandas import Series
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from bson.objectid import ObjectId
import pprint
import os

In [2]:
client = pymongo.MongoClient('localhost',32768) #connection to MongoDB instance
db = client.get_database('b=bandhub') #grab database
postsCol = db.get_collection('posts')
songsCol = db.get_collection('songsStream')
videosCol = db.get_collection('mergedVideos')
tracksCol = db.get_collection('tracksStream')
#grab collections

In [3]:
[postsCol.count(), songsCol.count(), tracksCol.count(), videosCol.count()]
#number of documents in each collection

[0, 0, 0, 0]

In [4]:
data = []
#list to hold data

cursor = songsCol.find({'access' : 1})
#grab public song collaborations

for songDoc in cursor:
#iterate through each song

    songId = songDoc['_id']
    #grab songId which is unique for each collaboration
    
    musicBrainzID = songDoc.get('musicbrainzMetadataId')
    newMusicBrainzID = songDoc.get('newMusicbrainzMetadataId')
    #music brainz IDs
    
    post = postsCol.find({'objectId' : songId})
    #grab the corresponding post document
    
    videoDocuments = videosCol.find({'songId': songId})
    #find the corresponding video documents. Note: there are multiple videos docs for the same collaboration
    #as instruments are added and tracks swapped out
    
    for postDoc in post:
    #iterate through corresponding post document and grab relevant information
        
        publishedTracks = postDoc['participantsInfo']['publishedTracks']
        trackList = []
        for track in publishedTracks:
            trackList.append(str(track['_id']))
        sortedTracks = sorted(trackList)
        #grab the array of published tracks for this collaboration and create a list to hold those tracks
        #the track list will be used to compare against trackIds in the video document to determine which
        #video document holds the final mix
        
        for videoDocs in videoDocuments:
            toCompare = []
            for ids in videoDocs['trackIds']:
                toCompare.append(str(ids))
            sortedToCompare = sorted(toCompare)
            #create list to compare list of published tracks to

            #pprint.pprint(sortedToCompare)
            #pprint.pprint(sortedTracks)

            mixedVideo = None
            if (sortedToCompare == sortedTracks):
                mixedVideo = videoDocs['mp4MergedVideoUrl']
                mixedAudio = videoDocs.get('mp3AudioUrl')
                break

        if mixedVideo is None:
            mixedVideo = songDoc.get('mp4MergedVideoUrl')
        
        if mixedVideo is None:
            print('No Match')
        
        collabSettings = postDoc.get('collabSettings')
        if collabSettings is None:
            isFinished = False
        else:
            isFinished = collabSettings['finished']
        #set bool for whether collaboration is finished
        
        for track in publishedTracks:
        #for each track that is published

            trackId = track['_id']
            #grab trackId of published track.
            
            trackSettings = songDoc['settings'].get(str(trackId))
            if trackSettings is None:
                print('no track settings for this published track')
                pprint.pprint(songDoc)
                continue
            #to be used to grab track settings
            
            audioChannel = trackSettings.get('audioChannels')
            trackVolume = trackSettings.get('volume')
            mute = trackSettings.get('mute')
            
            if trackVolume is None:
                audioChannel[0].get('volume')
            if mute is None:
                audioChannel[0].get('mute')
            #if cannot find track volume, check here
            
            if trackVolume is None:
                print('cannot find volume')
                print(songId)
                print(trackId)
            if mute is None:
                print('cannot find mute')
                print(songId)
                print(trackId)
            #error catching to find bad documents
            
            
            if(trackVolume == 0) or (mute == True):
                volume = 0
                #continue
            #skip track if its volume is set to 0 or is mute

            
            ### AUDIO EFFECTS SETTINGS ###
            #conflicting values of some effects (including track volume)
            #inside and outside of audioChannels. Get the SongIDs and 
            #send to Mark
            
            if audioChannel is not None:
                compressorState = audioChannel[0].get('compressorState')
                compressorValue = audioChannel[0].get('compressorValue')
                echoState = audioChannel[0].get('echoState')
                echoValue = audioChannel[0].get('echoValue')
                noiseGateState = audioChannel[0].get('noiseGateState')
                noiseGateValue = audioChannel[0].get('noiseGateValue')
                panState = audioChannel[0].get('panState')
                panValue = audioChannel[0].get('panValue')
                reverbState = audioChannel[0].get('reverbState')
                reverbValue = audioChannel[0].get('reverbValue')
                eqState = audioChannel[0].get('visualEQState')
                eqValue = audioChannel[0].get('visualEQValues')
                solo = audioChannel[0].get('solo')
            else:
                compressorState = None
                compressorValue = None
                echoState = None
                echoValue = None
                noiseGateState = None
                noiseGateValue = None
                panState = None
                panValue = None
                reverbState = None
                reverbValue = None
                eqState = None
                eqValue = None
                solo = None
            #if the settings are located in settings.audioChannel[0] grab them there
            
            #if not then try to grab from settings('field')
            if compressorValue is None:
                compressorState = trackSettings.get('compressorState')
                compressorValue = trackSettings.get('compressorValue')
            
            #echo
            if echoValue is None:
                echoState = trackSettings.get('echoState')
                echoValue = trackSettings.get('echoValue')
            
            #noise gate?
            if noiseGateValue is None:
                noiseGateState = trackSettings.get('noiseGateState')
                noiseGateValue = trackSettings.get('noiseGateValue')
            
            #pan
            if panValue is None:
                panState = trackSettings.get('panState')
                panValue = trackSettings.get('panValue')
            
            #reverb
            if reverbValue is None:
                reverbState = trackSettings.get('reverbState')
                reverbValue = trackSettings.get('reverbValue')
            
            #eq
            if eqValue is None:
                eqState = trackSettings.get('visualEQState')
                eqValue = trackSettings.get('visualEQValues')
            
            #is solo'ed. I don't think this is ever true
            if solo is None:
                solo = trackSettings.get('solo')  
                
            if 'solo' is True:
                pprint.pprint(songDoc)
            #if solo is true print
            
            ### END AUDIO EFFECTS SETTINGS ###
            

            
            trackDocument = tracksCol.find({'_id' : trackId})
            #grab the corresponding track document
            
            for trackDoc in trackDocument:        
            #look through corresponding track document    
                
                #list to hold all data for each track which is a row in our hdf file

                #pprint.pprint(trackDoc)

                audioURL = trackDoc['audioChannels'][0]['fileUrl']
                if audioURL is None:
                    print('no unprocessed audio')
                startTime = trackDoc['startTimeValue']
                #grab unprocessed audio and its start time
                
                processedAudioURL = trackSettings.get('effectsAudioUrl')
                #if there is a processedAudioURL grab it
                
                if (processedAudioURL is None) and (startTime > 0):
                    processedAudioURL = audioChannel[0].get('audioFileUrl')
                
                if processedAudioURL is None:
                    dummyURL = trackSettings.get('audioFileUrl')
                    if dummyURL != audioURL:
                        processedAudioURL = dummyURL
                
                
                
                #pprint.pprint(startTime)
                #pprint.pprint(audioURL)
                #pprint.pprint(processedAudioURL)
                #pprint.pprint(trackSettings.get('processedAudioUrl_np'))
                #pprint.pprint(trackSettings.get('audioFileUrl'))
                
                #audCh = trackSettings.get('audioChannels')
                #pprint.pprint(audCh[0].get('audioFileUrl'))
                #pprint.pprint('break')

                trackVideo = trackDoc.get('videoFileUrl')
                if trackVideo is None:
                    trackVideo = trackDoc.get('sourceVideoURL')
                    if trackVideo is not None:
                        fromYouTube = True
                    else:
                        fromYouTube = False
                else:
                    fromYouTube = False 
                #grab video files. Set fromYouTube bool
                
                if trackVideo is None:
                    pprint.pprint(trackDoc['_id'])
                
                owner = trackDoc['owner']
                #grab the owner of the track
        
                data.append([str(trackId), str(songId), owner, audioURL, processedAudioURL, startTime, trackVolume, compressorValue, panValue, echoValue, noiseGateValue, reverbValue, eqValue, trackVideo, fromYouTube, isFinished, mixedAudio, mixedVideo, musicBrainzID, newMusicBrainzID])
                #append data to list
                
df = pd.DataFrame(data)
df.columns = ['trackId', 'songId', 'owner', 'audioURL', 'processedAudioURL', 'startTime', 'trackVolume', 'compressorValue', 'panValue', 'echoValue', 'noiseGateValue', 'reverbValue', 'eqValue', 'trackVideo', 'fromYouTube', 'isFinished', 'mixedAudio','mixedVideo', 'musicBrainzID', 'newMusicBrainzID']
#print(df)


#info to check: effects settings inside settings.audioChannels[0] and in settings[i]
#info to check: different audio URLs for each track? Audio URLs in the trackStream vs in the SongStream vs multiple in SongStream
#info to give to Mark: tracks with no corresponding video URL, files with extra audio URLs

ValueError: Length mismatch: Expected axis has 0 elements, new values have 20 elements

In [5]:
hdf = pd.HDFStore('bandhub.h5')

In [6]:
hdf.append('bandhub', df, format = 'table', data_columns = True, compression = 'zlib') 
          # columns=('_id' 'trackID''songID','userID','processedAudioURL' 'unprocessedAudioURL', 'startTime', 'videoURL', 'mergedVideoURL', 'fromYouTube', 'collabFinished', 'mixedAudioURL', 'mergedVideoURL', 'other' )), 
          # format='table', data_columns=True)


In [7]:
hdf.close() # closes the file