# Master Discogs Database

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))


################################################################################
## General Stuff
################################################################################
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from urllib.parse import quote
from collections import Counter
from searchUtils import findExt, findSubExt, findPatternExt, findNearest
from timeUtils import clock, elapsed
from fsUtils import moveFile, setFile, setDir, setSubDir, isFile, isDir, mkDir
from fileUtils import getFileBasics, getBasename
from listUtils import getFlatList
from time import sleep


################################################################################
## Music Stuff
################################################################################

### MultiArtist
from multiArtist import multiartist

### My Music DB
from myMusicDBMap import myMusicDBMap
from musicDBMap import musicDBMap
from matchDBArtist import matchDBArtist

### Master DB code
from masterdb import masterdb
from mainDB import mainDB


import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]




Notebook Last Run Initiated: 2020-11-12 19:05:29.822061


In [None]:
# Instantiate myMusicDBMap (imported under "My Music DB" above) with debug=True.
mdb = myMusicDBMap(debug=True)

In [None]:
# Create the main DB (create=True) and time the whole cell with clock()/elapsed().
# Only the Discogs "full" DB is enabled; the other calls are kept commented out
# so they can be switched on by hand.
start, cmt = clock("Creating DB")
maindb = mainDB(mdb=mdb, create=True, debug=True)
print("\n\n{0}\n".format("Full DB"))
#maindb.setDBFull("AllMusic")
#maindb.setDBFull("MusicBrainz")
maindb.setDBFull("Discogs")
#maindb.setDBFull("LastFM")
#maindb.setDBFull()
# The "Known DB" banner is still printed although every call below is disabled.
print("\n\n{0}\n".format("Known DB"))
#maindb.setDBKnown() ## Do this to recreate everything
#maindb.setDBKnown()
#artistDBs = maindb.getKnownArtistDBs()

elapsed(start, cmt)

In [None]:
# Same constructor call as the "Creating DB" cell, but with create=False.
maindb = mainDB(mdb=mdb, create=False, debug=True)

In [None]:
# Call setDBFull without a database name (the "Creating DB" cell passes "Discogs").
maindb.setDBFull()

# Metadata

In [None]:
%load_ext autoreload
%autoreload

from multiprocessing import Pool
from mainDB import mainDB
import time
# NOTE: this rebinds the name `mainDB` from the imported class to an instance,
# so `mainDB(...)` cannot be called again until the import above is re-run.
mainDB = mainDB()
# Module-level handle that the parseArtists* helpers below index by database name.
dbdata = mainDB.dbdata

def parseArtistsAM(modVal, force=False, doExtra=False):
    """Parse the AllMusic artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    allMusicArtists = dbdata["AllMusic"]["Artists"]
    allMusicArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsDC(modVal, force=True, doExtra=False):
    """Parse the Discogs artist files that belong to one modVal bucket.

    Unlike the other parseArtists* helpers in this cell, ``force`` defaults to
    True here.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    discogsArtists = dbdata["Discogs"]["Artists"]
    discogsArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsMB(modVal, force=False, doExtra=False):
    """Parse the MusicBrainz artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    musicBrainzArtists = dbdata["MusicBrainz"]["Artists"]
    musicBrainzArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsAB(modVal, force=False, doExtra=False):
    """Parse all AceBootlegs artist files in a single call.

    The parser is looked up in the module-level ``dbdata`` map, like the other
    parseArtists* helpers, instead of the ``artsAB`` global that no visible
    cell defines.
    NOTE(review): assumes ``dbdata`` has an "AceBootlegs" entry - confirm.

    Args:
        modVal: Ignored; present so the helper can be driven by ``Pool.map_async``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    dbdata["AceBootlegs"]["Artists"].parseArtistFiles(force=force)
    
def parseArtistsDP(modVal, force=False, doExtra=False):
    """Parse all DatPiff artist files in a single call.

    ``force`` is now forwarded to the parser; previously it was accepted but
    silently dropped, unlike the other ``parseArtistFiles`` callers in this cell.

    Args:
        modVal: Ignored; present so the helper can be driven by ``Pool.map_async``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    dbdata['DatPiff']['Artists'].parseArtistFiles(force=force)

def parseArtistsRM(modVal, force=False, doExtra=False):
    """Parse the RateYourMusic artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    rateYourMusicArtists = dbdata["RateYourMusic"]["Artists"]
    rateYourMusicArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsLM(modVal, force=False, doExtra=False):
    """Parse the LastFM artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    lastFMArtists = dbdata["LastFM"]["Artists"]
    lastFMArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsRC(modVal, force=False, doExtra=False):
    """Parse the RockCorner artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    rockCornerArtists = dbdata["RockCorner"]["Artists"]
    rockCornerArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsCL(modVal, force=False, doExtra=False):
    """Parse the CDandLP artist files that belong to one modVal bucket.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    cdAndLPArtists = dbdata["CDandLP"]["Artists"]
    cdAndLPArtists.parseArtistModValFiles(modVal, force=force)

def parseArtistsMS(modVal, force=False, doExtra=False):
    """Parse all MusicStack artist files in a single call.

    The parser is looked up in the module-level ``dbdata`` map, like the other
    parseArtists* helpers, instead of the ``artsMS`` global that no visible
    cell defines.
    NOTE(review): assumes ``dbdata`` has a "MusicStack" entry - confirm.

    Args:
        modVal: Ignored; present so the helper can be driven by ``Pool.map_async``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    dbdata["MusicStack"]["Artists"].parseArtistFiles(force=force)

def parseArtistsMT(modVal, force=False, doExtra=False):
    """Parse the MetalStorm artist files that belong to one modVal bucket.

    The parser is looked up in the module-level ``dbdata`` map, like the other
    parseArtists* helpers, instead of the ``artsMT`` global that no visible
    cell defines.
    NOTE(review): assumes ``dbdata`` has a "MetalStorm" entry - confirm.

    Args:
        modVal: Bucket value handed straight to ``parseArtistModValFiles``.
        force: Forwarded unchanged as the parser's ``force`` keyword.
        doExtra: Accepted so all parseArtists* helpers share one signature; unused.
    """
    dbdata["MetalStorm"]["Artists"].parseArtistModValFiles(modVal, force=force)

    
def parseArtistsParallel(db, nProcs=3, force=False):
    """Parse every artist file of one database with a pool of worker processes.

    The database name is validated before any process is started, and the pool
    is closed and joined before returning, so no worker processes are left
    behind (previously the pool was never shut down, not even when ``db`` was
    rejected).

    Args:
        db: Database name; must be one of the names handled below.
        nProcs: Number of worker processes in the pool.
        force: Only selects the polling interval (10 s when True, otherwise
            1 s). It is NOT forwarded to the workers, which run with their own
            ``force`` defaults.

    Returns:
        The list produced by ``map_async(...).get()``, one entry per work item.

    Raises:
        ValueError: If ``db`` is not a recognized database name.
    """
    allModVals = range(100)

    # Pick the worker and its work items first so an unknown name fails fast.
    if db == "Discogs":
        worker, workItems = parseArtistsDC, allModVals
    elif db == "AllMusic":
        worker, workItems = parseArtistsAM, allModVals
    elif db == "MusicBrainz":
        worker, workItems = parseArtistsMB, allModVals
    elif db == "AceBootlegs":
        worker, workItems = parseArtistsAB, [None]
    elif db == "DatPiff":
        worker, workItems = parseArtistsDP, [None]
    elif db == "RateYourMusic":
        # Runs in the parent before the per-modVal parse is fanned out.
        dbdata["RateYourMusic"]["Artists"].parseDownloadedFiles()
        worker, workItems = parseArtistsRM, allModVals
    elif db == "LastFM":
        worker, workItems = parseArtistsLM, allModVals
    elif db == "RockCorner":
        worker, workItems = parseArtistsRC, allModVals
    elif db == "CDandLP":
        worker, workItems = parseArtistsCL, allModVals
    elif db == "MusicStack":
        worker, workItems = parseArtistsMS, [None]
    elif db == "MetalStorm":
        worker, workItems = parseArtistsMT, allModVals
    else:
        raise ValueError("[{0}] is not recognized as a DB".format(db))

    pollSeconds = 10 if force is True else 1
    with Pool(processes=nProcs) as pool:
        result = pool.map_async(worker, workItems)
        while not result.ready():
            time.sleep(pollSeconds)
        print("")
        # Collect inside the ``with`` block: leaving it terminates the pool.
        output = result.get()
        pool.close()
        pool.join()
    return output

In [None]:
# Time the parallel artist parse. Two clocks are started and both are reported
# at the end; only the Discogs run is enabled, the other databases are kept as
# commented-out alternatives.
start,cmt=clock("Parsing All Artists")
start2,cmt2=clock("Parallel Artist Parsing")
#parseArtistsParallel(db="AllMusic", nProcs=3)
#parseArtistsParallel(db="MusicBrainz", nProcs=3)
parseArtistsParallel(db="Discogs", nProcs=4)
#parseArtistsParallel(db="AceBootlegs", nProcs=1)
#parseArtistsParallel(db="DatPiff", nProcs=1)
#parseArtistsParallel(db="RateYourMusic", nProcs=3)
#parseArtistsParallel(db="LastFM", nProcs=3)
#parseArtistsParallel(db="RockCorner", nProcs=3)
#parseArtistsParallel(db="CDandLP", nProcs=3)
#parseArtistsParallel(db="MusicStack", nProcs=1)
elapsed(start2, cmt2)
elapsed(start, cmt)

# Download Stuff

In [None]:
## Tests
# Disabled smoke test: add one artist, attach a Discogs ID, then show the entry.
# NOTE(review): `mdbmap` is not defined in any visible cell - confirm before enabling.
if False:
    mdbmap.addArtist("Name")
    mdbmap.add("Name", None, "Discogs", "4545468")
    mdbmap.getArtistData("Name").show()

In [None]:
# Disabled one-off copy of every artist and its per-database IDs from `mdb`
# into `mdbmap`, saved once at the end.
# NOTE(review): `mdbmap` is not defined in any visible cell - confirm before enabling.
if False:
    mymusic = mdb.get()
    for artistName,artistData in mymusic.items():
        mdbmap.addArtist(artistName)
        # The loop variable is deliberately not called `dbdata`: that name is the
        # module-level map the parseArtists* helpers read, and reusing it here
        # would clobber it for the rest of the session.
        for db,dbEntry in artistData.items():
            mdbmap.add(artistName, None, db, dbEntry["ID"])
        print(artistName)
    mdbmap.save()

# Main DB

In [3]:
### Master DB code
%load_ext autoreload
%autoreload
from masterdb import masterdb
from mainDB import mainDB

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Build the main DB handle without a music map (mdb=None) and with debug off,
# then load its data map. The per-database summary recorded below is the output
# of this cell.
maindb = mainDB(mdb=None, create=False, debug=False)
maindb.loadDBDataMap()

Summary Statistics For DB: Discogs
    Using Known Artists: False
    Found 753273 ID -> Name entries
    Found 687680 Name -> ID entries
    Found 0 Albums
Summary Statistics For DB: AllMusic
    Using Known Artists: False
    Found 55277 ID -> Name entries
    Found 51322 Name -> ID entries
    Found 232502 Albums
Summary Statistics For DB: MusicBrainz
    Using Known Artists: False
    Found 133914 ID -> Name entries
    Found 112971 Name -> ID entries
    Found 0 Albums
Summary Statistics For DB: LastFM
    Using Known Artists: False
    Found 115953 ID -> Name entries
    Found 115789 Name -> ID entries
    Found 992668 Albums
Summary Statistics For DB: RockCorner
    Using Known Artists: False
    Found 1262 ID -> Name entries
    Found 1262 Name -> ID entries
    Found 14481 Albums
Summary Statistics For DB: AceBootlegs
    Using Known Artists: False
    Found 341 ID -> Name entries
    Found 341 Name -> ID entries
    Found 2838 Albums
Summary Statistics For DB: CDandLP
    Usi

In [None]:
# Pick one artist to use as a manual test case; the name/ID pair is hard-coded
# from the value noted in the trailing comment.
# NOTE(review): `mdbmap` is not defined in any visible cell - confirm.
artistNames = mdbmap.getArtists()
artistNames[249]  # ('Bastille', 'b31cc180eb21f511abd6f2295cb99a27')
artistName = 'Bastille'
artistID   = 'b31cc180eb21f511abd6f2295cb99a27'

In [None]:


# Match a single artist by hand: give the matcher the artist name, ID and one
# album title, then request matches for Discogs only (mc) and for all DBs (mcs).
mdbMatcher = matchDBArtist(maindb)
mdbMatcher.setArtistInfo(artistName, artistID, ["Bad Blood"])
#mdbMatcher.findPotentialArtistNameMatches()
mc = mdbMatcher.findPotentialArtistAlbumMatchesByDB('Discogs')
mcs = mdbMatcher.findPotentialArtistAlbumMatches()

****

# Primary DB Matching

In [None]:
dbName = "AllMusic"

In [5]:
# One musicDBMap per database reported by maindb.getDBs(), keyed by database name.
mdbmaps = {db: musicDBMap(db, init=False) for db in maindb.getDBs()}
# Disabled hand-written equivalent for a fixed list of databases.
if False:
    mdbmaps["AllMusic"] = musicDBMap("AllMusic", init=False)
    mdbmaps["MusicBrainz"] = musicDBMap("MusicBrainz", init=False)
    mdbmaps["Discogs"] = musicDBMap("Discogs", init=False)
    mdbmaps["LastFM"] = musicDBMap("LastFM", init=False)
    mdbmaps["RockCorner"] = musicDBMap("RockCorner", init=False)
    mdbmaps["AceBootlegs"] = musicDBMap("AceBootlegs", init=False)
    mdbmaps["MusicStack"] = musicDBMap("MusicStack", init=False)
    mdbmaps["CDandLP"] = musicDBMap("CDandLP", init=False)

  Loaded 44120 previously matched entries
  Loaded 41006 previously matched entries
  Loaded 39630 previously matched entries
  Loaded 32618 previously matched entries
  Loaded 1262 previously matched entries
  Loaded 178 previously matched entries
  Loaded 4977 previously matched entries
  Loaded 77 previously matched entries
  Loaded 187 previously matched entries


In [6]:
from pandas import Series

class masterDBMatchClass:
    """Prepare unmatched artists of one database and cross-propagate matches.

    Attributes:
        maindb: Object whose ``dbdata[db]["Disc"]`` entries provide the artist
            and album tables.
        mdbmaps: Dict of database name -> match map. The methods below use
            ``getDF``, ``isKnownKey``, ``addArtist``, ``addArtistData`` and
            ``save`` on these maps.
        artistData: Dict of database name -> artist table, loaded eagerly in
            ``__init__`` for every key of ``maindb.dbdata``.
    """

    def __init__(self, maindb, mdbmaps):
        """Store the handles and load the artist table of every database."""
        self.maindb  = maindb
        self.mdbmaps = mdbmaps

        print("Loading Artist Names")
        self.artistData = {db: self.getArtistNameDB(db) for db in maindb.dbdata.keys()}

    def getArtistNameDB(self, db):
        """Return the artist table of ``db`` as provided by its "Disc" entry."""
        return self.maindb.dbdata[db]["Disc"].getMasterSlimArtistDiscogsDB()

    def getArtistAlbumsDB(self, db):
        """Return the artist-albums table of ``db`` as provided by its "Disc" entry."""
        return self.maindb.dbdata[db]["Disc"].getMasterSlimArtistAlbumsDiscogsDB()

    @staticmethod
    def _flattenAlbums(mediaAlbums):
        """Flatten one ``{media: {key: album}}`` cell into a flat album list."""
        # The left join in getDBMatchData leaves a non-mapping placeholder (NaN)
        # for artists without an album row; treat those as "no albums".
        if not hasattr(mediaAlbums, "items"):
            return []
        return getFlatList([albums.values() for media,albums in mediaAlbums.items()])

    def getDBMatchData(self, dbName):
        """Build ``{(artistName, artistID): [albums...]}`` for one database.

        Args:
            dbName: Database whose artist and album tables are joined.

        Returns:
            Dict keyed by ``(DiscArtist, index value)`` with the flattened
            album list of that artist as value.
        """
        print("Loading Artist Albums")
        artistsDF = self.artistData[dbName]
        albumsDF  = self.getArtistAlbumsDB(dbName)

        dbArtistAlbums = artistsDF[["DiscArtist"]].join(albumsDF)
        dbArtistAlbums["Albums"] = dbArtistAlbums["Albums"].apply(self._flattenAlbums)
        perArtist = dbArtistAlbums.T.to_dict()
        return {(row["DiscArtist"], dbArtistID): row["Albums"] for dbArtistID,row in perArtist.items()}

    def getArtistNameFromID(self, db, dbID):
        """Return the artist name stored for ``dbID`` in ``db``.

        Returns:
            The ``DiscArtist`` value when exactly one row has index ``dbID``;
            ``None`` when there is no such row or more than one.
        """
        df  = self.artistData[db]
        adf = df[df.index == dbID]
        if adf.shape[0] != 1:
            return None
        return list(adf["DiscArtist"])[0]

    def getDataToMatch(self, db, maxValues=100, maxAlbums=100, sort=True):
        """Collect artists of ``db`` that are not yet in its match map.

        Args:
            db: Database name.
            maxValues: Maximum number of artists to return; ``None`` = no limit.
            maxAlbums: Artists with this many albums or more are skipped;
                ``None`` disables the filter (previously this raised TypeError).
            sort: When True, visit artists by descending album count.

        Returns:
            ``{db: [[artistName, artistID, albums], ...]}``. Note that the
            outer dict always has exactly one key, even when the list is empty.
        """
        matchData = self.getDBMatchData(db)
        nAlbums   = Series({primaryKey: len(albums) for primaryKey,albums in matchData.items()}).sort_values(ascending=False).to_dict()
        sortedKeys = nAlbums.keys() if sort is True else matchData.keys()

        toMatch = []
        known   = 0
        for primaryKey in sortedKeys:
            if maxAlbums is not None and nAlbums[primaryKey] >= maxAlbums:
                continue
            if self.mdbmaps[db].isKnownKey(primaryKey):
                known += 1
                continue
            # Keep iterating after the limit is hit so `known` still counts
            # every already-matched artist for the summary line below.
            if maxValues is not None and len(toMatch) >= maxValues:
                continue
            toMatch.append([primaryKey[0], primaryKey[1], matchData[primaryKey]])
        print(len(toMatch),known,len(matchData))
        return {db: toMatch}

    def getMutualEntries(self):
        """Print a table of how many entries each match map holds per database.

        Row = map being inspected, column = target database, cell = number of
        non-null IDs stored for that target. A target the map has no column
        for counts as 0 (previously a KeyError), and the diagonal now counts
        non-null IDs instead of all rows.
        """
        dbOrder  = list(self.mdbmaps.keys())
        entryMap = {db: {db2: None for db2 in dbOrder} for db in dbOrder}

        for db1 in dbOrder:
            db1df = self.mdbmaps[db1].getDF().T
            for db2 in dbOrder:
                if db2 not in db1df.columns:
                    entryMap[db1][db2] = 0
                    continue
                entryMap[db1][db2] = int(db1df[db2].notna().sum())

        print("{0: <25}".format(""), end="")
        for db in entryMap.keys():
            print("{0: <15}".format(db), end="")
        print("")
        for db1,dbEntries in entryMap.items():
            print("{0: <25}".format(db1), end="")
            for db2,db2Entry in dbEntries.items():
                print("{0: <15}".format(db2Entry), end="")
            print("")
        print("")

    def matchMutualMaps(self):
        """Copy every db1 -> db2 match into db2's map as db2 -> db1, then save.

        All writes go through ``self.mdbmaps`` (previously the notebook-global
        ``mdbmaps`` was used, which only worked when it happened to be the
        same object).
        """
        start,cmt = clock("Mutual mapping it")
        dbOrder = list(self.mdbmaps.keys())
        for i,db1 in enumerate(dbOrder):
            print(i,'\t',db1)
            db1df = self.mdbmaps[db1].getDF().T
            if db1 not in db1df.columns:
                # Without its own ID column there is nothing to propagate.
                continue
            for j,db2 in enumerate(dbOrder):
                if i == j or db2 not in db1df.columns:
                    continue
                db2MatchesFromdb1 = db1df[[db1,db2]]
                db2MatchesFromdb1 = db2MatchesFromdb1[db2MatchesFromdb1[db2].notna()]
                targetMap = self.mdbmaps[db2]
                for key,row in db2MatchesFromdb1.iterrows():
                    # NOTE(review): row[db1] can itself be null, and the name
                    # lookup can return None; both are passed on unchanged -
                    # confirm whether such rows should be skipped.
                    db1MatchID   = row[db1]
                    db2MatchID   = row[db2]
                    db2MatchName = self.getArtistNameFromID(db2,db2MatchID)
                    targetMap.addArtist(db2MatchName,db2MatchID)
                    targetMap.addArtistData(db2MatchName,db2MatchID,db1,db1MatchID)

        for db in dbOrder:
            self.mdbmaps[db].save()

        elapsed(start, cmt)

In [7]:
# Build the matcher helper; its constructor prints "Loading Artist Names" and
# loads the artist table of every database in maindb.dbdata.
mdbmc = masterDBMatchClass(maindb, mdbmaps)

Loading Artist Names


In [None]:
# Up to 5000 not-yet-matched AllMusic artists with fewer than 100 albums,
# returned as {"AllMusic": [[name, id, albums], ...]}.
toMatch   = mdbmc.getDataToMatch("AllMusic", maxValues=5000, maxAlbums=100)

In [None]:
toMatch

# Matching Code

In [13]:
from tqdm import tqdm
from multiprocessing import Pool
from functools import partial
import time

def matchDBArtistWithAlbums(item, *args, **kwargs):
    """Run the album-based artist match for one work item.

    Args:
        item: Sequence whose first three entries are the artist name, the
            artist ID and the artist's album list.
        *args, **kwargs: Accepted and ignored (the driver loop binds a
            ``cutoff`` keyword via ``partial``, which is not used here).

    Returns:
        ``[artistName, artistID, matches]`` where ``matches`` is whatever
        ``findPotentialArtistAlbumMatches()`` returns.
    """
    name, dbID, albums = item[0], item[1], item[2]

    # A fresh matcher per item, bound to the module-level `maindb`.
    matcher = matchDBArtist(maindb)
    matcher.setArtistInfo(name, dbID, albums)
    return [name, dbID, matcher.findPotentialArtistAlbumMatches()]


def multiProc(func, argument_list, num_processes):
    """Map ``func`` over ``argument_list`` in worker processes with a progress bar.

    The pool is closed and joined before returning, so repeated calls (the
    driver loop calls this once per batch) no longer accumulate worker
    processes.

    Args:
        func: Picklable callable applied to each item.
        argument_list: Sized sequence of work items; its length sets the
            progress-bar total.
        num_processes: Number of worker processes.

    Returns:
        List of results in the same order as ``argument_list``.
    """
    with Pool(processes=num_processes) as pool:
        # Consume the iterator inside the ``with`` block: leaving it
        # terminates the pool.
        results = list(tqdm(pool.imap(func=func, iterable=argument_list), total=len(argument_list)))
        pool.close()
        pool.join()
    return results



def saveMapData(mdbmap, result_list):
    """Record match results in ``mdbmap`` and save it once at the end.

    Args:
        mdbmap: Match map; ``addArtist``, ``addArtistData`` and ``save`` are
            called on it.
        result_list: Iterable of ``[artistName, artistID, matches]`` entries,
            where ``matches`` maps a database name to an object exposing
            ``matchID`` and ``matchScore``.
    """
    for result in result_list:
        name, dbID, matches = result[0], result[1], result[2]
        mdbmap.addArtist(name, dbID)
        for targetDB in matches:
            candidate  = matches[targetDB]
            matchID    = candidate.matchID
            matchScore = candidate.matchScore  # read but not used further here
            if matchID is None:
                continue
            mdbmap.addArtistData(name, dbID, targetDB, matchID)

    mdbmap.save()

In [9]:
# Current AllMusic match map as a DataFrame (transposed before inspection below).
df = mdbmaps['AllMusic'].getDF()

In [12]:
# Show the rows of the AllMusic map whose own "AllMusic" column is null.
df.T[df.T["AllMusic"].isna()]

Unnamed: 0,Unnamed: 1,Discogs,AllMusic,MusicBrainz,AceBootlegs,RateYourMusic,LastFM,DatPiff,RockCorner,CDandLP,MusicStack,MetalStorm
Dog Faced Hermans,0000794391,,,100203949104445552254798470887574779706,,,22338467735,,,,,
Barry Adamson,0000786245,,,108585600191802208411428998011777911706,,,,,,,,
Paul Wall,0000036641,,,109649788685445033720271999922869803506,,,,,,,,
Dave Hillyard & the Rocksteady 7,0000960029,,,110195896558717384948577420887356762106,,,,,,,,
The Holy Modal Rounders,0000062273,,,110550128985270066259962109652052434206,,,29953164381,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
Breach the Silence,0003294757,,,,,,88519190616,,,,,
Ronnie & the Pomona Casuals,0000291585,,,,,,72815791892,,,,,
Brothers Grim & the Blue Murders,0002766572,,,,,,68000096197,,,,,
Barbwire Tourniquet,0003088357,,,,,,40108723592,,,,,


In [None]:
# Match unmatched artists in batches of up to 500 per database and save after
# every batch, for at most 100 rounds.
for i in range(100):
    workDone = False
    #for db in ["AllMusic", "MusicBrainz", "LastFM", "RockCorner", "CDandLP", "RateYourMusic", "MusicStack"]:
    for db in ["AllMusic"]: #, "MusicBrainz", "LastFM", "RockCorner", "CDandLP", "RateYourMusic", "MusicStack"]:
        toMatch   = mdbmc.getDataToMatch(db, maxValues=500, maxAlbums=100)

        # getDataToMatch returns {db: [...]}, so the dict itself always has one
        # key; the emptiness check has to look at the list inside it.
        dbName = list(toMatch.keys())[0]
        argument_list = toMatch[dbName]
        if len(argument_list) == 0:
            continue
        workDone = True

        num_processes = 3
        pfunc = partial(matchDBArtistWithAlbums, cutoff=0.95) # Giving some arguments for kwargs
        print("Running imap multiprocessing for {0} artists ...".format(len(argument_list)))
        result_list = multiProc(func=pfunc, argument_list=argument_list,
                                               num_processes=num_processes)

        saveMapData(mdbmaps[dbName], result_list)

        #mdbmc.matchMutualMaps()

    # Nothing left to match in any database: stop instead of reloading the
    # album tables for every remaining round.
    if not workDone:
        break

Loading Artist Albums
500 40376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:13<00:00,  1.34it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.8MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.8MB.
Loading Artist Albums
500 40876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:26<00:00,  1.29it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.8MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.8MB.
Loading Artist Albums
500 41376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:13<00:00,  1.34it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.9MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 4.9MB.
Loading Artist Albums
500 41876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:13<00:00,  1.34it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.0MB.
Loading Artist Albums
500 42376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:20<00:00,  1.31it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.0MB.
Loading Artist Albums
500 42876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:13<00:00,  1.34it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.1MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.1MB.
Loading Artist Albums
500 43376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:29<00:00,  1.28it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.1MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.1MB.
Loading Artist Albums
500 43876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:25<00:00,  1.30it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.2MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.2MB.
Loading Artist Albums
500 44376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:06<00:00,  1.36it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.2MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.2MB.
Loading Artist Albums
500 44876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:05<00:00,  1.37it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.3MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.3MB.
Loading Artist Albums
500 45376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:40<00:00,  1.25it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.4MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.4MB.
Loading Artist Albums
500 45876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:21<00:00,  1.31it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.4MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.4MB.
Loading Artist Albums
500 46376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:26<00:00,  1.29it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.5MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.5MB.
Loading Artist Albums
500 46876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:11<00:00,  1.35it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.5MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.5MB.
Loading Artist Albums
500 47376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:14<00:00,  1.34it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.6MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.6MB.
Loading Artist Albums
500 47876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:32<00:00,  1.27it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.7MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.7MB.
Loading Artist Albums
500 48376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:06<00:00,  1.36it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.7MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.7MB.
Loading Artist Albums
500 48876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:34<00:00,  1.27it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.8MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.8MB.
Loading Artist Albums
500 49376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:26<00:00,  1.29it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.8MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.8MB.
Loading Artist Albums
500 49876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:26<00:00,  1.29it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.9MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 5.9MB.
Loading Artist Albums
500 50376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:23<00:00,  1.30it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.0MB.
Loading Artist Albums
500 50876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:26<00:00,  1.29it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.0MB.
Loading Artist Albums
500 51376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:50<00:00,  1.22it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.1MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.1MB.
Loading Artist Albums
500 51876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [06:40<00:00,  1.25it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.1MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.1MB.
Loading Artist Albums
500 52376 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [07:56<00:00,  1.05it/s]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.2MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.2MB.
Loading Artist Albums
500 52876 55277
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [09:28<00:00,  1.14s/it]


Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.2MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbAllMusicMap.p
  --> This file is 6.2MB.
Loading Artist Albums
500 53376 55277
Running imap multiprocessing for 500 artists ...


 77%|███████▋  | 387/500 [05:17<02:35,  1.38s/it]

In [None]:
# Disabled serial version of the matching loop, collecting [matchID, matchScore]
# per database for each artist.
# NOTE(review): the loop only prints item[0]; artistName, artistID and
# artistAlbums are never taken from `item`, so every iteration would reuse
# whatever those globals currently hold. `matchData` is also sliced like a
# list although getDBMatchData returns a dict - confirm before enabling.
if False:
    results = {}
    for item in matchData[:100]:
        print(item[0])
        mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)
        #mdbMatcher.findPotentialArtistNameMatches()
        #mc = mdbMatcher.findPotentialArtistAlbumMatchesByDB('Discogs')
        mcs = mdbMatcher.findPotentialArtistAlbumMatches()
        results[(artistName,artistID)] = {db: [mc.matchID, mc.matchScore] for db, mc in mcs.items()}

In [None]:
df = mdbmap.getDF().T

In [None]:
from pandas import Series

# Rank every AllMusic primary key by the size of its album collection, largest first.
matchData  = mdbmc.getDBMatchData("AllMusic")
sortedData = (
    Series({key: len(albumList) for key, albumList in matchData.items()})
    .sort_values(ascending=False)
    .to_dict()
)

In [None]:
# Display the per-key album counts (sorted in descending order when built).
sortedData

In [None]:
# Display the raw match data for inspection.
matchData

In [None]:


s
        
        toMatch = []
        known   = 0
        for primaryKey,albums in matchData.items():

In [None]:

            
        
        
    def matchChartArtist(self, dbs=None, albumType=None, ratioCut=0.95, returnData=True):
        """Choose, per database, the artist ID whose albums best match the chart artist.

        If ``self.mdb`` already knows the chart artist, its stored data is
        returned immediately. Otherwise candidate IDs are requested from
        ``self.mdb`` (``num=50``, ``cutoff=0.7``) and every candidate's albums
        are compared with the chart albums through ``matchAlbums``.

        Args:
            dbs: ``None`` to query all databases, or a list of database names.
            albumType: Optional album-type key. When given, media types that
                are not in ``self.mdb.getDBAlbumTypeNames(db, albumType)`` are skipped.
            ratioCut: Cutoff passed to ``matchAlbums``.
            returnData: When true the result is returned. Otherwise it is
                stored in ``self.results`` under the chart artist and ``None``
                is returned.

        Returns:
            ``{db: best matching artist ID or None}``, or the known artist's data.

        Raises:
            ValueError: If ``dbs`` is neither ``None`` nor a list.
        """
        chartName   = self.cad.artist
        chartAlbums = self.cad.albums

        # Already matched artists need no further work.
        if self.mdb.isKnown(chartName):
            return self.mdb.getArtistData(chartName)

        ######################################################################
        #### Candidate artists per database
        ######################################################################
        if dbs is None:
            candidates = self.mdb.getArtistIDs(chartName, num=50, cutoff=0.7)
        elif isinstance(dbs, list):
            candidates = self.mdb.getArtistIDsFromDBs(chartName, dbs=dbs, num=50, cutoff=0.7)
        else:
            raise ValueError("DBs must be a list")

        ######################################################################
        #### Album comparison: db -> artist ID -> media type -> matcher
        ######################################################################
        matches = {}
        for db, namedIDs in candidates.items():
            perID = {}
            for candidateIDs in namedIDs.values():
                for candidateID in candidateIDs:
                    # An ID seen again under another name starts from scratch.
                    perType = perID[candidateID] = {}
                    albumsByType = self.mdb.getArtistAlbumsFromID(db, candidateID)

                    for mediaType, mediaAlbums in albumsByType.items():
                        if albumType is not None and mediaType not in self.mdb.getDBAlbumTypeNames(db, albumType):
                            continue

                        matcher = matchAlbums(cutoff=ratioCut)
                        matcher.match(chartAlbums, mediaAlbums)
                        perType[mediaType] = matcher

            matches[db] = perID

        ######################################################################
        #### Best candidate: most near-matches, ties broken by higher score
        ######################################################################
        retval = {}
        for db, perID in matches.items():
            bestID, bestNear, bestScore = None, 0, 0.0
            for candidateID, perType in perID.items():
                for matcher in perType.values():
                    if matcher.near == 0:
                        continue
                    if matcher.near > bestNear or (matcher.near == bestNear and matcher.score > bestScore):
                        bestID, bestNear, bestScore = candidateID, matcher.near, matcher.score
            retval[db] = bestID

        if not returnData:
            self.results[self.cad.artist] = retval
            return None
        return retval
    

In [None]:

    def initKey(self, db):
        """Set database ``db`` to ``self.artistIDElement`` for every artist.

        Returns:
            The updated ``self.musicmap``.
        """
        for artistEntry in self.musicmap.values():
            artistEntry[db] = self.artistIDElement
        return self.musicmap
        

    def initArtist(self, artistName):
        """(Re)create the entry for ``artistName`` with one slot per known database.

        Every slot is set to ``self.artistIDElement``; any existing entry for
        the artist is replaced.

        Returns:
            The updated ``self.musicmap``.
        """
        freshEntry = {}
        for dbName in self.getDBs():
            freshEntry[dbName] = self.artistIDElement
        self.musicmap[artistName] = freshEntry
        return self.musicmap
        

    def initArtistDB(self, artistName, db):
        """Set the ``db`` slot of an existing artist to ``self.artistIDElement``.

        Raises:
            KeyError: If ``artistName`` has no entry in ``self.musicmap``.

        Returns:
            The updated ``self.musicmap``.
        """
        emptyElement = self.artistIDElement
        artistEntry  = self.musicmap[artistName]
        artistEntry[db] = emptyElement
        return self.musicmap
        #self.saveMyMusicMap()
        

    def rmArtist(self, artistName):
        """Remove ``artistName`` and all of its database entries from the music map.

        Prints a notice and leaves the map untouched when the artist has no
        (non-``None``) entry.

        Args:
            artistName: Key of the artist to remove from ``self.musicmap``.
        """
        if self.musicmap.get(artistName) is None:
            print("There is no artist [{0}] in music DB.".format(artistName))
            return
        del self.musicmap[artistName]
        

    def rmArtistDBKey(self, artistName, db):
        """Remove the ``db`` slot from the entry of ``artistName``.

        The slot is removed whenever the key exists, including when its stored
        value is ``None``. A notice is printed when the artist is unknown or
        when the artist has no ``db`` slot to delete.

        Args:
            artistName: Artist whose entry is edited.
            db: Database key to delete from that entry.
        """
        artistEntry = self.musicmap.get(artistName)
        if artistEntry is None:
            print("There is no artist [{0}] in music DB.".format(artistName))
            return

        # Test membership rather than the stored value so that empty (None)
        # slots are removed as well.
        if db in artistEntry:
            del artistEntry[db]
            return
        print("Could not delete db [{0}] for artist [{1}] in music DB.".format(db, artistName))


        
    def rmKey(self, db):
        """Remove database ``db`` from every artist entry.

        ``db`` is validated with ``self.checkDB`` first, then
        ``self.rmArtistDBKey`` is called once per artist.

        Args:
            db: Database key to strip from all artists.

        Returns:
            The updated ``self.musicmap``.
        """
        self.checkDB(db)
        # Snapshot the artist names so the loop does not depend on the map
        # staying unmodified while entries are edited.
        for myArtistName in list(self.musicmap):
            self.rmArtistDBKey(myArtistName, db)
        return self.musicmap
        #self.saveMyMusicMap()
        
        
    def addArtist(self, artistName):
        """Create an entry for ``artistName`` unless one already exists.

        A new entry holds one slot per known database, each set to
        ``self.artistIDElement``. Progress is printed to stdout.
        """
        if self.musicmap.get(artistName) is not None:
            return

        print("Adding Artist {0}".format(artistName))
        newEntry = {}
        for dbName in self.getDBs():
            newEntry[dbName] = self.artistIDElement
        self.musicmap[artistName] = newEntry
        print("\t",self.musicmap[artistName])
            
       
    def ignoreList(self, dbName):
        """Return the artist names that must never be added for ``dbName``.

        Only "AllMusic" has an ignore list; every other database gets an empty
        list. A fresh list is returned on each call.
        """
        if dbName != "AllMusic":
            return []
        return [
            "Bryan Adams", "Leslie Keith", "Patrick Swayze", "David Frizzell & Shelly West", "Matt Monroe",
            "Antonio Vivaldi", "Franz Liszt", "Georges Bizet", "Hector Berlioz", "Richard Wagner",
            "Robert Schumann",
        ]
        
        
    def add(self, artistName, dbName, artistID):
        """Record ``artistID`` as the ``dbName`` match for ``artistName``.

        Nothing is stored when ``artistID`` is ``None`` or when the artist is on
        the ignore list of ``dbName``. An artist without an entry is created
        through ``self.addArtist`` first. Progress is printed to stdout.

        Args:
            artistName: Artist key in ``self.musicmap``.
            dbName: Database the ID belongs to (validated by ``self.checkDB``).
            artistID: ID to store (validated by ``self.checkID``); must be
                convertible with ``int()``.

        Raises:
            ValueError: If ``artistID`` cannot be converted to an integer.
        """
        if artistID is None:
            print("Not Adding None ArtistID")
            return

        self.checkDB(dbName)
        self.checkID(artistID)

        ignores = self.ignoreList(dbName)
        if artistName in ignores:
            print("Not adding [{0}] because it's on the ignore list".format(artistName))
            return

        try:
            int(artistID)
        except (TypeError, ValueError, OverflowError) as err:
            # Balanced braces and arguments in label order, so the message
            # really names the artist, the database and the offending ID.
            raise ValueError("Artist [{0}] and Database [{1}] with ID [{2}] isn't a number".format(artistName, dbName, artistID)) from err

        if self.musicmap.get(artistName) is None:
            self.addArtist(artistName)
        dbData = self.musicmap[artistName].get(dbName)

        if dbData is None:
            print("Adding Database [{0}] to DB list for [{1}]".format(dbName, artistName))
            self.musicmap[artistName][dbName] = self.artistIDElement
        else:
            if dbData["ID"] != artistID:
                print("  Replacing ID for DB [{0}] from [{1}] to [{2}]".format(dbName, dbData["ID"], artistID))

        self.checkDBID(dbName, artistID, artistName)
        self.musicmap[artistName][dbName] = {"ID": artistID, "Name": None}
        print("Artist DB Data: {0}".format(self.musicmap[artistName]))
        
        
        
    ####################################################################################################
    #
    # DB Section
    #
    ####################################################################################################
    def getDBMatches(self, db):
        """Map every artist that has a non-``None`` ID in ``db`` to that ID."""
        perArtist = ((name, self.getArtistDBData(name, db)) for name in self.getArtists())
        return {name: data['ID'] for name, data in perArtist if data.get('ID') is not None}
    
    
    def setDBMatches(self):
        """Rebuild ``self.dbArtistData`` as a ``{db: {artist ID: artist name}}`` lookup.

        Every (artist, db) pair from ``self.get()`` is validated with
        ``self.checkDB``; slots holding ``None`` are skipped, and the ID of
        every other slot is validated with ``self.checkID`` before it is stored.
        """
        lookup = {}
        for dbName in self.getDBs():
            lookup[dbName] = {}
        self.dbArtistData = lookup

        for artistName, artistEntry in self.get().items():
            for dbName, match in artistEntry.items():
                self.checkDB(dbName, artistName)
                if match is None:
                    continue
                matchID = match.get('ID')
                self.checkID(matchID)
                self.dbArtistData[dbName][matchID] = artistName
                    
    
        
    ####################################################################################################
    #
    # Artist Section
    #
    ####################################################################################################
    def isKnown(self, artistName):
        """Return ``True`` when ``artistName`` has a non-``None`` entry in the music map."""
        return self.musicmap.get(artistName) is not None
        
    def getArtistFromID(self, dbID):
        """Find the first (database, artist) pair whose stored ID equals ``dbID``.

        Returns:
            ``[db, artistName]`` for the first match in map order, or
            ``[None, None]`` when no entry carries that ID.
        """
        for artistName, artistEntry in self.musicmap.items():
            for dbName, slot in artistEntry.items():
                if slot is None:
                    continue
                if slot.get("ID") == dbID:
                    return [dbName, artistName]
        return [None, None]
        
        
    def getMatchedDBStatus(self, artistName):
        """Report, for each key in ``self.dbkeys``, whether the artist has a non-``None`` slot.

        Returns:
            ``{dbkey: bool}`` in ``self.dbkeys`` order.
        """
        artistData = self.getArtistData(artistName)
        return {dbkey: artistData.get(dbkey) is not None for dbkey in self.dbkeys}
    
    
    def getArtistData(self, artistName):
        """Return the stored entry for ``artistName``, or ``{}`` when there is none."""
        artistEntry = self.musicmap.get(artistName)
        return {} if artistEntry is None else artistEntry
    
    
    def getArtistDBData(self, artistName, db):
        """Return the ``db`` slot of ``artistName``.

        An empty dict is returned when the artist is unknown, or when the slot
        is missing or ``None``.
        """
        artistEntry = self.musicmap.get(artistName)
        if artistEntry is None:
            return {}
        slot = artistEntry.get(db)
        return {} if slot is None else slot
    
    
    def getArtistDataIDs(self, artistName, returnNone=True):
        """Return ``{db: ID}`` for every database slot of ``artistName``.

        A slot that is ``None`` or has no ``"ID"`` key counts as ID ``None``.

        Args:
            artistName: Artist to look up in ``self.musicmap``.
            returnNone: When ``False``, databases whose ID is ``None`` are
                left out, whether the slot is missing or the ID is stored
                as ``None``.

        Returns:
            Mapping of database name to ID; ``{}`` for an unknown artist.
        """
        artistEntry = self.musicmap.get(artistName)
        if artistEntry is None:
            return {}

        retval = {}
        for db, dbdata in artistEntry.items():
            try:
                ID = dbdata["ID"]
            except (KeyError, TypeError):
                # Missing "ID" key, or a slot that is None / not subscriptable.
                ID = None
            if ID is None and returnNone is False:
                continue
            retval[db] = ID
        return retval
    
    
    def getArtists(self):
        """Return a new list with every artist name in the music map, in map order."""
        return [artistName for artistName in self.musicmap]
    
    
    def showArtistData(self, artistName):
        """Print a header for ``artistName`` followed by one aligned line per database slot."""
        slots = self.getArtistData(artistName)
        print("===> {0}".format(artistName))
        for dbName in slots:
            print("   {0: <15}: {1}".format(dbName, slots[dbName]))
        
            
            
    ########################################################################################################
    #
    # Get Artist Data
    #
    ########################################################################################################
    def getArtistIDsFromDBs(self, artistName, dbs, num=10, cutoff=0.7, debug=False):
        """Look up candidate artist IDs for ``artistName`` in the databases listed in ``dbs``.

        The full database data is loaded first if any known database has no
        (truthy) data in ``self.dbdata``.

        Returns:
            One key per known database: the result of ``self.getArtistDBIDs``
            for databases in ``dbs``, and ``{}`` for all the others.
        """
        everythingLoaded = all([self.dbdata.get(db) for db in self.getDBs()])
        if not everythingLoaded:
            self.getFullDBData()

        if debug:
            print("  Getting DB Artist IDs for ArtistName: {0}".format(artistName))

        return {
            db: self.getArtistDBIDs(artistName, db, num, cutoff, debug) if db in dbs else {}
            for db in self.getDBs()
        }
    
    
    
    def getArtistIDs(self, artistName, num=10, cutoff=0.7, debug=False):
        """Look up candidate artist IDs for ``artistName`` in every known database.

        The full database data is loaded first if any known database has no
        (truthy) data in ``self.dbdata``.

        Returns:
            ``{db: self.dbdata[db].getArtistIDs(...)}`` for each known database.
        """
        everythingLoaded = all([self.dbdata.get(db) for db in self.getDBs()])
        if not everythingLoaded:
            self.getFullDBData()

        if debug:
            print("  Getting DB Artist IDs for ArtistName: {0}".format(artistName))

        perDB = {}
        for dbName in self.getDBs():
            perDB[dbName] = self.dbdata[dbName].getArtistIDs(artistName, num, cutoff, debug=debug)
        return perDB
    
    
    def getArtistFromDBID(self, db, dbID):
        """Return the artist name that database ``db`` stores for ``dbID``.

        Args:
            db: Key into ``self.dbdata``.
            dbID: ID passed to that database's ``getArtistNameFromID``.

        Raises:
            ValueError: If ``db`` has no entry in ``self.dbdata``; the
                underlying lookup error is attached as the cause.
        """
        try:
            adb = self.dbdata[db]
        except (AttributeError, KeyError, TypeError) as err:
            # Only lookup failures are translated; KeyboardInterrupt and
            # similar must not be turned into a ValueError.
            raise ValueError("DB {0} does not exist.".format(db)) from err

        return adb.getArtistNameFromID(dbID)
        
        
    def getArtistDBIDs(self, artistName, db, num=10, cutoff=0.7, debug=False):
        """Look up candidate artist IDs for ``artistName`` in the single database ``db``.

        The full database data is loaded first when ``self.dbdata`` has no
        entry for ``db``.
        """
        notLoaded = self.dbdata.get(db) is None
        if notLoaded:
            self.getFullDBData()

        if debug:
            print("  Getting DB Artist IDs for ArtistName: {0}".format(artistName))

        return self.dbdata[db].getArtistIDs(artistName, num, cutoff, debug=debug)
        
        
            
    ########################################################################################################
    #
    # Get Artist Album Data
    #
    ########################################################################################################
    def getArtistAlbumsFromID(self, db, artistID, flatten=False):
        """Return the albums database ``db`` holds for ``artistID``.

        The full database data is loaded first if any known database has no
        (truthy) data in ``self.dbdata``. ``flatten`` is forwarded unchanged
        to the database's ``getArtistAlbums``.
        """
        everythingLoaded = all(self.dbdata.get(name) for name in self.getDBs())
        if not everythingLoaded:
            self.getFullDBData()
        return self.dbdata[db].getArtistAlbums(artistID, flatten=flatten)
    
    
    def getArtistAlbums(self, artistName, num=10, cutoff=0.7, debug=False):
        """Collect the albums of every candidate artist matching ``artistName``.

        Candidate IDs come from ``self.getArtistIDs``; each ID's albums are then
        fetched from the corresponding entry of ``self.dbdata``. The full
        database data is loaded first if any known database has no (truthy)
        data in ``self.dbdata``.

        Args:
            artistName: Name to search for.
            num: Forwarded to ``self.getArtistIDs``.
            cutoff: Forwarded to ``self.getArtistIDs``.
            debug: When truthy, a nested dump of the result is printed; the
                candidate IDs are printed only when it is exactly ``True``.

        Returns:
            ``{db: {matched name: {artist ID: albums}}}``.
        """
        # Imported locally: only the debug dump needs it, and the surrounding
        # code does not import json.
        import json

        if not all([self.dbdata.get(db) for db in self.getDBs()]):
            self.getFullDBData()

        print("  Getting Artist Albums for ArtistName: {0}".format(artistName))
        artistAlbums = {}
        artistIDs    = self.getArtistIDs(artistName, num=num, cutoff=cutoff, debug=debug)
        if debug is True:
            print("Found Artist IDs")
            print(artistIDs)
        for db,NameIDs in artistIDs.items():
            artistAlbums[db] = {}
            for name,IDs in NameIDs.items():
                artistAlbums[db][name] = {artistID: self.dbdata[db].getArtistAlbums(artistID) for artistID in IDs}

        if debug:
            print("ArtistAlbums({0}) Results".format(artistName))
            for db,nameData in artistAlbums.items():
                print("="*150)
                print("  DB: {0}".format(db))
                for name,IDsData in nameData.items():
                    print("    Name: {0}".format(name))
                    for ID,mediaTypes in IDsData.items():
                        print("      ID: {0}".format(ID))
                        for mediaType,mediaData in mediaTypes.items():
                            # Separate name so the mapping being iterated is not rebound.
                            albumNames = list(mediaData.values())
                            print("          -----> {0: <20} :: {1}\t{2}".format(mediaType, len(albumNames), json.dumps(albumNames)))
                        print("\n")
                    print("\n\n")

        return artistAlbums
        
        
        
    ####################################################################################################
    #
    # Interactive Section
    #
    ####################################################################################################
    def getNearestArtistNames(self, artistName, num=1, cutoff=0.9, debug=False):
        """Ask every known database for its nearest artist names to ``artistName``.

        The full database data is loaded first if any known database has no
        (truthy) data in ``self.dbdata``.

        Returns:
            ``{db: self.dbdata[db].getNearestArtist(...)}`` for each known database.
        """
        everythingLoaded = all([self.dbdata.get(db) for db in self.getDBs()])
        if not everythingLoaded:
            self.getFullDBData()

        nearest = {}
        for dbName in self.getDBs():
            nearest[dbName] = self.dbdata[dbName].getNearestArtist(artistName, num, cutoff, debug=debug)
        return nearest
        
    def getNearestArtists(self, artistName, num=2, cutoff=0.7):
        """Print the stored data of the known artists closest in name to ``artistName``.

        Candidates come from ``findNearest`` over ``self.getArtists()`` and
        each one is shown with ``self.showArtistData``.
        """
        knownArtists = self.getArtists()
        nearest = findNearest(artistName, knownArtists, num=num, cutoff=cutoff)
        print("Nearest Matches for: {0}".format(artistName))
        for nearName in nearest:
            self.showArtistData(nearName)
        
        
        
    ####################################################################################################
    #
    # Database Section
    #
    ####################################################################################################
    def getSubsetData(self, dbname):
        """Return ``{artist name: ID}`` for artists with a non-``None`` ID in ``dbname``.

        Artists whose ``dbname`` slot is missing or ``None``, or whose slot
        holds no ``"ID"`` (or a ``None`` one), are left out.
        """
        subset = {}
        for artistName, artistEntry in self.musicmap.items():
            slot = artistEntry.get(dbname)
            if slot is None:
                continue
            artistID = slot.get("ID")
            if artistID is not None:
                subset[artistName] = artistID
        return subset

        
    def getDBData(self, db, known=False):
        """Build the ``artistDB`` object for database ``db``.

        Args:
            db: Database name; must be one of ``self.getDBs()``.
            known: Forwarded to ``artistDB``.

        Raises:
            ValueError: If ``db`` is not a known database.
        """
        if db not in self.getDBs():
            raise ValueError("Nothing known about DB [{0}]".format(db))

        if self.debug:
            print("Getting Database Data For {0}".format(db))

        if known is True and self.debug is True:
            print("Loading Subset of Database Data For {0}".format(db))

        return artistDB(db, known=known, debug=self.debug)
    
    
    def getFullDBData(self):
        """Fill ``self.dbdata`` with ``getDBData(db, known=False)`` for every known database."""
        for dbName in self.getDBs():
            fullData = self.getDBData(dbName, known=False)
            self.dbdata[dbName] = fullData
    
        
    def getKnownDBData(self):
        """Fill ``self.dbdata`` with ``getDBData(db, known=True)`` for every known database."""
        for dbName in self.getDBs():
            knownData = self.getDBData(dbName, known=True)
            self.dbdata[dbName] = knownData
        
        
        
    ####################################################################################################
    #
    # Database Album Type Section
    #
    ####################################################################################################
    def getDBAlbumTypeNames(self, db, albumType):
        """Return the media-type names that database ``db`` files under ``albumType``.

        Args:
            db: Database name, resolved through ``self.getDBAlbumTypes``.
            albumType: Key into that database's album-type mapping.

        Raises:
            ValueError: If ``albumType`` is not a key of the mapping; the
                underlying lookup error is attached as the cause.
        """
        albumTypes = self.getDBAlbumTypes(db)
        try:
            albumTypeNames = albumTypes[albumType]
        except (LookupError, TypeError) as err:
            # Translate lookup failures only, so interrupts are not swallowed.
            raise ValueError("Could not find DB AlbumType [{0}] for DB [{1}] in dbAlbumTypes".format(albumType, db)) from err

        return albumTypeNames
        
        
    def getDBAlbumTypes(self, db):
        """Return the album-type mapping stored for database ``db``.

        Args:
            db: Key into ``self.dbAlbumTypes``.

        Raises:
            ValueError: If no mapping is available for ``db`` (including when
                ``self.dbAlbumTypes`` has not been set); the underlying error
                is attached as the cause.
        """
        try:
            dbAlbumTypes = self.dbAlbumTypes[db]
        except (AttributeError, KeyError, TypeError) as err:
            # Translate lookup failures only, so interrupts are not swallowed.
            raise ValueError("Could not find DB [{0}] in dbAlbumTypes".format(db)) from err

        return dbAlbumTypes
    
            
    def setAlbumTypes(self):
        self.dbAlbumTypes = {}
                     
        for db in self.getDBs():
            if db == "Discogs":
                allTypes  = ["Albums", "Singles & EPs", "Compilations", "Videos", "Miscellaneous"]
                primary   = ["Albums"]
                secondary = ["Compilations"]
                tertiary  = ["Singles & EPs"]
                fourth    = ["Videos", "Miscellaneous"] 
            elif db == "AllMusic":
                allTypes  = ["Albums", "Single/EP", "Comp", "Video", "Other"]
                primary   = ["Albums"]
                secondary = ["Comp"]
                tertiary  = ["Single/EP"]
                fourth    = ["Video", "Other"]
            elif db == "MusicBrainz":
                primary   = ["Album", "Album + Live", "Album + Soundtrack", "Album + Mixtape/Street", "Album + Remix", "Album + Audiobook", "Album + DJ-mix", "Album + Demo", "Album + Spokenword", "Album + Audio drama", "Album + Spokenword + Live", "Album + Soundtrack + Live", "Album + Remix + Mixtape/Street", "Album + Spokenword + Audiobook", "Album + Interview", "Album + Live + DJ-mix", "Album + Soundtrack + Remix", "Album + DJ-mix + Mixtape/Street", "Album + Interview + Live", "Album + Remix + DJ-mix", "Album + Live + Remix", "Album + Soundtrack + Audiobook", "Album + Interview + Demo", "Album + Soundtrack + Spokenword + Interview", "Album + Live + Demo", "Album + Soundtrack + Spokenword", "Album + Spokenword + Interview", "Album + Remix + Mixtape/Street + Demo", "Album + Demo + Audio drama", "Album + Soundtrack + Audiobook + Audio drama", "Album + Spokenword + Interview + Audiobook", "Album + Spokenword + Demo", "Album + Interview + Audiobook + Audio drama", "Album + Soundtrack + Audio drama", "Album + Soundtrack + Interview + Live", "Album + Audiobook + Audio drama", "Album + Audiobook + Live", "Album + Soundtrack + Demo"]
                secondary = ["Album + Compilation", "Album + Compilation + DJ-mix", "Compilation", "Album + Compilation + Live", "Album + Compilation + Soundtrack", "Album + Compilation + Remix", "Single + Compilation", "Album + Compilation + Mixtape/Street", "Album + Compilation + Live + DJ-mix", "Album + Compilation + Spokenword", "Album + Compilation + Demo", "Broadcast + Compilation", "Compilation + DJ-mix", "Album + Compilation + DJ-mix + Mixtape/Street", "Album + Compilation + Remix + DJ-mix", "Album + Compilation + Spokenword + Live", "Album + Compilation + Soundtrack + Remix", "Album + Compilation + Interview", "Compilation + Soundtrack", "Compilation + Live", "Broadcast + Compilation + Live", "Album + Compilation + Interview + Live", "Album + Compilation + Audio drama", "Album + Compilation + Audiobook", "Album + Compilation + Live + Demo", "Album + Compilation + Live + Remix", "Compilation + Live + DJ-mix", "Album + Compilation + Spokenword + Audiobook", "Broadcast + Compilation + Remix + DJ-mix", "Album + Compilation + Mixtape/Street + Demo", "Album + Compilation + Soundtrack + Interview", "Album + Compilation + Soundtrack + Spokenword + Interview + Audiobook + Remix", "Album + Compilation + Remix + Mixtape/Street", "Compilation + Remix", "Album + Compilation + Soundtrack + Demo", "Broadcast + Compilation + Audio drama"]
                tertiary  = ["Single", "EP", "EP + Live", "EP + Remix", "Single + Soundtrack", "Single + Live", "EP + Demo", "EP + Compilation", "EP + Soundtrack", "EP + Mixtape/Street", "Single + Demo", "Single + DJ-mix", "Single + Mixtape/Street", "EP + DJ-mix", "Single + Soundtrack + Remix", "Single + Audiobook", "EP + Compilation + Remix", "Single + Live + Remix", "EP + Live + Demo", "EP + Audio drama", "EP + Remix + Mixtape/Street", "Single + Audio drama", "Single + Soundtrack + Live", "EP + Soundtrack + Remix", "EP + Compilation + Live", "EP + Compilation + Mixtape/Street", "EP + Audiobook", "Single + Compilation + Remix", "Single + DJ-mix + Demo", "EP + Compilation + Remix + DJ-mix", "EP + Live + DJ-mix", "EP + Spokenword + Live", "Single + Remix + Mixtape/Street", "Single + Remix + Demo", "EP + Compilation + Demo", "Single + Mixtape/Street + Demo", "EP + Live + Remix", "Single + Spokenword", "Single + Interview", "EP + Compilation + Soundtrack", "EP + Interview"]
                fourth    = ["Unspecified type", "Other", "Single + Remix", "Other + Audiobook", "Other + Audio drama", "Other + Spokenword", "Live", "Remix", "Other + Compilation", "Broadcast", "Audiobook", "Other + Live", "Other + Demo", "Other + Interview", "Broadcast + Live", "Major series / box sets", "Sub Optimal Credits", "Soundtrack", "Broadcast + Audio drama", "Other + Mixtape/Street", "Currently known involved people:", "Demo", "The What The Fuck Serie:", "Mixtape/Street", "Other + DJ-mix", "A stab at the horrible Blue Note mess:", "Other + Soundtrack", "DJ-mix", "Broadcast + DJ-mix", "Spokenword", "Broadcast + Spokenword", "Broadcast + Audiobook", "Nonline discography:", "Other + Remix", "Other + Compilation + Live", "Other + Compilation + Audiobook", "Online discography:", "Former Official Homepage", "Current Members", "Don\'t add these albums here:", "Broadcast + Live + DJ-mix", "Other + Spokenword + Live", "Other + Spokenword + Audiobook", "Other + Spokenword + Audiobook + Audio drama", "Past Members", "Broadcast + Spokenword + Audio drama", "Audio drama", "Broadcast + Interview", "Other + Compilation + Spokenword", "Live + Demo", "Broadcast + Live + Audio drama", "Broadcast + Spokenword + Audiobook", "Other + Compilation + Demo", "Other + Compilation + Interview", "Broadcast + Demo", "Live + DJ-mix", "Other + Compilation + Live + DJ-mix", "DJ-mix + Mixtape/Street", "Other + Soundtrack + Mixtape/Street + Demo", "Zyklen/Reihen:", "Other + Compilation + DJ-mix", "Other + Audiobook + Audio drama", "Other + Compilation + Mixtape/Street", "Other + Remix + Mixtape/Street", "Other + Compilation + Interview + Live", "Broadcast + Soundtrack", "Other + Live + Demo", "Interview", "Jam Today (2)  1979 ~ 1980", "Other + Spokenword + DJ-mix + Mixtape/Street", "Other + Compilation + Remix", "Broadcast + Interview + Live"]        
                allTypes  = primary + secondary + tertiary + fourth
            elif db == "AceBootlegs":
                allTypes  = ["Bootleg"]
                primary   = ["Bootleg"]
                secondary = []
                tertiary  = []
                fourth    = []
            elif db == "RateYourMusic":     
                primary   = ["Album", "Live Album"]
                secondary = ['V/A Compilation', 'Compilation']
                tertiary  = ['Single', 'EP']
                fourth    = ['Bootleg / Unauthorized', 'Appears On', "Video"]        
                allTypes  = primary + secondary + tertiary + fourth
            elif db == "LastFM":
                allTypes  = ["Albums"]
                primary   = ["Albums"]
                secondary = []
                tertiary  = []
                fourth    = []
            elif db == "DatPiff":
                allTypes  = ["MixTape"]
                primary   = ["MixTape"]
                secondary = []
                tertiary  = []
                fourth    = []
            elif db == "RockCorner":
                primary   = ["Albums"]
                secondary = []
                tertiary  = ["Songs"]
                fourth    = []     
                allTypes  = primary + secondary + tertiary + fourth
            elif db == "CDandLP":
                primary   = ["Albums"]
                secondary = []
                tertiary  = []
                fourth    = []     
                allTypes  = primary + secondary + tertiary + fourth
            elif db == "MusicStack":
                primary   = ["Albums"]
                secondary = []
                tertiary  = []
                fourth    = []     
                allTypes  = primary + secondary + tertiary + fourth
            elif db == "MetalStorm":
                primary   = ["Albums"]
                secondary = []
                tertiary  = []
                fourth    = []
                allTypes  = primary + secondary + tertiary + fourth
            else:
                raise ValueError("Key is not known!")


            retval = {"All": allTypes, 1: primary, 2: secondary, 3: tertiary, 4: fourth}

            self.dbAlbumTypes[db] = retval