# Master Discogs Database

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))

from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from urllib.parse import quote

from discogsBase import discogs
from discogsUtils import discogsUtils
from collection import collections
from artist import artist
from timeUtils import clock, elapsed
from fsUtils import moveFile, setFile, setDir, setSubDir, isFile, isDir, mkDir
from fileUtils import getFileBasics, getBasename
from artists import artists
from artist import artist
from albums import albums
from album import album, albumURLInfo
from time import sleep

import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2019-09-27 17:14:17.958259


In [3]:
# Load autoreload (IPython only prints a notice if it is already loaded) and
# reload changed modules once, now.
%load_ext autoreload
%autoreload
# Instantiate the project helper objects used by the cells below.
disc = discogs()
arts = artists(disc)
art  = artist()
albs = albums(disc)
alb  = album()
dutils = discogsUtils()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Saved Discog Directory /Volumes/Music/Discog is Available
Local Discog Directory /Users/tgadfort/Music/Discog is Available
/Volumes/Music/Discog/base exists
/Volumes/Music/Discog/collections exists
/Volumes/Music/Discog/artists exists
/Volumes/Music/Discog/albums exists
/Volumes/Music/Discog/base-db exists
/Volumes/Music/Discog/collections-db exists
/Volumes/Music/Discog/artists-db exists
/Volumes/Music/Discog/albums-db exists
/Volumes/Music/Discog/artists-extra exists
/Volumes/Music/Discog/search exists
/Volumes/Music/Discog/search-artists exists
/Volumes/Music/Discog/diagnostic exists
/Volumes/Music/Discog/db exists
Found 100 artist DB files and that is equal to the max mod value


# Master DB Functions

In [2]:
from pandas import Series, DataFrame

def directoryName(x):
    """
    Make an artist name usable as a directory name.

    Every "..." is removed and every "/" becomes "-". None is passed
    through unchanged.
    """
    if x is None:
        return None
    for target, substitute in (("...", ""), ("/", "-")):
        if target in x:
            x = x.replace(target, substitute)
    return x

def realName(x):
    """
    Split a name into its base name and a trailing "(N)" number.

    Names such as "Mike Douglas (5)" carry a numeric suffix in parentheses.
    Suffixes of one, two or three characters are recognised.

    Returns a two-element list [name, num]:
      * [None, -1]  when x is None or a float NaN (missing value)
      * [x, -1]     when x is empty
      * [base, N]   when x ends in "(N)"; base is the stripped text before "("
      * [x, None]   when there is no parsable suffix
    """
    # Missing names arrive either as None or as the float NaN pandas uses
    # for absent values; NaN is the only float not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return [None, -1]

    lenx = len(x)
    if lenx < 1:
        return [x, -1]

    if x[-1] != ")":
        return [x, None]

    for ndigits in (1, 2, 3):
        # "(" sits ndigits+2 characters from the end. The length guard keeps
        # the original requirement of at least two characters before it.
        if lenx >= ndigits + 4 and x[-(ndigits + 2)] == "(":
            try:
                num = int(x[-(ndigits + 1):-1])
                val = x[:-(ndigits + 2)].strip()
            except Exception:
                # Not a number between the parentheses: leave the name as is.
                return [x, None]
            return [val, num]

    return [x, None]
            

def getAlbumNames(x):
    """Return the values of the dict x as a list, or [] when x is not a dict."""
    return list(x.values()) if isinstance(x, dict) else []
    
    
def splitMetaData(x):
    """
    Reduce each counter in x to its three most common entries.

    x maps a key to an object with a most_common() method (e.g. a Counter).
    The result maps the same keys to the list of the top three entries,
    without their counts. Anything that is not a dict yields None.
    """
    if not isinstance(x, dict):
        return None
    return {key: [pair[0] for pair in counts.most_common(3)] for key, counts in x.items()}

def createArtistAlbumsDB(disc):
    """
    Build the artist DataFrame with refs, names, metadata and album data.

    disc must provide getArtistIDToNameData, getArtistIDToRefData,
    getArtistIDAlbumNames, getArtistIDAlbumIDs and getAlbumArtistMetaData.

    Returns a DataFrame indexed like the artist-ref table with the columns
    "Ref", "Name", "Extra Artists", "Genres", "Styles", "Albums Data" and
    "Known" (always True).
    """
    print("Creating ArtistAlbums DB")

    print("  Loading ArtistID Data")
    artistIDtoName  = disc.getArtistIDToNameData()
    artistIDtoRefs  = disc.getArtistIDToRefData()

    print("  Loading ArtistID <-> AlbumID Data")
    artistIDtoAlbumNames = disc.getArtistIDAlbumNames()
    artistIDtoAlbumIDs   = disc.getArtistIDAlbumIDs()

    print("  Loading Artist MetaData")
    artistMetaData = disc.getAlbumArtistMetaData()

    sArtistToRef  = Series(artistIDtoRefs)
    sArtistToName = Series(artistIDtoName)

    sArtistToAlbums     = Series(artistIDtoAlbumIDs)
    sArtistToAlbumNames = Series(artistIDtoAlbumNames)
    # NOTE(review): album IDs and album names are paired by position, which
    # assumes both tables list the same artists in the same order - confirm.
    sArtistAlbums = Series([dict(zip(albumIDs, albumNames))
                            for albumIDs, albumNames in zip(sArtistToAlbums.values, sArtistToAlbumNames)],
                           index=sArtistToAlbums.index)

    sArtistMetaData = Series(artistMetaData).apply(splitMetaData)

    print("  Creating Pandas DataFrame for {0} Artists".format(sArtistToRef.shape[0]))
    discdf = DataFrame(sArtistToRef)
    discdf.columns = ["Ref"]
    discdf = discdf.join(DataFrame(sArtistToName))
    discdf.columns = ["Ref", "Name"]

    metadf = DataFrame(sArtistMetaData.tolist())
    metadf.index = sArtistMetaData.index
    # Pick the metadata columns by key before relabelling them. The column
    # order produced from a list of dicts depends on the pandas version, so
    # labelling by position could swap e.g. genres and artists.
    # Assumes the metadata dicts use the keys "Artists", "Genre" and "Style",
    # as written by this notebook's album lookup cell - verify if that changes.
    metadf = metadf[["Artists", "Genre", "Style"]]
    metadf.columns = ["Extra Artists", "Genres", "Styles"]
    discdf = discdf.join(metadf)

    albumsdf = DataFrame(sArtistAlbums)
    albumsdf.columns = ["Albums Data"]
    discdf = discdf.join(albumsdf)

    discdf["Known"] = True
    print("  DataFrame Shape is {0}".format(discdf.shape))    
    return discdf


def createArtistName(discdf):
    """
    Add the "Artist" and "Artist Num" columns derived from "Name".

    Each name is split with realName(); the base name is then cleaned with
    directoryName(). discdf is modified in place and also returned.
    """
    nameParts = DataFrame(list(discdf["Name"].apply(realName)))
    nameParts.index = discdf.index
    discdf["Artist"] = nameParts[0]
    discdf["Artist Num"] = nameParts[1]
    cleanedNames = discdf["Artist"].apply(directoryName)
    discdf["Artist"] = cleanedNames
    return discdf


def createAlbums(discdf):
    """
    Add an "Albums" column holding the album names of each artist.

    The names are taken from the "Albums Data" column via getAlbumNames().
    discdf is modified in place and also returned.
    """
    albumNames = discdf["Albums Data"].apply(getAlbumNames)
    discdf["Albums"] = albumNames
    return discdf


def createCollectionsDB(disc):
    """
    Build the collections DataFrame with one row per collection artist.

    disc must provide getCollectionIDToNameData, getCollectionIDToRefData
    and getCollectionRefCountsData (the last one keyed by ref).

    Returns a DataFrame indexed by artist ID with the columns "Ref", "Name"
    and "Counts". Artists whose ref has no count entry are kept, with a
    missing (NaN) count.
    """
    print("Creating Collections DB")

    print("  Loading Collection Data")
    colArtistIDtoName  = disc.getCollectionIDToNameData()
    colArtistIDtoRefs  = disc.getCollectionIDToRefData()
    colArtistReftoCnts = disc.getCollectionRefCountsData()

    sColArtistToRef     = Series(colArtistIDtoRefs)
    sColArtistToName    = Series(colArtistIDtoName)
    sColArtistRefToCnts = Series(colArtistReftoCnts)

    print("  Creating Pandas DataFrame for {0} Artists".format(sColArtistToRef.shape[0]))
    coldiscdf = DataFrame(sColArtistToRef)
    coldiscdf.columns = ["Ref"]
    coldiscdf = coldiscdf.join(DataFrame(sColArtistToName))
    coldiscdf.columns = ["Ref", "Name"]

    # Turn the ref -> count table into two ordinary columns for merging.
    colrefdf = DataFrame(sColArtistRefToCnts).reset_index()
    colrefdf.columns = ["Ref", "Counts"]

    # merge() discards the index, so remember the artist IDs and restore them.
    # A left merge keeps exactly one row per artist in the original order; an
    # inner merge would drop artists without a count and make the index
    # re-assignment below fail with a length mismatch.
    artistIDs = coldiscdf.index
    coldiscdf = coldiscdf.merge(colrefdf, on="Ref", how="left")
    coldiscdf.index = artistIDs
    
    print("  DataFrame Shape is {0}".format(coldiscdf.shape))
    return coldiscdf


def createArtistDB(disc):
    """
    Build the artist DataFrame with refs, names and artist metadata.

    disc must provide getArtistIDToNameData, getArtistIDToRefData and
    getAlbumArtistMetaData.

    Returns a DataFrame indexed like the artist-ref table with the columns
    "Ref", "Name" and "Known" (always True), followed by one column per
    metadata key holding that key's most common entries (see splitMetaData).
    The metadata columns keep the key names found in the data.
    """
    print("Creating Artist DB")
    
    print("  Loading ArtistID Data")
    artistIDtoName  = disc.getArtistIDToNameData()
    sArtistToName   = Series(artistIDtoName)

    artistIDtoRef   = disc.getArtistIDToRefData()
    sArtistToRef    = Series(artistIDtoRef)
    
    print("  Creating Pandas DataFrame for {0} Artists".format(sArtistToRef.shape[0]))
    discdf = DataFrame(sArtistToRef)
    discdf.columns = ["Ref"]
    discdf = discdf.join(DataFrame(sArtistToName))
    discdf.columns = ["Ref", "Name"]
    discdf["Known"] = True

    # The album ID -> name/ref tables are intentionally not loaded here:
    # nothing in this function reads them.
    print("  Loading ArtistID MetaData")
    artistIDMetaData = disc.getAlbumArtistMetaData()
    sArtistMetaData = Series(artistIDMetaData).apply(splitMetaData)
    
    print("  Joining Pandas DataFrame for Artist Metadata")
    # One column per metadata key, one row per artist.
    metadf = DataFrame(sArtistMetaData.tolist())
    metadf.index = sArtistMetaData.index
    discdf = discdf.join(metadf)
    
    print("  DataFrame Shape is {0}".format(discdf.shape))    
    return discdf


def mergeDBs(discdf, coldiscdf):
    """
    Left-merge the artist DB onto the collections DB.

    Rows of coldiscdf are matched to discdf on both "Ref" and "Name"; every
    collections row is kept. The result carries the index of coldiscdf.
    """
    print("  Merging AlbumArtists {0} and Collections {1} DBs".format(discdf.shape, coldiscdf.shape))
    joinKeys = ["Ref", "Name"]
    merged = coldiscdf.merge(right=discdf, how="left", on=joinKeys)
    # merge() resets the index; put the collections index back.
    merged.index = coldiscdf.index
    print("  Merged DataFrame Shape is {0}".format(merged.shape))
    return merged

In [86]:
# Exploratory: load the artist ID -> core album names table for inspection.
x = disc.getArtistIDCoreAlbumNames()

Loading data from /Volumes/Music/Discog/db/ArtistIDCoreAlbumNames.p
  --> This file is 51.5MB.
Loading /Volumes/Music/Discog/db/ArtistIDCoreAlbumNames.p


In [95]:
# Exploratory: load the album ID -> ref and album ID -> name tables.
albToRef  = disc.getAlbumIDToRefData()
albToName = disc.getAlbumIDToNameData()

Loading data from /Volumes/Music/Discog/db/AlbumIDToRef.p
  --> This file is 29.7MB.
Loading /Volumes/Music/Discog/db/AlbumIDToRef.p
Loading data from /Volumes/Music/Discog/db/AlbumIDToName.p
  --> This file is 21.2MB.
Loading /Volumes/Music/Discog/db/AlbumIDToName.p


In [105]:
# Combine both tables; after the transpose each row is one album ID,
# column 0 is the ref and column 1 is the name.
albDF = DataFrame([albToRef,albToName]).T

In [106]:
# Preview the first rows of the combined album table.
albDF.head()

Unnamed: 0,0,1
10000,/Proem-Negativ/master/10000,Negativ
100000,/Peter-Tosh-Wanted-Dread-Alive/master/100000,Wanted Dread & Alive
1000000,/%D0%9B-%D0%91%D0%B5%D1%82%D1%85%D0%BE%D0%B2%D...,"Концерт Для Фортепиано, Скрипки И Виолончели С..."
10000046,/The-Beatles-Beatles-65/release/10000046,Beatles '65
10000057,/J-S-Bach-Edith-Mathis-Ernst-Haefliger-Peter-S...,"Kantaten / Nun komm, der Heiden Heiland / Wie ..."


In [97]:
# For each album ID in sy[0], show its ref and name; IDs missing from a table give None.
# NOTE(review): `sy` is not assigned in any visible cell - this relies on leftover kernel state.
{k: [albToRef.get(k),albToName.get(k)] for k in sy[0]}

{'856965': [None, None],
 '27412': [None, None],
 '75447': [None, None],
 '93685': [None, None],
 '1012077': ['/Dave-Clarke-Live/master/1012077', 'Live'],
 '448838': ['/Dave-Clarke-Fabric-60/master/448838', 'Fabric 60'],
 '43769': [None, None],
 '6190506': [None, None],
 '861622': ['/Joss-Baselli-La-Chanson-De-G%C3%A9d%C3%A9on/master/861622',
  'La Chanson De Gédéon'],
 '44831': ['/Dave-Clarke-Devils-Advocate/master/44831', "Devil's Advocate"],
 '44804': [None, None],
 '44801': [None, None],
 '93687': [None, None],
 '44853': [None, None],
 '2409183': [None, None],
 '93693': ['/John-McEntire-Music-From-The-Motion-Picture-Reach-The-Rock/release/93693',
  'Music From The Motion Picture Reach The Rock'],
 '44815': ['/Dave-Clarke-Archive-One/master/44815', 'Archive One'],
 '630867': ['/Dave-Clarke-Vs-Bang-The-Future-Stargate/master/630867',
  'Stargate'],
 '39904': [None, None],
 '7175': [None, None],
 '1344152': [None, None],
 '93696': [None, None],
 '10176549': [None, None],
 '68049': [No

In [89]:
def isKnown(val):
    """Return the Python type of val."""
    return type(val)

# Report the value type stored for every key of y.
# NOTE(review): `y` is only assigned in a cell further down, so this fails on a
# fresh top-to-bottom run; the recorded run ended in a KeyboardInterrupt.
Series(y).apply(isKnown)

KeyboardInterrupt: 

In [88]:
# Load the artist ID -> list of album IDs table.
y = disc.getArtistIDAlbumIDs()
# NOTE(review): displaying the whole dict produces a very large output.
y

Loading data from /Volumes/Music/Discog/db/ArtistIDAlbumIDs.p
  --> This file is 42.7MB.
Loading /Volumes/Music/Discog/db/ArtistIDAlbumIDs.p


{'1000': ['856965',
  '27412',
  '75447',
  '93685',
  '1012077',
  '448838',
  '43769',
  '6190506',
  '861622',
  '44831',
  '44804',
  '44801',
  '93687',
  '44853',
  '2409183',
  '93693',
  '44815',
  '630867',
  '39904',
  '7175',
  '1344152',
  '93696',
  '10176549',
  '68049',
  '4113',
  '861633',
  '1337892',
  '1258261',
  '992212',
  '94822',
  '2546978',
  '3432',
  '1125522',
  '4542',
  '1977333',
  '1759955',
  '44839',
  '44820',
  '44808',
  '557108',
  '93699',
  '1521422',
  '17179',
  '350936',
  '44825',
  '13600587',
  '39665',
  '9196112',
  '3176478',
  '93537',
  '1517139',
  '665932',
  '1294238'],
 '1000500': ['1178875'],
 '100200': ['2082992',
  '1561704',
  '50511',
  '219887',
  '7516271',
  '2843377',
  '3019891',
  '50530',
  '2732930'],
 '1002000': ['124656', '1002108', '64024'],
 '1002600': ['1180905'],
 '1002900': ['157011'],
 '1003900': ['157268',
  '330170',
  '187275',
  '1527778',
  '950157',
  '950159',
  '315483',
  '665409',
  '363135',
  '468

# Discogs Merge

In [3]:
%load_ext autoreload
%autoreload
# Build the artist table (refs, names, metadata) and add the parsed
# "Artist" / "Artist Num" columns.
disc      = discogs()
discdf    = createArtistDB(disc)
discdf    = createArtistName(discdf)

# NOTE(review): the "Full Merge" cell saves to this same filename, so
# whichever of the two cells runs last overwrites the other - confirm intent.
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discdf, ifile=savename, debug=True)

discdf.head()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Saved Discog Directory /Volumes/Music/Discog is Available
Local Discog Directory /Users/tgadfort/Music/Discog is Available
/Volumes/Music/Discog/base exists
/Volumes/Music/Discog/collections exists
/Volumes/Music/Discog/artists exists
/Volumes/Music/Discog/albums exists
/Volumes/Music/Discog/base-db exists
/Volumes/Music/Discog/collections-db exists
/Volumes/Music/Discog/artists-db exists
/Volumes/Music/Discog/albums-db exists
/Volumes/Music/Discog/artists-extra exists
/Volumes/Music/Discog/search exists
/Volumes/Music/Discog/search-artists exists
/Volumes/Music/Discog/diagnostic exists
/Volumes/Music/Discog/db exists
Found 100 artist DB files and that is equal to the max mod value
Creating Artist DB
  Loading ArtistID Data
Loading data from /Volumes/Music/Discog/db/ArtistIDToName.p
  --> This file is 9.0MB.
Loading /Volumes/Music/Discog/db/ArtistIDToName.p
Loading data from /Volumes/Music/Discog/db

Unnamed: 0,Ref,Name,Known,Artists,Genre,Style,Artist,Artist Num
1000,/artist/1000-Dave-Clarke,Dave Clarke,True,[Dave Clarke],"[Electronic, Hip Hop, Non-Music]","[Techno, Electro, EBM]",Dave Clarke,
1000500,/artist/1000500-Club-Pulse,Club Pulse,True,[],[],[],Club Pulse,
100200,/artist/100200-Dike,Dike,True,[Dike],[Hip Hop],[Conscious],Dike,
1002000,/artist/1002000-Larry-Stokes,Larry Stokes,True,[],[],[],Larry Stokes,
1002600,/artist/1002600-Gidd-Sanchez,Gidd Sanchez,True,,,,Gidd Sanchez,


# Full Merge

In [None]:
%load_ext autoreload
%autoreload
# Build the full artist table including album data, then add the parsed
# "Artist" / "Artist Num" columns and the "Albums" name lists.
disc      = discogs()
discdf    = createArtistAlbumsDB(disc)
discdf    = createArtistName(discdf)
discdf    = createAlbums(discdf)

# NOTE(review): the "Discogs Merge" cell saves to this same filename, so
# whichever of the two cells runs last overwrites the other - confirm intent.
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discdf, ifile=savename, debug=True)

discdf.head()

Saving data to /Volumes/Music/Discog/db/MasterDB.p
  --> This file is 400.5MB.
Saved data to /Volumes/Music/Discog/db/MasterDB.p
  --> This file is 400.5MB.


# Artist Lookup Tables

In [4]:
from searchUtils import findExt
# NOTE(review): artistNames and artistYears are not used in any visible cell.
artistNames = {}
artistYears = {}
# Collect the artist DB files (extension .p) to process in the next cell.
artistDBDir = disc.getArtistsDBDir()   
files       = findExt(artistDBDir, ext='.p')

In [5]:
# Build the artist lookup tables from every artist DB file and save each
# table as "Artist<basename>.p" in the Discogs DB directory.
# NOTE(review): this rebinds `start`, which the setup cell set to a datetime.
start, cmt = clock("Creating Artist DBs")

# artist ID -> name / ref / list of name variations
artistIDToName       = {}
artistIDToRef        = {}
artistIDToVariations = {}

# artist ID -> {album code: album name or url}, for all media and for the
# "core" media categories only
artistIDAlbumNames     = {}
artistIDAlbumRefs      = {}
artistIDCoreAlbumNames = {}
artistIDCoreAlbumRefs  = {}

# NOTE(review): these two dicts are never filled or read in this cell.
albumIDAlbumNames = {}
albumIDAlbumRefs  = {}


# Media categories that count as "core" releases.
core = ["Singles & EPs", "Albums", "Compilations"]

for i,ifile in enumerate(files):
    # Report the elapsed time on every fifth file.
    if (i+1) % 5 == 0:
        elapsed(start, cmt)
    print(ifile,'\t',end="")
    db = getFile(ifile)
    artistIDToName.update({k: v.artist.name for k,v in db.items()})
    artistIDToRef.update({k: v.url.url for k,v in db.items()})
    
    for k,v in db.items():
        artistID   = k
        artistData = v
                
        # Start from empty tables, so an artist seen again replaces its earlier entry.
        artistIDAlbumNames[artistID]     = {}
        artistIDAlbumRefs[artistID]      = {}
        artistIDCoreAlbumNames[artistID] = {}
        artistIDCoreAlbumRefs[artistID]  = {}

        # Without listed variations, fall back to the artist's own name.
        if artistData.profile.variations is not None:
            artistIDToVariations[artistID] = [v2.name for v2 in artistData.profile.variations]
        else:
            artistIDToVariations[artistID] = [artistData.artist.name]

        for mediaName,mediaData in artistData.media.media.items():
            albumURLs  = {mediaValues.code: mediaValues.url for mediaValues in mediaData}
            albumNames = {mediaValues.code: mediaValues.album for mediaValues in mediaData}

            artistIDAlbumRefs[artistID].update(albumURLs)
            artistIDAlbumNames[artistID].update(albumNames)
            if mediaName in core:
                artistIDCoreAlbumRefs[artistID].update(albumURLs)
                artistIDCoreAlbumNames[artistID].update(albumNames)
        
    # Running total of artists collected so far.
    print(len(artistIDToName))
    
    
# File basename suffix -> table to save.
savenames = {"IDToRef": artistIDToRef, "IDToName": artistIDToName, "IDToVariations": artistIDToVariations,
             "IDToAlbumNames": artistIDAlbumNames, "IDToAlbumRefs": artistIDAlbumRefs, 
             "IDToCoreAlbumNames": artistIDCoreAlbumNames, "IDToCoreAlbumRefs": artistIDCoreAlbumRefs}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)
    
    
elapsed(start, cmt)

Current Time is Fri Sep 27, 2019 17:14:46 for Creating Artist DBs
/Volumes/Music/Discog/artists-db/0-DB.p 	5125
/Volumes/Music/Discog/artists-db/1-DB.p 	10520
/Volumes/Music/Discog/artists-db/10-DB.p 	15665
/Volumes/Music/Discog/artists-db/11-DB.p 	20792
Current Time is Fri Sep 27, 2019 17:15:54 for Done with Creating Artist DBs
Process [{0}] took 1.1 minutes.
/Volumes/Music/Discog/artists-db/12-DB.p 	26020
/Volumes/Music/Discog/artists-db/13-DB.p 	31204
/Volumes/Music/Discog/artists-db/14-DB.p 	36265
/Volumes/Music/Discog/artists-db/15-DB.p 	41457
/Volumes/Music/Discog/artists-db/16-DB.p 	46637
Current Time is Fri Sep 27, 2019 17:17:22 for Done with Creating Artist DBs
Process [{0}] took 2.6 minutes.
/Volumes/Music/Discog/artists-db/17-DB.p 	51871
/Volumes/Music/Discog/artists-db/18-DB.p 	57026
/Volumes/Music/Discog/artists-db/19-DB.p 	62110
/Volumes/Music/Discog/artists-db/2-DB.p 	67371
/Volumes/Music/Discog/artists-db/20-DB.p 	72537
Current Time is Fri Sep 27, 2019 17:18:42 for Done

  --> This file is 204.5MB.
Saved data to /Volumes/Music/Discog/db/ArtistIDToAlbumRefs.p
  --> This file is 204.5MB.
Saving 510790 entries to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumNames.p

Saving data to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumNames.p
  --> This file is 87.7MB.
Saved data to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumNames.p
  --> This file is 87.7MB.
Saving 510790 entries to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumRefs.p

Saving data to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumRefs.p
  --> This file is 122.9MB.
Saved data to /Volumes/Music/Discog/db/ArtistIDToCoreAlbumRefs.p
  --> This file is 122.9MB.
Current Time is Fri Sep 27, 2019 17:50:24 for Done with Creating Artist DBs
Process [{0}] took 35.6 minutes.


In [6]:
# Sanity check: the previous cell adds one entry per artist to each of these
# tables, so all seven sizes should be equal.
print(len(artistIDToName))
print(len(artistIDToRef))
print(len(artistIDToVariations))

print(len(artistIDAlbumNames))
print(len(artistIDAlbumRefs))
print(len(artistIDCoreAlbumNames))
print(len(artistIDCoreAlbumRefs))

510790
510790
510790
510790
510790
510790
510790


# Albums Lookup Table

In [61]:
from searchUtils import findExt
# Collect the album DB files (extension .p).
# NOTE: this rebinds `files`, which earlier held the artist DB files.
albumsDBDir = disc.getAlbumsDBDir()   
files       = findExt(albumsDBDir, ext='.p')
print(len(files))

101


In [78]:
# Build the album lookup tables and the per-artist metadata counters from
# every album DB file, then save them in the Discogs DB directory.
from collections import Counter

# album ID -> name / ref
albumIDToName = {}
albumIDToRef  = {}

# album ID -> {artist ID: True} and artist ID -> {album ID: True}
albumIDToArtistIDs = {}
artistIDToAlbumIDs = {}

# artist ID -> {"Genre": Counter, "Artists": Counter, "Style": Counter}
artistIDMetaData = {}


for i,ifile in enumerate(files):
    print(ifile,' \t',end="")
    db = getFile(ifile)
    for artistID,artistData in db.items():
        artistIDMetaData[artistID] = {"Genre": Counter(), "Artists": Counter(), "Style": Counter()}

        for albumID,albumData in artistData.items():
            albumIDToName[albumID] = albumData.album.name
            albumIDToRef[albumID]  = albumData.url.url

            # The loop variable is deliberately not called `artist`, and the
            # list is not called `artists`: those names are the classes
            # imported at the top of the notebook, and rebinding them here
            # breaks later cells that call artist() / artists(disc).
            albumArtists = albumData.artist.artists


            ####### Album <-> Artist #######
            for albumArtist in albumArtists:
                if albumArtist is not None:
                    albumArtistID = albumArtist.ID
                    # Link only when both IDs are set.
                    if all([albumArtistID,artistID]):
                        albumIDToArtistIDs.setdefault(albumID, {})[albumArtistID] = True
                        artistIDToAlbumIDs.setdefault(albumArtistID, {})[albumID] = True


            ####### Artist MetaData #######
            # genre and style may be a single value instead of a list.
            genres = albumData.profile.genre
            if not isinstance(genres, list):
                genres = [genres]
            for genre in genres:
                if genre is not None:
                    artistIDMetaData[artistID]['Genre'][genre.name] += 1

            for albumArtist in albumArtists:
                if albumArtist is not None:
                    artistIDMetaData[artistID]['Artists'][albumArtist.name] += 1

            styles = albumData.profile.style
            if not isinstance(styles, list):
                styles = [styles]
            for style in styles:
                if style is not None:
                    artistIDMetaData[artistID]['Style'][style.name] += 1

    # Running total of albums collected so far.
    print(len(albumIDToName))
    
    
savenames = {"IDToRef": albumIDToRef, "IDToName": albumIDToName}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Album{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

savenames = {"AlbumIDToArtistIDs": albumIDToArtistIDs, "ArtistIDToAlbumIDs": artistIDToAlbumIDs,
             "ArtistIDMetaData": artistIDMetaData}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

/Volumes/Music/Discog/albums-db/0-DB.p 	11591
/Volumes/Music/Discog/albums-db/1-DB.p 	25185
/Volumes/Music/Discog/albums-db/10-DB.p 	31490
/Volumes/Music/Discog/albums-db/11-DB.p 	42384
/Volumes/Music/Discog/albums-db/12-DB.p 	51216
/Volumes/Music/Discog/albums-db/13-DB.p 	58835
/Volumes/Music/Discog/albums-db/14-DB.p 	72304
/Volumes/Music/Discog/albums-db/15-DB.p 	85915
/Volumes/Music/Discog/albums-db/16-DB.p 	100148
/Volumes/Music/Discog/albums-db/17-DB.p 	108763
/Volumes/Music/Discog/albums-db/18-DB.p 	120335
/Volumes/Music/Discog/albums-db/19-DB.p 	136533
/Volumes/Music/Discog/albums-db/2-DB.p 	148555
/Volumes/Music/Discog/albums-db/20-DB.p 	156814
/Volumes/Music/Discog/albums-db/21-DB.p 	163065
/Volumes/Music/Discog/albums-db/22-DB.p 	170522
/Volumes/Music/Discog/albums-db/23-DB.p 	178590
/Volumes/Music/Discog/albums-db/24-DB.p 	186771
/Volumes/Music/Discog/albums-db/25-DB.p 	200119
/Volumes/Music/Discog/albums-db/26-DB.p 	206491
/Volumes/Music/Discog/albums-db/27-DB.p 	215813
/Vo

In [80]:
# Sizes of the album lookup tables built by the album lookup cell.
print(len(albumIDToName))
print(len(albumIDToRef))

print(len(albumIDToArtistIDs))
print(len(artistIDToAlbumIDs))
print(len(artistIDMetaData))

919132
919132
832571
256834
333394


In [81]:
# Save the album lookup tables to the Discogs DB directory.
# NOTE(review): the album lookup cell above already ends with these same two
# save loops, so running both cells writes every file twice.
savenames = {"IDToRef": albumIDToRef, "IDToName": albumIDToName}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Album{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

# These tables are saved under their full names, without the "Album" prefix.
savenames = {"AlbumIDToArtistIDs": albumIDToArtistIDs, "ArtistIDToAlbumIDs": artistIDToAlbumIDs,
             "ArtistIDMetaData": artistIDMetaData}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

Saving 919132 entries to /Volumes/Music/Discog/db/AlbumIDToRef.p

Saving data to /Volumes/Music/Discog/db/AlbumIDToRef.p
  --> This file is 29.7MB.
Saved data to /Volumes/Music/Discog/db/AlbumIDToRef.p
  --> This file is 29.7MB.
Saving 919132 entries to /Volumes/Music/Discog/db/AlbumIDToName.p

Saving data to /Volumes/Music/Discog/db/AlbumIDToName.p
  --> This file is 21.2MB.
Saved data to /Volumes/Music/Discog/db/AlbumIDToName.p
  --> This file is 21.2MB.
Saving 832571 entries to /Volumes/Music/Discog/db/AlbumIDToArtistIDs.p

Saving data to /Volumes/Music/Discog/db/AlbumIDToArtistIDs.p
  --> This file is 14.2MB.
Saved data to /Volumes/Music/Discog/db/AlbumIDToArtistIDs.p
  --> This file is 14.2MB.
Saving 256834 entries to /Volumes/Music/Discog/db/ArtistIDToAlbumIDs.p

Saving data to /Volumes/Music/Discog/db/ArtistIDToAlbumIDs.p
  --> This file is 9.9MB.
Saved data to /Volumes/Music/Discog/db/ArtistIDToAlbumIDs.p
  --> This file is 9.9MB.
Saving 333394 entries to /Volumes/Music/Discog/

# Missing / ToDo

In [52]:
from fsUtils import removeFile

In [53]:
%load_ext autoreload
%autoreload
disc = discogs()
arts = artists(disc)

toget = discdf[discdf['Name'].isna()]['Ref'].to_dict()
for artistID,artistRef in toget.items():
    url = arts.getArtistURL(artistRef)
    savename = arts.getArtistSavename(artistID)
    if isFile(savename):
        removeFile(savename)
    try:
        arts.downloadArtistURL(url=url, savename=savename, debug=True, force=True)
    except:
        continue


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Saved Discog Directory /Volumes/Music/Discog is Available
Local Discog Directory /Users/tgadfort/Music/Discog is Available
/Volumes/Music/Discog/base exists
/Volumes/Music/Discog/collections exists
/Volumes/Music/Discog/artists exists
/Volumes/Music/Discog/albums exists
/Volumes/Music/Discog/base-db exists
/Volumes/Music/Discog/collections-db exists
/Volumes/Music/Discog/artists-db exists
/Volumes/Music/Discog/albums-db exists
/Volumes/Music/Discog/artists-extra exists
/Volumes/Music/Discog/search exists
/Volumes/Music/Discog/search-artists exists
/Volumes/Music/Discog/db exists
Found 100 artist DB files and that is equal to the max mod value
Now Downloading in Artists(): https://www.discogs.com/artist/5442814-Shad-Music?sort=year%2Casc&limit=500
                   Saving as: /Volumes/Music/Discog/artists/14/5442814.p
Now Downloading in Artists(): https://www.discogs.com/artist/1876816-Mattia-Travag

Now Downloading in Artists(): https://www.discogs.com/artist/2971073-Mike-Douglas-5?sort=year%2Casc&limit=500
                   Saving as: /Volumes/Music/Discog/artists/73/2971073.p
Now Downloading in Artists(): https://www.discogs.com/artist/3465573-Jeff-Christopher?sort=year%2Casc&limit=500
                   Saving as: /Volumes/Music/Discog/artists/73/3465573.p
Now Downloading in Artists(): https://www.discogs.com/artist/458475-Sebastian-Hoff?sort=year%2Casc&limit=500
                   Saving as: /Volumes/Music/Discog/artists/75/458475.p
None
<h1 class="hide_mobile">Sebastian Hoff</h1>
Saving /Volumes/Music/Discog/artists/75/458475.p
  --> This file is 143.4kB.
Done. Sleeping for 2 seconds
Now Downloading in Artists(): https://www.discogs.com/artist/5402008-Yahman-Celvai?sort=year%2Casc&limit=500
                   Saving as: /Volumes/Music/Discog/artists/8/5402008.p
Now Downloading in Artists(): https://www.discogs.com/artist/190580-Nuno-Rebelo?sort=year%2Casc&limit=500
         