# Master Discogs Database

In [22]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))

from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from urllib.parse import quote

from discogsBase import discogs
from discogsUtils import discogsUtils
from collection import collections
from artist import artist
from searchUtils import findExt, findSubExt, findPatternExt
from timeUtils import clock, elapsed
from fsUtils import moveFile, setFile, setDir, setSubDir, isFile, isDir, mkDir
from fileUtils import getFileBasics, getBasename
from artists import artists
from artist import artist
from albums import albums
from album import album, albumURLInfo
from time import sleep

import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2019-10-14 20:35:47.218458


In [23]:
%load_ext autoreload
%autoreload
# Instantiate the project helpers that the rest of the notebook reads as globals.
# `disc` is passed to the artists/albums helpers and to the DB-building functions below.
disc = discogs()
arts = artists(disc)
art  = artist()
albs = albums(disc)
alb  = album()
dutils = discogsUtils()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Saved Discog Directory /Volumes/Music/Discog is Available
Local Discog Directory /Users/tgadfort/Music/Discog is Available
/Volumes/Music/Discog/collections exists
/Volumes/Music/Discog/artists exists
/Volumes/Music/Discog/albums exists
/Volumes/Music/Discog/collections-db exists
/Volumes/Music/Discog/artists-db exists
/Volumes/Music/Discog/albums-db exists
/Volumes/Music/Discog/artists-db/metadata exists
/Volumes/Music/Discog/albums-db/metadata exists
/Volumes/Music/Discog/diagnostic exists
/Volumes/Music/Discog/db exists
Found 100 artist DB files and that is equal to the max mod value


# Master DB Functions

In [3]:
from pandas import Series, DataFrame

def directoryName(x):
    """Return ``x`` cleaned up for use as a directory name.

    Every "..." is removed first, then every "/" is replaced by "-".
    ``None`` is returned unchanged.
    """
    if x is None:
        return x
    cleaned = x
    # Order matters: strip ellipses before swapping path separators.
    for target, substitute in (("...", ""), ("/", "-")):
        if target in cleaned:
            cleaned = cleaned.replace(target, substitute)
    return cleaned

def realName(x):
    """Split a trailing "(N)" number off an artist name.

    Names such as "Nirvana (2)" carry a numeric suffix in parentheses
    (presumably the Discogs marker for artists sharing a name). The suffix
    may hold one, two or three characters, and at least two characters must
    precede the opening parenthesis.

    Parameters
    ----------
    x : str or None
        Raw artist name.

    Returns
    -------
    list
        ``[name, number]``:
        * ``[None, -1]`` when ``x`` is None,
        * ``[x, -1]`` when ``x`` is empty,
        * ``[x, None]`` when there is no parsable numeric suffix,
        * ``[stripped base name, int]`` otherwise.
    """
    if x is None:
        return [None, -1]

    lenx = len(x)
    if lenx < 1:
        return [x, -1]

    if x[-1] != ")":
        return [x, None]

    # Try suffixes "(d)", "(dd)" and "(ddd)" in turn. The three-character case
    # used to re-test the two-character position, so "(123)" was never parsed.
    for ndigits in (1, 2, 3):
        openPos = -(ndigits + 2)
        # Length guard first, so the negative index below is always valid.
        if lenx < ndigits + 4 or x[openPos] != "(":
            continue
        try:
            num = int(x[openPos + 1:-1])
        except (TypeError, ValueError):
            # Parenthesised text that is not a number, e.g. "Band (UK)".
            return [x, None]
        return [x[:openPos].strip(), num]

    return [x, None]
            

def getAlbumNames(x):
    """Return the values of ``x`` as a list when it is a dict, otherwise ``[]``."""
    return list(x.values()) if isinstance(x, dict) else []
    
    
def splitMetaData(x):
    """Reduce each counter in ``x`` to its (up to) three most common items.

    ``x`` is expected to map a key to an object offering ``most_common``
    (e.g. ``collections.Counter``). Anything that is not a dict yields ``None``.
    """
    if not isinstance(x, dict):
        return None
    return {
        key: [entry[0] for entry in counter.most_common(3)]
        for key, counter in x.items()
    }


def createArtistDB(disc):
    """Build the basic artist DataFrame (Ref, Name, Known).

    Parameters
    ----------
    disc : object
        Helper exposing ``getArtistIDToNameData()`` and
        ``getArtistIDToRefData()``, each returning a dict keyed by artist ID.

    Returns
    -------
    DataFrame
        Indexed by the artist IDs of the ref lookup, with columns ``Ref``,
        ``Name`` (NaN when an ID has no name entry) and ``Known`` (always True).
    """
    # The banner previously read "Creating ArtistAlbums DB", copied from
    # createArtistAlbumsDB; this function builds only the artist table.
    print("Creating Artist DB")

    print("  Loading ArtistID Data")
    artistIDtoName  = disc.getArtistIDToNameData()
    artistIDtoRefs  = disc.getArtistIDToRefData()

    sArtistToRef  = Series(artistIDtoRefs)
    sArtistToName = Series(artistIDtoName)

    print("  Creating Pandas DataFrame for {0} Artists".format(sArtistToRef.shape[0]))
    # Name the columns when the frames are created instead of renaming them by
    # position after the join; the join itself is a left join on the artist ID.
    discdf = sArtistToRef.to_frame("Ref").join(sArtistToName.to_frame("Name"))
    discdf["Known"] = True
    print("  DataFrame Shape is {0}".format(discdf.shape))
    return discdf
    

def createArtistAlbumsDB(disc):
    """Build the per-artist DataFrame with refs, names, metadata and albums.

    Loads the artist, album, artist<->album and artist-metadata lookups from
    ``disc`` and left-joins them onto the artist-ID -> ref lookup. Columns are
    renamed by position to: Ref, Name, Extra Artists, Genres, Styles,
    Albums Data; a constant ``Known`` = True column is appended.

    Parameters
    ----------
    disc : object
        Project helper providing the ``get...`` loaders called below.

    Returns
    -------
    DataFrame
        Indexed by the keys of the artist-ID -> ref lookup.
    """
    print("Creating ArtistAlbums DB")
    
    print("  Loading ArtistID Data")
    artistIDtoName  = disc.getArtistIDToNameData()
    artistIDtoRefs  = disc.getArtistIDToRefData()
    
    # NOTE(review): the album name/ref lookups (and the Series built from them
    # below) are loaded but never used in this function.
    print("  Loading AlbumID Data")
    albumIDtoName   = disc.getAlbumIDToNameData()
    albumIDtoRef    = disc.getAlbumIDToRefData()

    print("  Loading ArtistID <-> AlbumID Data")
    artistIDtoAlbumNames = disc.getArtistIDAlbumNames()
    artistIDtoAlbumIDs   = disc.getArtistIDAlbumIDs()

    print("  Loading Artist MetaData")
    artistMetaData = disc.getAlbumArtistMetaData()


    sArtistToRef  = Series(artistIDtoRefs)
    sArtistToName = Series(artistIDtoName)
    sAlbumToRef   = Series(albumIDtoRef)
    sAlbumToName  = Series(albumIDtoName)

    sArtistToAlbums = Series(artistIDtoAlbumIDs)
    sArtistToAlbumNames = Series(artistIDtoAlbumNames)
    # One dict per artist, pairing that artist's album IDs with album names.
    # NOTE(review): the two Series are zipped by position, not aligned on their
    # index, so this assumes both lookups list artists (and each artist's
    # albums) in the same order -- confirm.
    sArtistAlbums = Series([dict(zip(x, y)) for x,y in list(zip(sArtistToAlbums.values, sArtistToAlbumNames))], index=sArtistToAlbums.index)

    # splitMetaData keeps up to three most common entries per metadata key.
    sArtistMetaData = Series(artistMetaData)
    sArtistMetaData = sArtistMetaData.apply(splitMetaData)
    
    print("  Creating Pandas DataFrame for {0} Artists".format(sArtistToRef.shape[0]))
    # `cols` accumulates the intended column names; after every join the whole
    # frame is renamed by position.
    cols = ["Ref"]
    discdf = DataFrame(sArtistToRef)
    discdf.columns = cols
    discdf = discdf.join(DataFrame(sArtistToName))
    cols += ["Name"]
    discdf.columns = cols
    # Expand the per-artist metadata dicts into one column per key.
    # NOTE(review): the positional rename below assumes exactly three metadata
    # keys arriving in the order Extra Artists, Genres, Styles -- confirm
    # against the metadata files.
    tmp = DataFrame(DataFrame(sArtistMetaData)[0].tolist())
    tmp.index = sArtistMetaData.index
    discdf = discdf.join(tmp)
    cols += ["Extra Artists", "Genres", "Styles"]
    discdf.columns = cols
    discdf = discdf.join(DataFrame(sArtistAlbums))
    cols += ["Albums Data"]
    discdf.columns = cols
    discdf["Known"] = True
    print("  DataFrame Shape is {0}".format(discdf.shape))    
    return discdf


def createArtistName(discdf):
    """Add ``Artist`` and ``Artist Num`` columns derived from ``Name``.

    Each name is split by ``realName`` into a base name and a number; the base
    name is then passed through ``directoryName``. ``discdf`` is modified in
    place and also returned.
    """
    parsedNames = discdf["Name"].apply(realName).tolist()
    # Column 0 holds the base name, column 1 the parsed number.
    nameParts = DataFrame(parsedNames, index=discdf.index)
    discdf["Artist"]     = nameParts[0].apply(directoryName)
    discdf["Artist Num"] = nameParts[1]
    return discdf


def createAlbums(discdf):
    """Add an ``Albums`` column holding the values of each ``Albums Data`` dict.

    Rows whose ``Albums Data`` is not a dict get an empty list. ``discdf`` is
    modified in place and also returned.
    """
    albumData = discdf["Albums Data"]
    discdf["Albums"] = albumData.apply(getAlbumNames)
    return discdf


def createCollectionsDB(disc):
    """Build the collections DataFrame (Ref, Name, Counts).

    The ID -> ref and ID -> name lookups are joined on the ID index, then the
    ref -> count lookup is merged in on ``Ref``. The result is re-indexed with
    the index of the ID -> ref lookup.

    Parameters
    ----------
    disc : object
        Helper exposing ``getCollectionIDToNameData()``,
        ``getCollectionIDToRefData()`` and ``getCollectionRefCountsData()``.
    """
    print("Creating Collections DB")

    print("  Loading Collection Data")
    nameByID   = disc.getCollectionIDToNameData()
    refByID    = disc.getCollectionIDToRefData()
    countByRef = disc.getCollectionRefCountsData()

    refSeries = Series(refByID)
    print("  Creating Pandas DataFrame for {0} Artists".format(refSeries.shape[0]))
    coldiscdf = refSeries.to_frame("Ref").join(Series(nameByID).to_frame("Name"))

    # Turn the ref -> count lookup into a two-column frame so it can be merged on "Ref".
    countsFrame = Series(countByRef).to_frame("Counts").reset_index()
    countsFrame.columns = ["Ref", "Counts"]

    coldiscdf = coldiscdf.merge(countsFrame, on="Ref")
    # merge() returns a fresh positional index; put the ID index back.
    coldiscdf.index = refSeries.index

    print("  DataFrame Shape is {0}".format(coldiscdf.shape))
    return coldiscdf


def createFullArtistDB(disc):
    """Build an artist DataFrame with Ref, Name, Known and artist metadata.

    Parameters
    ----------
    disc : object
        Project helper providing the ``get...Data`` loaders called below.

    Returns
    -------
    DataFrame
        Indexed by the keys of the artist-ID -> ref lookup. The metadata
        columns keep whatever key names the metadata dicts use (see the note
        at the end of the function).
    """
    print("Creating Artist DB")
    
    print("  Loading ArtistID Data")
    artistIDtoName  = disc.getArtistIDToNameData()
    sArtistToName   = Series(artistIDtoName)

    artistIDtoRef   = disc.getArtistIDToRefData()
    sArtistToRef    = Series(artistIDtoRef)
    
    print("  Creating Pandas DataFrame for {0} Artists".format(sArtistToRef.shape[0]))
    # Columns are renamed by position after each join.
    cols = ["Ref"]
    discdf = DataFrame(sArtistToRef)
    discdf.columns = cols
    discdf = discdf.join(DataFrame(sArtistToName))
    cols += ["Name"]
    discdf.columns = cols
    discdf["Known"] = True
    
    
    # NOTE(review): the album name/ref lookups and their Series are loaded but
    # never used in this function.
    print("  Loading AlbumID Data")
    albumIDtoName   = disc.getAlbumIDToNameData()
    sAlbumToName    = Series(albumIDtoName)
    albumIDtoRef    = disc.getAlbumIDToRefData()
    sAlbumToRef     = Series(albumIDtoRef)
    
    # splitMetaData keeps up to three most common entries per metadata key.
    print("  Loading ArtistID MetaData")
    artistIDMetaData = disc.getAlbumArtistMetaData()
    sArtistMetaData = Series(artistIDMetaData)
    sArtistMetaData = sArtistMetaData.apply(splitMetaData)
    
    print("  Joining Pandas DataFrame for Artist Metadata")
    # Expand the per-artist metadata dicts into one column per key.
    tmp = DataFrame(DataFrame(sArtistMetaData)[0].tolist())
    tmp.index = sArtistMetaData.index
    discdf = discdf.join(tmp)
    # NOTE(review): `cols` is extended here but never assigned back to
    # discdf.columns, so this line has no effect on the returned frame. It also
    # omits "Known", so assigning it as-is would not match the column count.
    cols += ["Extra Artists", "Genres", "Styles"]
    
    
    print("  DataFrame Shape is {0}".format(discdf.shape))    
    return discdf


def mergeDBs(discdf, coldiscdf):
    """Left-merge the artist DB onto the collections DB.

    Rows of ``coldiscdf`` are kept and matched against ``discdf`` on both
    ``Ref`` and ``Name``. The merged frame is given ``coldiscdf``'s index.
    """
    print("  Merging AlbumArtists {0} and Collections {1} DBs".format(discdf.shape, coldiscdf.shape))
    joinKeys = ["Ref", "Name"]
    musicdf = coldiscdf.merge(discdf, how='left', on=joinKeys)
    # merge() resets the index to positions; restore the collection IDs.
    musicdf.index = coldiscdf.index
    print("  Merged DataFrame Shape is {0}".format(musicdf.shape))
    return musicdf

# Artists Discogs Merge

In [None]:
%load_ext autoreload
%autoreload
# Build the basic artist table (Ref, Name, Known) and add the Artist / Artist Num columns.
disc      = discogs()
discdf    = createArtistDB(disc)
discdf    = createArtistName(discdf)

# Save the frame under the master DB filename.
# NOTE(review): other cells in this notebook save different frames to the same filename.
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discdf, ifile=savename, debug=True)

# Show the first rows as the cell output.
discdf.head()

# Discogs Merge

In [None]:
createArtistDB

In [None]:
%load_ext autoreload
%autoreload
# NOTE(review): this cell repeats the statements of the "Artists Discogs Merge" cell verbatim.
# Build the basic artist table (Ref, Name, Known) and add the Artist / Artist Num columns.
disc      = discogs()
discdf    = createArtistDB(disc)
discdf    = createArtistName(discdf)

# Save the frame under the master DB filename.
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discdf, ifile=savename, debug=True)

# Show the first rows as the cell output.
discdf.head()

# Full Merge

In [None]:
%load_ext autoreload
%autoreload
# Build the full artist table: refs, names, metadata and per-artist albums,
# then derive the Artist / Artist Num and Albums columns.
disc      = discogs()
discdf    = createArtistAlbumsDB(disc)
discdf    = createArtistName(discdf)
discdf    = createAlbums(discdf)

# Save the frame under the master DB filename.
# NOTE(review): this is the same filename the smaller artist-only frame is saved to above.
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discdf, ifile=savename, debug=True)

# Show the first rows as the cell output.
discdf.head()

# Artist ID --> Ref and Name

In [None]:
# Collect artist ID -> name / ref / variations lookups from the per-file
# metadata DBs and save each lookup as its own file.
# Requires `disc` from an earlier cell.
start, cmt = clock("Creating Artist DBs")
from searchUtils import findPatternExt

artistIDToName       = {}
artistIDToRef        = {}
artistIDToVariations = {}

artistMetadataDBDir = disc.getArtistsMetadataDBDir()
files = findPatternExt(artistMetadataDBDir, pattern="-Metadata", ext='.p')

for i,ifile in enumerate(files):
    print(ifile,' \t',end="")
    db = getFile(ifile)
    # Each db value is indexed positionally: [0] is stored as the name,
    # [1] as the ref and [2] as the variations.
    artistIDToName.update({k: v[0] for k,v in db.items()})
    artistIDToRef.update({k: v[1] for k,v in db.items()})    
    artistIDToVariations.update({k: v[2] for k,v in db.items()})

    # Progress: file counter and running number of artists.
    print(i,len(artistIDToName))
print("\n\n==============================================\n")
    
# Written to <DiscogDBDir>/Artist<basename>.p, one file per lookup.
savenames = {"IDToRef": artistIDToRef, "IDToName": artistIDToName, "IDToVariations": artistIDToVariations}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)
    
elapsed(start, cmt)

# Artist ID --> Albums

In [None]:
# Collect artist ID -> media names / refs lookups from the "-MediaMetadata"
# files and save each lookup as its own file.
# Requires `disc` from an earlier cell.
start, cmt = clock("Creating Artist DBs")

artistIDAlbumNames     = {}
artistIDAlbumRefs      = {}
artistIDCoreAlbumNames = {}
artistIDCoreAlbumRefs  = {}

artistMetadataDBDir = disc.getArtistsMetadataDBDir()
files = findPatternExt(artistMetadataDBDir, pattern="-MediaMetadata", ext='.p')

# Media types that are additionally recorded in the "core" lookups.
core = ["Albums"]

for i,ifile in enumerate(files):
    print(ifile,'\t',end="")
    db = getFile(ifile)

    for artistID,artistData in db.items():
        artistIDAlbumNames[artistID]     = {}
        artistIDAlbumRefs[artistID]      = {}
        artistIDCoreAlbumNames[artistID] = {}
        artistIDCoreAlbumRefs[artistID]  = {}

        for mediaName,mediaData in artistData.items():
            # mediaData[0] is stored as the names entry, mediaData[1] as the refs entry.
            artistIDAlbumNames[artistID][mediaName] = mediaData[0]
            artistIDAlbumRefs[artistID][mediaName]  = mediaData[1]
            if mediaName in core:
                artistIDCoreAlbumNames[artistID][mediaName] = mediaData[0]
                artistIDCoreAlbumRefs[artistID][mediaName]  = mediaData[1]

    # Progress: running number of artists.
    print(len(artistIDAlbumNames))
print("\n\n==============================================\n")


# Only the lookups built in this cell are saved. artistIDToVariations is built
# and saved by the "Artist ID --> Ref and Name" cell; listing it here made this
# cell fail with a NameError on a fresh kernel after the whole loop had run.
savenames = {"IDToAlbumNames": artistIDAlbumNames, "IDToAlbumRefs": artistIDAlbumRefs,
             "IDToCoreAlbumNames": artistIDCoreAlbumNames, "IDToCoreAlbumRefs": artistIDCoreAlbumRefs}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)


elapsed(start, cmt)

# Artist ID --> Genre, Style, Artists Lookup Table

In [31]:
# Collect artist ID -> genre / style / collaborations lookups from the
# "-ArtistMetadata" files and save each lookup as its own file.
# Requires `disc` from an earlier cell.
start, cmt = clock("Creating Artist DBs")

artistIDGenre          = {}
artistIDStyle          = {}
artistIDCollaborations = {}

albumsMetadataDBDir = disc.getAlbumsMetadataDBDir()
files = findPatternExt(albumsMetadataDBDir, pattern="-ArtistMetadata", ext='.p')

for ifile in files:
    print(ifile,'\t',end="")
    for artistID,artistData in getFile(ifile).items():
        genre   = artistData['Genre']
        artistIDGenre[artistID] = genre
        # Not named `artists`: that would overwrite the `artists` class imported
        # at the top of the notebook and break later `artists(disc)` calls.
        collaborations = artistData['Artists']
        artistIDCollaborations[artistID] = collaborations
        style   = artistData['Style']
        artistIDStyle[artistID] = style
    # Progress: running number of artists.
    print(len(artistIDGenre))
print("\n\n==============================================\n")


# Written to <DiscogDBDir>/Artist<basename>.p, one file per lookup.
savenames = {"IDToGenre": artistIDGenre, "IDToStyle": artistIDStyle, "IDToCollaborations": artistIDCollaborations}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

# Close the timer started above, as the other DB-building cells do.
elapsed(start, cmt)

Current Time is Mon Oct 14, 2019 21:19:18 for Creating Artist DBs
/Volumes/Music/Discog/albums-db/metadata/0-ArtistMetadata.p 	3493
/Volumes/Music/Discog/albums-db/metadata/1-ArtistMetadata.p 	7238
/Volumes/Music/Discog/albums-db/metadata/10-ArtistMetadata.p 	9892
/Volumes/Music/Discog/albums-db/metadata/11-ArtistMetadata.p 	13550
/Volumes/Music/Discog/albums-db/metadata/12-ArtistMetadata.p 	18436
/Volumes/Music/Discog/albums-db/metadata/13-ArtistMetadata.p 	21415
/Volumes/Music/Discog/albums-db/metadata/14-ArtistMetadata.p 	26812
/Volumes/Music/Discog/albums-db/metadata/15-ArtistMetadata.p 	32399
/Volumes/Music/Discog/albums-db/metadata/16-ArtistMetadata.p 	36881
/Volumes/Music/Discog/albums-db/metadata/17-ArtistMetadata.p 	40634
/Volumes/Music/Discog/albums-db/metadata/18-ArtistMetadata.p 	43345
/Volumes/Music/Discog/albums-db/metadata/19-ArtistMetadata.p 	48814
/Volumes/Music/Discog/albums-db/metadata/2-ArtistMetadata.p 	52340
/Volumes/Music/Discog/albums-db/metadata/20-ArtistMetada

# Album ID --> Name, Ref, Artists Lookup Table

In [47]:
# Collect album ID -> name / ref / artist-ID-list lookups from the
# "-ArtistAlbums" files and save each lookup as its own file.
# Requires `disc` from an earlier cell.
start, cmt = clock("Creating Artist DBs")

albumIDToName    = {}
albumIDToRef     = {}
albumIDToArtists = {}

albumsMetadataDBDir = disc.getAlbumsMetadataDBDir()
files = findPatternExt(albumsMetadataDBDir, pattern="-ArtistAlbums", ext='.p')
for ifile in files:
    print(ifile,'\t',end="")
    for artistID,artistData in getFile(ifile).items():
        for albumID,albumData in artistData.items():
            # albumData[0] is stored as the name and albumData[1] as the ref.
            # albumData[2] and albumData[3] were previously reduced with
            # most_common(1)[0] into variables that were never used; that
            # lookup raises IndexError on an empty counter, so it is dropped.
            albumName    = albumData[0]
            albumRef     = albumData[1]

            albumIDToName[albumID] = albumName
            albumIDToRef[albumID]  = albumRef

            albumIDToArtists.setdefault(albumID, []).append(artistID)
    # Progress: running number of albums.
    print(len(albumIDToArtists))
print("\n\n==============================================\n")

# An album can be reached through several files; keep each artist ID once.
for albumID in albumIDToArtists.keys():
    albumIDToArtists[albumID] = list(set(albumIDToArtists[albumID]))
print("\n\n==============================================\n")


# Written to <DiscogDBDir>/Album<basename>.p, one file per lookup.
savenames = {"IDToName": albumIDToName, "IDToRef": albumIDToRef, "IDToArtists": albumIDToArtists}
for basename,savedata in savenames.items():
    savename = setFile(disc.getDiscogDBDir(), "Album{0}.p".format(basename))
    print("Saving {0} entries to {1}\n".format(len(savedata), savename))
    saveFile(ifile=savename, idata=savedata, debug=True)

# Close the timer started above, as the other DB-building cells do.
elapsed(start, cmt)

Current Time is Mon Oct 14, 2019 21:51:21 for Creating Artist DBs
/Volumes/Music/Discog/albums-db/metadata/0-ArtistAlbums.p 	12701
/Volumes/Music/Discog/albums-db/metadata/1-ArtistAlbums.p 	27485
/Volumes/Music/Discog/albums-db/metadata/10-ArtistAlbums.p 	35046
/Volumes/Music/Discog/albums-db/metadata/11-ArtistAlbums.p 	47136
/Volumes/Music/Discog/albums-db/metadata/12-ArtistAlbums.p 	60623
/Volumes/Music/Discog/albums-db/metadata/13-ArtistAlbums.p 	69479
/Volumes/Music/Discog/albums-db/metadata/14-ArtistAlbums.p 	89609
/Volumes/Music/Discog/albums-db/metadata/15-ArtistAlbums.p 	104245
/Volumes/Music/Discog/albums-db/metadata/16-ArtistAlbums.p 	119341
/Volumes/Music/Discog/albums-db/metadata/17-ArtistAlbums.p 	129051
/Volumes/Music/Discog/albums-db/metadata/18-ArtistAlbums.p 	141675
/Volumes/Music/Discog/albums-db/metadata/19-ArtistAlbums.p 	158718
/Volumes/Music/Discog/albums-db/metadata/2-ArtistAlbums.p 	171692
/Volumes/Music/Discog/albums-db/metadata/20-ArtistAlbums.p 	180990
/Volum

# Artist Pandas DB

In [48]:
def getArtistDB():
    """Assemble the artist DataFrame from the saved artist-ID lookups.

    Reads the name, ref, variations and album-name lookups through the global
    ``disc`` helper, left-joins them onto the name lookup by artist ID, adds a
    constant ``Known`` = True column and the ``Artist`` / ``Artist Num``
    columns produced by ``createArtistName``.

    Returns
    -------
    DataFrame
        Columns: Name, Ref, Variations, Albums, Known, Artist, Artist Num.
    """
    start, cmt = clock("===================================== Creating Artist DB =====================================")
    from pandas import Series
    print("Loading ArtistID Data")
    nameByID       = Series(disc.getArtistIDToNameData())
    refByID        = Series(disc.getArtistIDToRefData())
    variationsByID = Series(disc.getArtistIDToVariationsData())
    albumsByID     = Series(disc.getArtistIDToAlbumNamesData())
    #artistIDtoAlbumRefs   = Series(disc.getArtistIDToAlbumRefsData())

    print("Creating Pandas DataFrame for {0} Artists".format(nameByID.shape[0]))
    discdf = nameByID.to_frame("Name")

    # Each join is a left join on the artist-ID index of the name lookup.
    print("  Joining Ref")
    discdf = discdf.join(refByID.to_frame("Ref"))

    print("  Joining Variations")
    discdf = discdf.join(variationsByID.to_frame("Variations"))

    print("  Joining Albums")
    discdf = discdf.join(albumsByID.to_frame("Albums"))

    discdf["Known"] = True

    discdf = createArtistName(discdf)

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    return discdf



def getArtistMetadataDB():
    """Assemble the artist metadata DataFrame from the saved lookups.

    Reads the genre, style and collaboration lookups through the global
    ``disc`` helper and left-joins style and collaboration onto the genre
    lookup by artist ID.

    Returns
    -------
    DataFrame
        Columns: Genre, Style, Collaboration.
    """
    start, cmt = clock("===================================== Creating Artist Metadata DB =====================================")
    from pandas import Series
    print("Loading ArtistID Data")
    genreByID         = Series(disc.getArtistIDToGenreData())
    styleByID         = Series(disc.getArtistIDToStyleData())
    collaborationByID = Series(disc.getArtistIDToCollaborationData())

    print("Creating Pandas DataFrame for {0} Artists".format(genreByID.shape[0]))
    discdf = genreByID.to_frame("Genre")

    print("  Joining Style")
    discdf = discdf.join(styleByID.to_frame("Style"))

    print("  Joining Collaboration")
    discdf = discdf.join(collaborationByID.to_frame("Collaboration"))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    return discdf



def getAlbumDB():
    """Assemble the album DataFrame from the saved album-ID lookups.

    Reads the name, ref and artists lookups through the global ``disc`` helper
    and left-joins ref and artists onto the name lookup by album ID.

    Returns
    -------
    DataFrame
        Columns: Name, Ref, Artists.
    """
    start, cmt = clock("===================================== Creating Artist Album DB =====================================")
    from pandas import Series, DataFrame
    print("Loading AlbumID Data")
    albumIDtoName    = Series(disc.getAlbumIDToNameData())
    albumIDtoRef     = Series(disc.getAlbumIDToRefData())
    albumIDToArtists = Series(disc.getAlbumIDToArtistsData())

    print("Creating Pandas DataFrame for {0} Albums".format(albumIDtoName.shape[0]))
    # `cols` accumulates the column names; the frame is renamed by position after each join.
    cols = ["Name"]
    discdf = DataFrame(albumIDtoName)
    discdf.columns = cols

    print("  Joining Ref")
    discdf = discdf.join(DataFrame(albumIDtoRef))
    cols += ["Ref"]
    discdf.columns = cols

    print("  Joining Artists")
    discdf = discdf.join(DataFrame(albumIDToArtists))
    cols += ["Artists"]
    discdf.columns = cols

    print("DataFrame Shape is {0}".format(discdf.shape))
    # Close the timer started above; getArtistDB and getArtistMetadataDB do the
    # same, but this function previously left it open.
    elapsed(start, cmt)

    return discdf

In [57]:
discArtistDB = getArtistDB()
#discArtistMetaDB = getArtistMetadataDB()
#print("DataFrame Shape is {0}".format(discdf.shape))
#discArtistDB = discArtistDB.join(discArtistMetaDB, how='left')
#print("DataFrame Shape is {0}".format(discdf.shape))

Loading ArtistID Data
Loading data from /Volumes/Music/Discog/db/ArtistIDToName.p
  --> This file is 10.3MB.
Loading /Volumes/Music/Discog/db/ArtistIDToName.p
Loading data from /Volumes/Music/Discog/db/ArtistIDToRef.p
  --> This file is 12.2MB.
Loading /Volumes/Music/Discog/db/ArtistIDToRef.p
Loading data from /Volumes/Music/Discog/db/ArtistIDToVariations.p
  --> This file is 40.0MB.
Loading /Volumes/Music/Discog/db/ArtistIDToVariations.p
Loading data from /Volumes/Music/Discog/db/ArtistIDToAlbumNames.p
  --> This file is 67.5MB.
Loading /Volumes/Music/Discog/db/ArtistIDToAlbumNames.p
Creating Pandas DataFrame for 544462 Artists
  Joining Ref
  Joining Variations
  Joining Albums
DataFrame Shape is (544462, 7)


In [58]:
savename = disc.getMasterDiscogsDBFilename()
saveFile(idata=discArtistDB, ifile=savename, debug=True)

Saving data to /Volumes/Music/Discog/db/MasterDB.p
  --> This file is 209.7MB.
Saved data to /Volumes/Music/Discog/db/MasterDB.p
  --> This file is 209.7MB.


In [52]:
discAlbumDB = getAlbumDB()

Loading AlbumID Data
Loading data from /Volumes/Music/Discog/db/AlbumIDToName.p
  --> This file is 26.1MB.
Loading /Volumes/Music/Discog/db/AlbumIDToName.p
Loading data from /Volumes/Music/Discog/db/AlbumIDToRef.p
  --> This file is 36.7MB.
Loading /Volumes/Music/Discog/db/AlbumIDToRef.p
Loading data from /Volumes/Music/Discog/db/AlbumIDToArtists.p
  --> This file is 20.8MB.
Loading /Volumes/Music/Discog/db/AlbumIDToArtists.p
Creating Pandas DataFrame for 1123572 Albums
  Joining Ref
  Joining Artists
DataFrame Shape is (1123572, 3)


# Missing / ToDo

In [None]:
from fsUtils import removeFile

In [None]:
%load_ext autoreload
%autoreload
disc = discogs()
arts = artists(disc)

# Re-download every artist whose Name is missing in `discdf`.
# Requires `discdf` from one of the merge cells above and `removeFile` from the
# import cell directly before this one.
toget = discdf.loc[discdf['Name'].isna(), 'Ref'].to_dict()
for artistID,artistRef in toget.items():
    url = arts.getArtistURL(artistRef)
    savename = arts.getArtistSavename(artistID)
    # Drop any previously saved copy so the download starts clean.
    if isFile(savename):
        removeFile(savename)
    try:
        arts.downloadArtistURL(url=url, savename=savename, debug=True, force=True)
    except Exception as err:
        # Best effort: report the failure and move on to the next artist.
        # Catching Exception (not a bare except) lets Ctrl-C / kernel interrupt
        # stop the loop.
        print("Could not download artist {0} from {1}: {2}".format(artistID, url, err))
        continue
