# Master Discogs Database

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))


################################################################################
## General Stuff
################################################################################
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from urllib.parse import quote
from collections import Counter
from artist import artist
from searchUtils import findExt, findSubExt, findPatternExt, findNearest
from timeUtils import clock, elapsed
from fsUtils import moveFile, setFile, setDir, setSubDir, isFile, isDir, mkDir
from fileUtils import getFileBasics, getBasename
from time import sleep


################################################################################
## Music Stuff
################################################################################

### MultiArtist
from multiArtist import multiartist

### My Music DB
from myMusicDBMap import myMusicDBMap

### Master DB code
from masterdb import masterdb


import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2020-06-12 19:43:02.790435


In [2]:
mdb = myMusicDBMap(debug=True)

Creating myMusicDBMap()
   Loading my music db map: /anaconda3/envs/py37/musicdb/myMusicMap.p
   DB keys: ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm']
Counter({'Discogs': 3919, 'AllMusic': 3886, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceBootlegs': 175, 'CDandLP': 80, 'RateYourMusic': 30, 'MusicStack': 5, 'MetalStorm': 0})


In [3]:
# Registry of per-source handlers: dbdata[<source name>] is a dict keyed by the
# entries of `keys` (plus "Disc", which is added in the loop at the bottom).
dbdata = {}
# Order matters: each `dbinfo` list below is zipped against these keys, so every
# list must be ordered [<artists class>, <artist class>, <utils class>].
keys   = ["Artists", "Artist", "Utils"]

### Discogs
from artistsDC import artistsDC
from artistDC import artistDC
from discogsUtils import discogsUtils
dbinfo = [artistsDC, artistDC, discogsUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["Discogs"] = dbinfo

### AllMusic
from artistsAM import artistsAM
from artistAM import artistAM
# Note: every source's utils class is imported from the discogsUtils module.
from discogsUtils import allmusicUtils
dbinfo = [artistsAM, artistAM, allmusicUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["AllMusic"] = dbinfo

### MusicBrainz
from artistsMB import artistsMB
from artistMB import artistMB
from discogsUtils import musicbrainzUtils
dbinfo = [artistsMB, artistMB, musicbrainzUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["MusicBrainz"] = dbinfo

## AceBootlegs
from artistAB import artistAB
from artistsAB import artistsAB
from discogsUtils import acebootlegsUtils
dbinfo = [artistsAB, artistAB, acebootlegsUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["AceBootlegs"] = dbinfo

## RateYourMusic
from artistRM import artistRM
from artistsRM import artistsRM
from discogsUtils import rateyourmusicUtils
dbinfo = [artistsRM, artistRM, rateyourmusicUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["RateYourMusic"] = dbinfo

## LastFM
from artistLM import artistLM
from artistsLM import artistsLM
from discogsUtils import lastfmUtils
dbinfo = [artistsLM, artistLM, lastfmUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["LastFM"] = dbinfo

## DatPiff
from artistDP import artistDP
from artistsDP import artistsDP
from discogsUtils import datpiffUtils
dbinfo = [artistsDP, artistDP, datpiffUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["DatPiff"] = dbinfo

## RockCorner
from artistRC import artistRC
from artistsRC import artistsRC
from discogsUtils import rockcornerUtils
dbinfo = [artistsRC, artistRC, rockcornerUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["RockCorner"] = dbinfo

## CDandLP
from artistCL import artistCL
from artistsCL import artistsCL
from discogsUtils import cdandlpUtils
dbinfo = [artistsCL, artistCL, cdandlpUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["CDandLP"] = dbinfo

## MusicStack
from artistMS import artistMS
from artistsMS import artistsMS
from discogsUtils import musicstackUtils
dbinfo = [artistsMS, artistMS, musicstackUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["MusicStack"] = dbinfo

## MetalStorm
from artistMT import artistMT
from artistsMT import artistsMT
from discogsUtils import metalstormUtils
dbinfo = [artistsMT, artistMT, metalstormUtils]
dbinfo = dict(zip(keys, dbinfo))
dbdata["MetalStorm"] = dbinfo

## General
# Replace the classes stored above with live instances. After this loop each
# dbdata[db] holds objects, not classes, so re-running only this loop (without
# re-running the assignments above) would try to call instances.
from discogsBase import discogs
for db in dbdata.keys():
    print("Creating DB Info For {0}".format(db))
    # The base object is keyed by the lower-cased source name.
    dbdata[db]["Disc"]    = discogs(db.lower())
    # The Artist and Artists classes are constructed with the source's base object.
    dbdata[db]["Artist"]  = dbdata[db]["Artist"](dbdata[db]["Disc"])
    dbdata[db]["Artists"] = dbdata[db]["Artists"](dbdata[db]["Disc"])
    # The Utils classes take no constructor arguments.
    dbdata[db]["Utils"]   = dbdata[db]["Utils"]()

Creating DB Info For Discogs
Creating DB Info For AllMusic
Creating DB Info For MusicBrainz
Creating DB Info For AceBootlegs
Creating DB Info For RateYourMusic
Creating DB Info For LastFM
Creating DB Info For DatPiff
Creating DB Info For RockCorner
Creating DB Info For CDandLP
Creating DB Info For MusicStack
Creating DB Info For MetalStorm


In [4]:
%load_ext autoreload
%autoreload

# Select which sources to (re)build. Only the LAST assignment takes effect; the
# two earlier ones are overwritten and act as quick toggles for the author.
dbs = dbdata.keys()
dbs = ["DatPiff", "RockCorner", "CDandLP", "MusicStack", "MetalStorm"]
dbs = ["Discogs", "AllMusic", "MusicBrainz"]

########################################################################################################
## Loop over DBs
########################################################################################################
for db in dbs:
    # Banner so each source's (long) log output is easy to find.
    print("\n")
    print("="*125)
    print("="*125)
    print("=",db)
    print("="*125)
    print("="*125)
    print("\n")
    
    
    # masterdb is constructed with force=True for every source on each run of
    # this cell, and is handed the music map `mdb` created in an earlier cell.
    disc  = dbdata[db]["Disc"]
    mymdb = masterdb(db, disc, force=True)
    mymdb.setMyMusicDB(mdb)
    
    
    #################################
    # Artists
    #################################
    mymdb.createArtistIDMap()
    slimArtistDB      = mymdb.getSlimArtistDB()
    print("All   --> {0}".format(slimArtistDB.shape))
    knownSlimArtistDB = mymdb.getKnownSlimArtistDB()
    print("Known --> {0}".format(knownSlimArtistDB.shape))
        
    
    #################################
    # Artist Albums
    #################################
    mymdb.createArtistAlbumIDMap()
    slimArtistAlbumsDB      = mymdb.getSlimArtistAlbumsDB()
    print("All Albums    --> {0}".format(slimArtistAlbumsDB.shape))
    knownSlimArtistAlbumsDB = mymdb.getKnownSlimArtistAlbumsDB()
    print("Known Albums  --> {0}".format(knownSlimArtistAlbumsDB.shape))
    # Note: the frames above are rebound on every iteration, so after the loop
    # they refer to the last source in `dbs` only.

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


= Discogs


Current Time is Fri Jun 12, 2020 19:43:57 for Creating Artist DBs
/Volumes/Piggy/Discog/artists-discogs-db/metadata/0-Metadata.p  	0 7326
/Volumes/Piggy/Discog/artists-discogs-db/metadata/1-Metadata.p  	1 14910
/Volumes/Piggy/Discog/artists-discogs-db/metadata/10-Metadata.p  	2 22291
/Volumes/Piggy/Discog/artists-discogs-db/metadata/11-Metadata.p  	3 29743
/Volumes/Piggy/Discog/artists-discogs-db/metadata/12-Metadata.p  	4 37229
/Volumes/Piggy/Discog/artists-discogs-db/metadata/13-Metadata.p  	5 44748
/Volumes/Piggy/Discog/artists-discogs-db/metadata/14-Metadata.p  	6 52017
/Volumes/Piggy/Discog/artists-discogs-db/metadata/15-Metadata.p  	7 59417
/Volumes/Piggy/Discog/artists-discogs-db/metadata/16-Metadata.p  	8 66738
/Volumes/Piggy/Discog/artists-discogs-db/metadata/17-Metadata.p  	9 74165
/Volumes/Piggy/Discog/artists-discogs-db/metadata/18-Metadata.p  	10 81610
/Volumes/Piggy/Disco

/Volumes/Piggy/Discog/artists-discogs-db/metadata/99-Metadata.p  	99 738742



Saving 738742 entries to /Volumes/Piggy/Discog/db-discogs/ArtistIDToRef.p

Saving data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToRef.p
  --> This file is 16.8MB.
Saved data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToRef.p
  --> This file is 16.8MB.
Saving 738742 entries to /Volumes/Piggy/Discog/db-discogs/ArtistIDToName.p

Saving data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToName.p
  --> This file is 14.1MB.
Saved data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToName.p
  --> This file is 14.1MB.
Saving 738742 entries to /Volumes/Piggy/Discog/db-discogs/ArtistIDToVariations.p

Saving data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToVariations.p
  --> This file is 27.5MB.
Saved data to /Volumes/Piggy/Discog/db-discogs/ArtistIDToVariations.p
  --> This file is 27.5MB.
Current Time is Fri Jun 12, 2020 19:45:20 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 1.4 minu

/Volumes/Piggy/Discog/artists-discogs-db/metadata/6-MediaMetadata.p 	420268    925711    3924357   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/60-MediaMetadata.p 	427671    942588    3997946   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/61-MediaMetadata.p 	435024    959878    4068549   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/62-MediaMetadata.p 	442495    976503    4135386   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/63-MediaMetadata.p 	449961    992120    4205471   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/64-MediaMetadata.p 	457479    1008427   4274296   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/65-MediaMetadata.p 	464894    1025819   4347692   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/66-MediaMetadata.p 	472286    1042466   4417107   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/67-MediaMetadata.p 	479768    1058177   4484721   
/Volumes/Piggy/Discog/artists-discogs-db/metadata/68-MediaMetadata.p 	487026    1076473   45

/Volumes/Piggy/Discog/artists-allmusic-db/metadata/14-Metadata.p  	0 362
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/21-Metadata.p  	1 748
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/0-Metadata.p  	2 1131
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/7-Metadata.p  	3 1532
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/15-Metadata.p  	4 1893
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/22-Metadata.p  	5 2254
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/1-Metadata.p  	6 2619
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/8-Metadata.p  	7 3016
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/16-Metadata.p  	8 3410
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/23-Metadata.p  	9 3816
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/2-Metadata.p  	10 4210
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/9-Metadata.p  	11 4605
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/24-Metadata.p  	12 4982
/Volumes/Piggy/Discog/artists-allmusic-db/m

  --> This file is 835.2kB.
Saved data to /Volumes/Piggy/Discog/db-allmusic/ArtistIDToVariations.p
  --> This file is 835.2kB.
Current Time is Fri Jun 12, 2020 20:00:52 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 5 seconds.
Current Time is Fri Jun 12, 2020 20:00:52 for 
Loading ArtistID Data
Creating Pandas DataFrame for 38907 Artists
	Shape --> (38907, 1)
  Finding Real Artist Name
	Shape --> (38907, 3)
  Removing None Artist
	Shape --> (38907, 3)
  Finding Disc Artist Name
	Shape --> (38907, 4)
DataFrame Shape is (38907, 4)
Current Time is Fri Jun 12, 2020 20:00:53 for Done with 
Process [Done with 
Saving Master Artist DB File: /Volumes/Piggy/Discog/db-allmusic/MasterSlimArtistDB.p
  --> This file is 2.9MB.
All   --> (38907, 4)
Current Time is Fri Jun 12, 2020 20:00:54 for 
DataFrame Shape is (38907, 4)
Found 4085 artist entries
Counter({'Discogs': 3919, 'AllMusic': 3886, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceB

/Volumes/Piggy/Discog/artists-allmusic-db/metadata/78-MediaMetadata.p 	24931     111953    312005    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/65-MediaMetadata.p 	25312     114026    317220    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/58-MediaMetadata.p 	25711     116015    322774    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/72-MediaMetadata.p 	26096     118457    328980    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/79-MediaMetadata.p 	26479     120451    333464    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/66-MediaMetadata.p 	26870     122099    337706    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/59-MediaMetadata.p 	27242     124174    343022    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/73-MediaMetadata.p 	27619     126171    348037    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/80-MediaMetadata.p 	28042     128804    354176    
/Volumes/Piggy/Discog/artists-allmusic-db/metadata/67-MediaMetadata.p 	28428     1

/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/19-Metadata.p  	11 11331
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/2-Metadata.p  	12 12312
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/20-Metadata.p  	13 13253
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/21-Metadata.p  	14 14216
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/22-Metadata.p  	15 15151
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/23-Metadata.p  	16 16093
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/24-Metadata.p  	17 17069
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/25-Metadata.p  	18 18027
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/26-Metadata.p  	19 18993
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/27-Metadata.p  	20 19939
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/28-Metadata.p  	21 20868
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/29-Metadata.p  	22 21832
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata

	Shape --> (94393, 4)
  Cleaning Disc Artist Name (MusicBrainz Only)
	Shape --> (94393, 4)
DataFrame Shape is (94393, 4)
Current Time is Fri Jun 12, 2020 20:02:13 for Done with 
Process [Done with 
Saving Master Artist DB File: /Volumes/Piggy/Discog/db-musicbrainz/MasterSlimArtistDB.p
  --> This file is 8.3MB.
All   --> (94393, 4)
Current Time is Fri Jun 12, 2020 20:02:14 for 
DataFrame Shape is (94393, 4)
Found 4085 artist entries
Counter({'Discogs': 3919, 'AllMusic': 3886, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceBootlegs': 175, 'CDandLP': 80, 'RateYourMusic': 30, 'MusicStack': 5, 'MetalStorm': 0})
DataFrame Shape is (3573, 4)
Saving Master Known Artist DB File: /Volumes/Piggy/Discog/db-musicbrainz/MasterKnownSlimArtistDB.p
  --> This file is 318.0kB.
Known --> (3573, 4)
Current Time is Fri Jun 12, 2020 20:02:15 for Creating Artist DBs
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/0-MediaMetadata.p 	936       0         5018      
/Volumes/Pi

/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/7-MediaMetadata.p 	64154     0         320353    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/70-MediaMetadata.p 	65067     0         325116    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/71-MediaMetadata.p 	66015     0         330236    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/72-MediaMetadata.p 	66987     0         334848    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/73-MediaMetadata.p 	67944     0         339935    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/74-MediaMetadata.p 	68840     0         344267    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/75-MediaMetadata.p 	69804     0         349198    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/76-MediaMetadata.p 	70700     0         353729    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/77-MediaMetadata.p 	71678     0         358268    
/Volumes/Piggy/Discog/artists-musicbrainz-db/metadata/78

In [40]:
mdb = myMusicDBMap(debug=False)
musicmap = mdb.get()

Counter({'Discogs': 3919, 'AllMusic': 3887, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceBootlegs': 175, 'CDandLP': 80, 'RateYourMusic': 30, 'MusicStack': 5, 'MetalStorm': 0})


In [None]:
knownSlimArtistDB

In [None]:

    
    

########################################################################################################################
#
# Artist DB
#
########################################################################################################################
def getArtistDB(disc, force=False):
    """
    Build (or reload) the master artist DataFrame for one music source.

    Args:
        disc: source object providing getArtistIDToNameData(),
            getArtistIDToRefData(), getArtistIDToVariationsData(),
            getMasterArtistDiscogsDB() and getMasterArtistDiscogsDBFilename().
        force: when exactly False (the default) the previously saved frame is
            returned via disc.getMasterArtistDiscogsDB(); any other value
            rebuilds the frame from the ID lookup data and saves it.

    Returns:
        DataFrame indexed like the artist-ID -> name data with columns
        "Name", "Ref", "Variations", "Known", "Artist" and "Num"
        (or whatever disc.getMasterArtistDiscogsDB() returns when not forced).
    """
    # Local import, matching the other builders in this cell; pandas is not
    # imported at notebook level.
    from pandas import Series, DataFrame

    start, cmt = clock("\n=================================== Creating Artist DB ===================================")
    if force is False:
        print("Using previously created Artist DB")
        discdf = disc.getMasterArtistDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoName  = Series(disc.getArtistIDToNameData())
    artistIDtoRef   = Series(disc.getArtistIDToRefData())
    artistIDToVariations  = Series(disc.getArtistIDToVariationsData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoName.shape[0]))
    cols = ["Name"]
    discdf = DataFrame(artistIDtoName)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    # Each join is a left join on the name frame's index; the joined column
    # arrives unnamed (0) and is renamed by resetting the full column list.
    print("  Joining Ref")
    discdf = discdf.join(DataFrame(artistIDtoRef))
    cols += ["Ref"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("  Joining Variations")
    discdf = discdf.join(DataFrame(artistIDToVariations))
    cols += ["Variations"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    discdf["Known"] = True

    print("  Finding Real Artist Name")
    # realName is the module-level helper (this is a free function, so there
    # is no `self`); it returns a [name, number] pair per artist.
    discdf[["Artist", "Num"]] = DataFrame(discdf['Name'].apply(realName).tolist(), index=discdf.index)
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    print("Saving Master Artist DB File")
    saveFilename = disc.getMasterArtistDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    return discdf
    
    
    

########################################################################################################################
#
# Artist Metadata DB
#
########################################################################################################################
def getArtistMetadataDB(disc, force=True):
    """
    Build (or reload) the artist metadata DataFrame for one music source.

    Args:
        disc: source object providing getArtistIDToGenreData(),
            getArtistIDToStyleData(), getArtistIDToCollaborationData(),
            getMasterArtistMetadataDiscogsDB() and
            getMasterArtistMetadataDiscogsDBFilename().
        force: when exactly False the previously saved frame is returned;
            otherwise (the default) the frame is rebuilt and saved.

    Returns:
        DataFrame indexed like the genre data with columns "Genre", "Style"
        and "Collaboration" (or the stored frame when not forced).
    """
    # Local import, matching the other builders in this cell; pandas is not
    # imported at notebook level.
    from pandas import Series, DataFrame

    start, cmt = clock("\n=================================== Creating Artist Metadata DB ===================================")
    if force is False:
        print("Using previously created Artist Metadata DB")
        discdf = disc.getMasterArtistMetadataDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoGenre          = Series(disc.getArtistIDToGenreData())
    artistIDtoStyle          = Series(disc.getArtistIDToStyleData())
    artistIDToCollaboration  = Series(disc.getArtistIDToCollaborationData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoGenre.shape[0]))
    cols = ["Genre"]
    discdf = DataFrame(artistIDtoGenre)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    # Left-join the remaining lookups onto the genre index, one column at a
    # time; IDs missing from a lookup end up as NaN in that column.
    joins = (
        ("  Joining Style", "Style", artistIDtoStyle),
        ("  Joining Collaboration", "Collaboration", artistIDToCollaboration),
    )
    for message, column, series in joins:
        print(message)
        discdf = discdf.join(DataFrame(series))
        cols = cols + [column]
        discdf.columns = cols
        print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    print("Saving Master Artist Metadata DB File")
    saveFilename = disc.getMasterArtistMetadataDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    return discdf




########################################################################################################################
#
# Artist Albums DB
#
########################################################################################################################
def getArtistAlbumsDB(disc, loadRefs=False, force=False):
    """
    Build (or reload) the artist -> albums DataFrame for one music source.

    Args:
        disc: source object providing getArtistIDToAlbumNamesData(),
            getArtistIDToAlbumRefsData(), getMasterArtistAlbumsDiscogsDB()
            and getMasterArtistAlbumsDiscogsDBFilename().
        loadRefs: when true, the album-ref lookup is also fetched from
            `disc`. It is not added to the returned frame.
        force: when exactly False (the default) the previously saved frame is
            returned; otherwise the frame is rebuilt and saved.

    Returns:
        DataFrame with a single "Albums" column indexed like the album-names
        data (or the stored frame when not forced).
    """
    # Local import, matching the other builders in this cell; pandas is not
    # imported at notebook level.
    from pandas import Series, DataFrame

    start, cmt = clock("\n=================================== Creating Artist Albums DB ===================================")
    if force is False:
        print("Using previously created Artist Albums DB")
        discdf = disc.getMasterArtistAlbumsDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoAlbumNames  = Series(disc.getArtistIDToAlbumNamesData())
    if loadRefs:
        # NOTE(review): fetched but never used below; the call is kept so the
        # option behaves as before. Confirm whether a "Refs" column was intended.
        artistIDtoAlbumRefs   = Series(disc.getArtistIDToAlbumRefsData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoAlbumNames.shape[0]))
    cols = ["Albums"]
    discdf = DataFrame(artistIDtoAlbumNames)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))

    print("Saving Master Artist Albums DB File")
    saveFilename = disc.getMasterArtistAlbumsDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    elapsed(start, cmt)

    return discdf






########################################################################################################################
#
# Artist Album Known DB
#
########################################################################################################################
def getArtistAlbumKnownDB(discAlbumDB, discArtistAlbumsDB):
    """
    Summarise, per artist, how many of their albums appear in the album DB.

    Args:
        discAlbumDB: frame whose index holds the known album IDs.
        discArtistAlbumsDB: frame with an "Albums" column; each cell is passed
            to isKnownAlbum together with the known-ID index.

    Returns:
        DataFrame indexed like discArtistAlbumsDB with columns
        "Known Albums", "All Albums" and "Albums".
    """
    start, cmt = clock("\n=================================== Creating Artist Album DB ===================================")
    from pandas import DataFrame

    knownAlbumIDs = discAlbumDB.index

    # Work on a copy so the caller's column is never touched.
    artistAlbums = discArtistAlbumsDB["Albums"].copy()
    print("Creating Pandas DataFrame for {0} Arist Albums".format(artistAlbums.shape[0]))

    # One [nKnown, nTotal, details] triple per artist -> three columns.
    summaries = artistAlbums.apply(isKnownAlbum, idx=knownAlbumIDs)
    discdf = DataFrame(summaries.tolist(), index=artistAlbums.index)
    discdf.columns = ["Known Albums", "All Albums", "Albums"]
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    return discdf
    
def isKnownAlbum(x, **kwargs):
    """
    Flag each album of one artist as known or unknown.

    Args:
        x: mapping of media type -> {album ID: album name}.
        **kwargs: must contain 'idx', a container of known album IDs that
            supports the `in` operator.

    Returns:
        [number of known albums, total number of albums,
         {album ID: [album name, media type, known flag]}]
    """
    nKnown = 0
    nTotal = 0
    albumInfo = {}
    for mediaType, albums in x.items():
        for albumID, albumName in albums.items():
            isKnown = albumID in kwargs['idx']
            # A repeated album ID overwrites its entry but is still counted.
            albumInfo[albumID] = [albumName, mediaType, isKnown]
            nKnown += isKnown
            nTotal += 1

    return [nKnown, nTotal, albumInfo]





########################################################################################################################
#
# Album DB
#
########################################################################################################################
def getAlbumDB(disc):
    """
    Assemble the album DataFrame for one music source.

    Args:
        disc: source object providing getAlbumIDToNameData(),
            getAlbumIDToRefData() and getAlbumIDToArtistsData().

    Returns:
        DataFrame indexed like the album-ID -> name data with columns
        "Name", "Ref" and "Artists". Nothing is written to disk.
    """
    start, cmt = clock("\n=================================== Creating Artist Album DB ===================================")
    from pandas import Series, DataFrame
    print("Loading AlbumID Data")
    nameByAlbumID    = Series(disc.getAlbumIDToNameData())
    refByAlbumID     = Series(disc.getAlbumIDToRefData())
    artistsByAlbumID = Series(disc.getAlbumIDToArtistsData())

    print("Creating Pandas DataFrame for {0} Albums".format(nameByAlbumID.shape[0]))
    columnNames = ["Name"]
    discdf = DataFrame(nameByAlbumID)
    discdf.columns = columnNames
    print("\tShape --> {0}".format(discdf.shape))

    # Left-join each remaining lookup onto the name index and label the new
    # column by resetting the complete column list.
    steps = (
        ("  Joining Ref", "Ref", refByAlbumID),
        ("  Joining Artists", "Artists", artistsByAlbumID),
    )
    for banner, columnName, lookup in steps:
        print(banner)
        discdf = discdf.join(DataFrame(lookup))
        columnNames = columnNames + [columnName]
        discdf.columns = columnNames
        print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    return discdf





########################################################################################################################
#
# Master DB Join
#
########################################################################################################################
def createMasterDB(disc, discArtistDB, discArtistMetadataDB, discArtistAlbumKnownDB):
    """
    Join the artist, metadata and known-album frames and save the result.

    Args:
        disc: source object providing getMasterDiscogsDBFilename().
        discArtistDB: left-hand frame; its index drives both joins.
        discArtistMetadataDB: frame joined first.
        discArtistAlbumKnownDB: frame joined second.

    Returns:
        None. The joined frame is handed to saveFile and not returned.
    """
    start, cmt = clock("\n=================================== Creating Artist ID DB ===================================")
    print("Creating Pandas DataFrame for {0} Arist IDs".format(discArtistDB.shape[0]))

    discdf = discArtistDB
    joinPlan = (
        ("  Joining Artist Metadata", discArtistMetadataDB),
        ("  Joining Artist Albums", discArtistAlbumKnownDB),
    )
    for banner, other in joinPlan:
        print(banner)
        discdf = discdf.join(other)
        print("\tShape --> {0}".format(discdf.shape))
    elapsed(start, cmt)

    saveFile(idata=discdf, ifile=disc.getMasterDiscogsDBFilename(), debug=True)
    
    
    
    
    
    
########################################################################################################################
#
# Associated Functions
#
########################################################################################################################
def directoryName(x):
    """
    Make a name usable as a directory name.

    Removes every "..." and turns every "/" into "-". None is passed through
    unchanged.
    """
    if x is None:
        return x
    # Applied in this order: ellipses first, then path separators.
    for target, substitute in (("...", ""), ("/", "-")):
        if target in x:
            x = x.replace(target, substitute)
    return x

def realName(x):
    """
    Split a trailing "(N)" numeric suffix off an artist name.

    Handles suffixes of one, two or three characters between the parentheses,
    e.g. "Foo (2)", "Foo (12)" and "Foo (123)". The three-character case was
    previously never reached because its check duplicated the two-character
    one.

    Args:
        x: artist name, or None.

    Returns:
        [name, number]:
          * [None, -1] when x is None and [x, -1] when x is empty;
          * [stripped name, int] when a numeric suffix is found;
          * [x, None] otherwise (no closing parenthesis, no matching opening
            parenthesis, or a non-numeric suffix).
    """
    if x is None:
        return [None, -1]

    lenx = len(x)
    if lenx < 1:
        return [x, -1]

    if x[-1] != ")":
        return [x, None]

    # Try the shortest suffix first. The length guard keeps the original
    # requirement that at least two characters precede the "(".
    for ndigits in (1, 2, 3):
        openPos = -(ndigits + 2)
        if lenx >= ndigits + 4 and x[openPos] == "(":
            try:
                num = int(x[openPos + 1:-1])
            except ValueError:
                # Parenthesised text that is not a number, e.g. "Foo (x)".
                return [x, None]
            return [x[:openPos].strip(), num]

    return [x, None]

def discConv(x):
    """
    Normalise a name for use on disc.

    None becomes "". Otherwise "/" is replaced by "-", every "¡" is removed,
    leading dots are dropped (a string made only of dots keeps one), and
    surrounding whitespace is stripped.
    """
    if x is None:
        return ""
    cleaned = x.replace("/", "-").replace("¡", "")
    withoutDots = cleaned.lstrip(".")
    if withoutDots == "" and cleaned != "":
        # All dots: the leading-dot removal stops with one character left.
        withoutDots = "."
    return withoutDots.strip()

def cleanMB(x):
    """
    Drop a trailing parenthesised qualifier from a name.

    Everything from the last "(" onwards is removed and the remainder is
    stripped, so "Foo (band)" and "Foo(band)" both become "Foo".

    The name is returned unchanged when:
      * it has no "(" or no ")" (or the last ")" is its first character);
      * the last ")" comes before the last "(";
      * removing the qualifier would leave nothing, e.g. "(band)".

    Args:
        x: name string.

    Returns:
        The cleaned name, or x itself in the cases listed above.
    """
    openPos = x.rfind("(")
    closePos = x.rfind(")")
    if openPos < 0 or closePos <= 0:
        return x
    if closePos < openPos:
        return x

    # Cut exactly at the "(" and let strip() remove any separating space, so
    # no real character is lost when there is no space before it.
    cleaned = x[:openPos].strip()
    return cleaned if cleaned else x

# Create Dictionary Lookup Files

## Artist ID --> Ref and Name

In [None]:
%load_ext autoreload
%autoreload

from lookup import createArtistIDMap
from masterdb import getSlimArtistDB
from lookup import createArtistAlbumIDMap
from masterdb import getArtistAlbumsDB

# Only the last assignment takes effect: this cell processes AllMusic only.
dbs = dbdata.keys()
dbs = ["AllMusic"]

########################################################################################################
## Loop over DBs
########################################################################################################
for db in dbs:
    print("\n")
    print("="*125)
    print("="*125)
    print("=",db)
    print("="*125)
    print("="*125)
    print("\n")
    
    
    ####################################################################################################
    ## Create Artist ID Lookup
    ####################################################################################################
    createArtistIDMap(dbdata[db]["Disc"])
    
    
    ####################################################################################################
    ## Create Artist ID Pandas DataFrame
    ####################################################################################################
    discSlimArtistAMDB      = getSlimArtistDB(dbdata[db]["Disc"], force=True)
    # NOTE(review): getKnownSlimArtistDB is not imported in this cell or in any
    # cell above it, and `mydb` is only assigned in the cell that follows this
    # one (mydb = mdb.get()). On a fresh kernel run top-to-bottom this line
    # looks like it raises NameError -- confirm and import/define both first.
    discKnownSlimArtistAMDB = getKnownSlimArtistDB(dbdata[db]["Disc"], mydb, force=True)


    ####################################################################################################
    ## Create Artist ID to Album ID Lookup
    ####################################################################################################
    createArtistAlbumIDMap(dbdata[db]["Disc"])


    ####################################################################################################
    ## Create Artist ID Pandas DataFrame
    ####################################################################################################
    discArtistAMAlbumsDB = getArtistAlbumsDB(dbdata[db]["Disc"], force=True)

In [None]:
%load_ext autoreload
%autoreload
from myMusicDBMap import myMusicDBMap
from artistDB import artistDB

mdb = myMusicDBMap(debug=True)
mydb = mdb.get()

***
***
***

## Artist ID --> Genre, Style, Artists Lookup Table

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createArtistMetadataMap
createArtistMetadataMap(discAM)

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createArtistMetadataMap
createArtistMetadataMap(discDC)

## Album ID --> Name, Ref, Artists Lookup Table

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createAlbumIDMap
createAlbumIDMap(disc)

# Master Lookup Tests

In [None]:
%load_ext autoreload
%autoreload
from lookup import testLookupMaps
testLookupMaps(disc)

In [None]:

discArtistLMAlbumsDB = getArtistAlbumsDB(discRM, force=False)

In [None]:
from collections import Counter
cntr = Counter()
for artistData in discArtistLMAlbumsDB["Albums"]:
    for key in artistData.keys():
        cntr[key] += 1
cntr

# Pandas DB

## Slim Artist DB

In [None]:
from masterdb import getSlimArtistDB
discSlimArtistDB = getSlimArtistDB(disc)
discSlimArtistDB.head()

## Artist DB

In [None]:
from masterdb import getArtistDB

In [None]:
discArtistDB = getArtistDB(disc)
discArtistDB.head()

## Artist Metadata DB

In [None]:
from masterdb import getArtistMetadataDB

In [None]:
discArtistMetadataDB = getArtistMetadataDB(disc)
discArtistMetadataDB.head()

## Artist Albums DB

In [None]:
from masterdb import getArtistAlbumsDB

In [None]:
discArtistAlbumsDB = getArtistAlbumsDB(disc, force=False)
discArtistAlbumsDB.head()

In [None]:
discArtistAlbumsDB = getArtistAlbumsDB(disc)
discArtistAlbumsDB.head()

## Albums DB

In [None]:
from masterdb import getAlbumDB

In [None]:
discAlbumDB = getAlbumDB(disc)
discAlbumDB.head()

## Artist Album ID --> Known Albums

In [None]:
from masterdb import getArtistAlbumKnownDB

In [None]:
discArtistAlbumKnownDB = getArtistAlbumKnownDB(discAlbumDB, discArtistAlbumsDB)
discArtistAlbumKnownDB.head()

# Joining Artist ID DataFrame

In [None]:
from masterdb import createMasterDB

In [None]:
createMasterDB(disc, discArtistDB, discArtistMetadataDB, discArtistAlbumKnownDB)

In [None]:
from random import random

# Download Artist Data

In [None]:
artistName = "Killin' Baudelaire"

In [None]:
# Full set of sources and their matching artist-search callables (the two
# lists are written in the same source order).
dbsToGet = ["AllMusic", "Discogs", "MusicBrainz", "LastFM", "RockCorner", "CDandLP"]
searches = [dbdata["AllMusic"]["Artists"].searchAllMusicForArtist, dbdata["Discogs"]["Artists"].searchDiscogForArtist,
            dbdata["MusicBrainz"]["Artists"].searchMusicBrainzForArtist, dbdata["LastFM"]["Artists"].searchLastFMForArtist,
            dbdata["RockCorner"]["Artists"].searchRockCornerForArtist, dbdata["CDandLP"]["Artists"].searchCDandLPForArtist]

# Override: the assignments below replace the lists above, so as written only
# AllMusic is searched. Remove them to search every source.
dbsToGet = ["AllMusic"]
searches = [dbdata["AllMusic"]["Artists"].searchAllMusicForArtist]

In [None]:
# Print the position of "Riff Raff" in mdb.getArtists() (nothing is printed if
# the name is absent). Note that this loop rebinds the notebook-level
# `artistName` variable.
for i,artistName in enumerate(mdb.getArtists()):
        if artistName == "Riff Raff":
            print(i)
            break

In [None]:
def searchAll(mdb, minI=-1):
    """
    Run every configured search for artists that are not yet well known.

    An artist is "well known" when mdb.getArtistData(name) has truthy entries
    for LastFM, Discogs, AllMusic and MusicBrainz; such artists are skipped.
    For every other artist each callable in the notebook-level `searches`
    list is called with the artist name.

    Searching is best-effort: an ordinary error in one search is reported,
    followed by a 3 second pause, and the loop carries on. KeyboardInterrupt
    and SystemExit are not caught, so the run can still be stopped by hand.

    Args:
        mdb: music map providing getArtists() and getArtistData(name).
        minI: artists at positions <= minI are skipped, which allows an
            interrupted run to be resumed.
    """
    artistNames = mdb.getArtists()
    nArtists = len(artistNames)
    for i, artistName in enumerate(artistNames):
        if i <= minI:
            continue

        print('\n\n\n')
        print('-'*130)
        print(i,'/',nArtists,'===>\t',artistName)
        print('-'*130)
        print('\n')

        # Named artistData so the notebook-level `dbdata` registry is not shadowed.
        artistData = mdb.getArtistData(artistName)
        if all([artistData.get("LastFM"), artistData.get("Discogs"), artistData.get("AllMusic"), artistData.get("MusicBrainz")]):
            print("{0} is well known.".format(artistName))
            continue
        else:
            print("{0} get it!".format(artistName))

        for search in searches:
            print("====>",search)
            try:
                search(artistName)
            except Exception as err:
                # Report instead of silently swallowing, then back off.
                print("Search failed for {0}: {1!r}".format(artistName, err))
                sleep(3)

In [None]:
searchAll(mdb, 2383)

In [None]:
# 3404