# Master Discogs Database

In [6]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))


################################################################################
## General Stuff
################################################################################
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from urllib.parse import quote
from collections import Counter
from artist import artist
from searchUtils import findExt, findSubExt, findPatternExt, findNearest
from timeUtils import clock, elapsed
from fsUtils import moveFile, setFile, setDir, setSubDir, isFile, isDir, mkDir
from fileUtils import getFileBasics, getBasename
from time import sleep


################################################################################
## Music Stuff
################################################################################

### MultiArtist
from multiArtist import multiartist

### My Music DB
from myMusicDBMap import myMusicDBMap

### Master DB code
from masterdb import discConv, isKnownAlbum, directoryName
from masterdb import getSlimArtistDB, getArtistNameToIDMap
from masterdb import getMusicData, getRowByIndex
from masterdb import masterdb


# Record the wall-clock time at which this cell was executed and echo it into
# the cell output.
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2020-05-27 20:02:54.817441


In [2]:
# Create the music DB map object with debug=True. In the recorded run this
# printed the map file being loaded, the DB keys and a per-DB Counter.
mdb = myMusicDBMap(debug=True)

Creating myMusicDBMap()
   Loading my music db map: /anaconda3/envs/py37/musicdb/myMusicMap.p
   DB keys: ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm']
Counter({'Discogs': 3919, 'AllMusic': 3887, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceBootlegs': 175, 'CDandLP': 80, 'RateYourMusic': 30, 'MusicStack': 5, 'MetalStorm': 0})


In [7]:
################################################################################
## Per-source classes (import order kept as in the original cell)
################################################################################
### Discogs
from artistsDC import artistsDC
from artistDC import artistDC
from discogsUtils import discogsUtils

### AllMusic
from artistsAM import artistsAM
from artistAM import artistAM
from discogsUtils import allmusicUtils

### MusicBrainz
from artistsMB import artistsMB
from artistMB import artistMB
from discogsUtils import musicbrainzUtils

## AceBootlegs
from artistAB import artistAB
from artistsAB import artistsAB
from discogsUtils import acebootlegsUtils

## RateYourMusic
from artistRM import artistRM
from artistsRM import artistsRM
from discogsUtils import rateyourmusicUtils

## LastFM
from artistLM import artistLM
from artistsLM import artistsLM
from discogsUtils import lastfmUtils

## DatPiff
from artistDP import artistDP
from artistsDP import artistsDP
from discogsUtils import datpiffUtils

## RockCorner
from artistRC import artistRC
from artistsRC import artistsRC
from discogsUtils import rockcornerUtils

## CDandLP
from artistCL import artistCL
from artistsCL import artistsCL
from discogsUtils import cdandlpUtils

## MusicStack
from artistMS import artistMS
from artistsMS import artistsMS
from discogsUtils import musicstackUtils

## MetalStorm
from artistMT import artistMT
from artistsMT import artistsMT
from discogsUtils import metalstormUtils

## General
from discogsBase import discogs


def makeDBInfo(db, classes):
    """
    Instantiate the helper objects for one music database.

    Parameters
    ----------
    db : str
        Database name; its lower-cased form is passed to discogs().
    classes : dict
        Maps "Artists", "Artist" and "Utils" to the class to instantiate.

    Returns
    -------
    dict
        {"Artists": ..., "Artist": ..., "Utils": ..., "Disc": ...} holding the
        created instances. The "Artist" and "Artists" objects are built from
        the same discogs instance that is stored under "Disc".
    """
    # Construction order matches the original cell: Disc, Artist, Artists, Utils.
    disc       = discogs(db.lower())
    artistObj  = classes["Artist"](disc)
    artistsObj = classes["Artists"](disc)
    utilsObj   = classes["Utils"]()
    return {"Artists": artistsObj, "Artist": artistObj, "Utils": utilsObj, "Disc": disc}


################################################################################
## Registry: one row per database instead of one copy-pasted stanza each
################################################################################
keys      = ["Artists", "Artist", "Utils"]
dbclasses = {
    "Discogs":       dict(zip(keys, [artistsDC, artistDC, discogsUtils])),
    "AllMusic":      dict(zip(keys, [artistsAM, artistAM, allmusicUtils])),
    "MusicBrainz":   dict(zip(keys, [artistsMB, artistMB, musicbrainzUtils])),
    "AceBootlegs":   dict(zip(keys, [artistsAB, artistAB, acebootlegsUtils])),
    "RateYourMusic": dict(zip(keys, [artistsRM, artistRM, rateyourmusicUtils])),
    "LastFM":        dict(zip(keys, [artistsLM, artistLM, lastfmUtils])),
    "DatPiff":       dict(zip(keys, [artistsDP, artistDP, datpiffUtils])),
    "RockCorner":    dict(zip(keys, [artistsRC, artistRC, rockcornerUtils])),
    "CDandLP":       dict(zip(keys, [artistsCL, artistCL, cdandlpUtils])),
    "MusicStack":    dict(zip(keys, [artistsMS, artistMS, musicstackUtils])),
    "MetalStorm":    dict(zip(keys, [artistsMT, artistMT, metalstormUtils])),
}

# dbdata only ever holds instances: the classes stay in dbclasses, so re-running
# this cell (or part of it) never tries to call an already-created instance.
dbdata = {}
for db, classes in dbclasses.items():
    print("Creating DB Info For {0}".format(db))
    dbinfo = makeDBInfo(db, classes)
    dbdata[db] = dbinfo

Creating DB Info For Discogs
Creating DB Info For AllMusic
Creating DB Info For MusicBrainz
Creating DB Info For AceBootlegs
Creating DB Info For RateYourMusic
Creating DB Info For LastFM
Creating DB Info For DatPiff
Creating DB Info For RockCorner
Creating DB Info For CDandLP
Creating DB Info For MusicStack
Creating DB Info For MetalStorm


In [12]:
%load_ext autoreload
%autoreload

dbs = dbdata.keys()
dbs = ["DatPiff", "RockCorner", "CDandLP", "MusicStack", "MetalStorm"]

########################################################################################################
## Loop over DBs
########################################################################################################
for db in dbs:
    print("\n")
    print("="*125)
    print("="*125)
    print("=",db)
    print("="*125)
    print("="*125)
    print("\n")
    
    
    disc  = dbdata[db]["Disc"]
    mymdb = masterdb(db, disc, force=True)
    
    
    #################################
    # Artists
    #################################
    mymdb.createArtistIDMap()
    slimArtistDB      = mymdb.getSlimArtistDB()
    print("All   --> {0}".format(slimArtistDB.shape))
    mymdb.setMyMusicDB(mdb)
    knownSlimArtistDB = mymdb.getKnownSlimArtistDB()
    print("Known --> {0}".format(knownSlimArtistDB.shape))
    
    
    #################################
    # Artist Albums
    #################################
    mymdb.createArtistAlbumIDMap()
    slimArtistAlbumsDB      = mymdb.getArtistAlbumsDB()
    print("All Albums    --> {0}".format(slimArtistAlbumsDB.shape))
    knownSlimArtistAlbumsDB = mymdb.getKnownArtistAlbumsDB()
    print("Known Albums  --> {0}".format(knownSlimArtistAlbumsDB.shape))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


= DatPiff


Current Time is Wed May 27, 2020 20:24:00 for Creating Artist DBs
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/0-Metadata.p  	0 410
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/1-Metadata.p  	1 830
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/10-Metadata.p  	2 1276
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/11-Metadata.p  	3 1750
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/12-Metadata.p  	4 2199
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/13-Metadata.p  	5 2623
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/14-Metadata.p  	6 3073
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/15-Metadata.p  	7 3511
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/16-Metadata.p  	8 3954
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/17-Metadata.p  	9 4381
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/18-Metadata.p  	10 4785
/Volumes/Piggy/Discog/artists-da

  --> This file is 291.1kB.
Saved data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToRef.p
  --> This file is 291.1kB.
Saving 44209 entries to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToName.p

Saving data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToName.p
  --> This file is 818.3kB.
Saved data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToName.p
  --> This file is 818.3kB.
Saving 44209 entries to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToVariations.p

Saving data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToVariations.p
  --> This file is 956.8kB.
Saved data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToVariations.p
  --> This file is 956.8kB.
Current Time is Wed May 27, 2020 20:24:05 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 5 seconds.
Current Time is Wed May 27, 2020 20:24:05 for 
Loading ArtistID Data
Creating Pandas DataFrame for 44209 Artists
	Shape --> (44209, 1)
  Finding Real Artist Name
	Shape --> (44209, 3)
  Removing None Artist
	Sha

/Volumes/Piggy/Discog/artists-datpiff-db/metadata/65-MediaMetadata.p 	27817     0         76193     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/66-MediaMetadata.p 	28261     0         79266     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/67-MediaMetadata.p 	28724     0         80773     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/68-MediaMetadata.p 	29159     0         82322     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/69-MediaMetadata.p 	29623     0         84602     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/7-MediaMetadata.p 	30092     0         85502     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/70-MediaMetadata.p 	30511     0         86626     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/71-MediaMetadata.p 	30954     0         87727     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/72-MediaMetadata.p 	31392     0         88514     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/73-MediaMetadata.p 	31846     0         91

/Volumes/Piggy/Discog/artists-datpiff-db/metadata/35-MediaMetadata.p 	13159     0         36793     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/36-MediaMetadata.p 	13589     0         38066     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/37-MediaMetadata.p 	14012     0         38962     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/38-MediaMetadata.p 	14448     0         39901     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/39-MediaMetadata.p 	14900     0         40796     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/4-MediaMetadata.p 	15340     0         41685     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/40-MediaMetadata.p 	15799     0         42841     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/41-MediaMetadata.p 	16257     0         43929     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/42-MediaMetadata.p 	16713     0         44778     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/43-MediaMetadata.p 	17102     0         45

  --> This file is 390.9kB.
Saved data to /Volumes/Piggy/Discog/db-datpiff/ArtistIDToCoreAlbumRefs.p
  --> This file is 390.9kB.
Current Time is Wed May 27, 2020 20:24:30 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 11 seconds.
All Albums    --> (44209, 1)
Current Time is Wed May 27, 2020 20:24:30 for 
Current Time is Wed May 27, 2020 20:24:30 for Creating Artist DBs
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/0-MediaMetadata.p 	410       0         2820      
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/1-MediaMetadata.p 	830       0         3740      
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/10-MediaMetadata.p 	1276      0         4628      
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/11-MediaMetadata.p 	1750      0         5944      
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/12-MediaMetadata.p 	2199      0         7149      
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/13-MediaMetadata.p 	2623      0         833

/Volumes/Piggy/Discog/artists-datpiff-db/metadata/8-MediaMetadata.p 	34812     0         99022     
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/80-MediaMetadata.p 	35246     0         100245    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/81-MediaMetadata.p 	35671     0         102042    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/82-MediaMetadata.p 	36122     0         103092    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/83-MediaMetadata.p 	36558     0         104208    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/84-MediaMetadata.p 	37002     0         105345    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/85-MediaMetadata.p 	37484     0         106979    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/86-MediaMetadata.p 	37923     0         107867    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/87-MediaMetadata.p 	38364     0         108762    
/Volumes/Piggy/Discog/artists-datpiff-db/metadata/88-MediaMetadata.p 	38798     0         10

/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/88-Metadata.p  	87 1109
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/89-Metadata.p  	88 1117
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/9-Metadata.p  	89 1131
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/90-Metadata.p  	90 1143
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/91-Metadata.p  	91 1153
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/92-Metadata.p  	92 1165
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/93-Metadata.p  	93 1177
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/94-Metadata.p  	94 1195
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/95-Metadata.p  	95 1208
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/96-Metadata.p  	96 1221
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/97-Metadata.p  	97 1236
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/98-Metadata.p  	98 1252
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/99-Metadata.p  	99 1262


/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/57-MediaMetadata.p 	680       7820      47262     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/58-MediaMetadata.p 	690       7919      47935     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/59-MediaMetadata.p 	703       8113      48907     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/6-MediaMetadata.p 	714       8232      49763     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/60-MediaMetadata.p 	730       8436      51004     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/61-MediaMetadata.p 	744       8569      51998     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/62-MediaMetadata.p 	756       8663      52717     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/63-MediaMetadata.p 	773       8765      53613     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/64-MediaMetadata.p 	793       9057      55129     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/65-MediaMeta

/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/30-MediaMetadata.p 	314       3304      21606     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/31-MediaMetadata.p 	330       3528      22813     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/32-MediaMetadata.p 	341       3635      23507     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/33-MediaMetadata.p 	358       3804      24560     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/34-MediaMetadata.p 	378       4029      26072     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/35-MediaMetadata.p 	397       4304      27481     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/36-MediaMetadata.p 	401       4336      27717     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/37-MediaMetadata.p 	411       4413      28300     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/38-MediaMetadata.p 	420       4513      28888     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/39-MediaMet

  --> This file is 2.4MB.
Saved data to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToAlbumRefs.p
  --> This file is 2.4MB.
Saving 1262 entries to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumNames.p

Saving data to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumNames.p
  --> This file is 360.0kB.
Saved data to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumNames.p
  --> This file is 360.0kB.
Saving 1262 entries to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumRefs.p

Saving data to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumRefs.p
  --> This file is 393.1kB.
Saved data to /Volumes/Piggy/Discog/db-rockcorner/ArtistIDToCoreAlbumRefs.p
  --> This file is 393.1kB.
Current Time is Wed May 27, 2020 20:24:53 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 4 seconds.
All Albums    --> (0, 0)
Current Time is Wed May 27, 2020 20:24:53 for 
Current Time is Wed May 27, 2020 20:24:53 for Creating Artist DBs
/Volumes/Pigg

/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/71-MediaMetadata.p 	888       10128     61591     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/72-MediaMetadata.p 	904       10308     62680     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/73-MediaMetadata.p 	919       10506     63782     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/74-MediaMetadata.p 	928       10560     64362     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/75-MediaMetadata.p 	941       10699     65220     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/76-MediaMetadata.p 	956       10886     66309     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/77-MediaMetadata.p 	966       10980     66995     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/78-MediaMetadata.p 	977       11125     67670     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/79-MediaMetadata.p 	991       11304     68680     
/Volumes/Piggy/Discog/artists-rockcorner-db/metadata/8-MediaMeta

/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/48-Metadata.p  	43 2147
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/49-Metadata.p  	44 2209
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/5-Metadata.p  	45 2264
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/50-Metadata.p  	46 2312
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/51-Metadata.p  	47 2360
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/52-Metadata.p  	48 2416
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/53-Metadata.p  	49 2467
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/54-Metadata.p  	50 2508
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/55-Metadata.p  	51 2553
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/56-Metadata.p  	52 2594
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/57-Metadata.p  	53 2640
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/58-Metadata.p  	54 2688
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/59-Metadata.p  	55 2732
/Volumes/Piggy/Discog/artists-cdandlp-d

/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/30-MediaMetadata.p 	1211      9429      9429      
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/31-MediaMetadata.p 	1266      9822      9822      
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/32-MediaMetadata.p 	1313      10162     10162     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/33-MediaMetadata.p 	1352      10314     10314     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/34-MediaMetadata.p 	1394      10639     10639     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/35-MediaMetadata.p 	1439      11073     11073     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/36-MediaMetadata.p 	1492      11639     11639     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/37-MediaMetadata.p 	1547      11950     11950     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/38-MediaMetadata.p 	1603      12448     12448     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/39-MediaMetadata.p 	1643      12631     1

  --> This file is 1.3MB.
Saved data to /Volumes/Piggy/Discog/db-cdandlp/ArtistIDToCoreAlbumNames.p
  --> This file is 1.3MB.
Saving 4977 entries to /Volumes/Piggy/Discog/db-cdandlp/ArtistIDToCoreAlbumRefs.p

Saving data to /Volumes/Piggy/Discog/db-cdandlp/ArtistIDToCoreAlbumRefs.p
  --> This file is 1.6MB.
Saved data to /Volumes/Piggy/Discog/db-cdandlp/ArtistIDToCoreAlbumRefs.p
  --> This file is 1.6MB.
Current Time is Wed May 27, 2020 20:25:05 for Done with Creating Artist DBs
Process [Done with Creating Artist DBs] took 4 seconds.
Current Time is Wed May 27, 2020 20:25:05 for Creating Artist DBs
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/0-MediaMetadata.p 	64        409       409       
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/1-MediaMetadata.p 	128       866       866       
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/10-MediaMetadata.p 	189       1349      1349      
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/11-MediaMetadata.p 	234       1735      173

/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/81-MediaMetadata.p 	3991      31758     31758     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/82-MediaMetadata.p 	4046      32177     32177     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/83-MediaMetadata.p 	4097      32474     32474     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/84-MediaMetadata.p 	4146      32871     32871     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/85-MediaMetadata.p 	4199      33295     33295     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/86-MediaMetadata.p 	4252      33677     33677     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/87-MediaMetadata.p 	4297      33976     33976     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/88-MediaMetadata.p 	4343      34275     34275     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/89-MediaMetadata.p 	4396      34752     34752     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/9-MediaMetadata.p 	4445      35201     35

/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/63-MediaMetadata.p 	3012      23564     23564     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/64-MediaMetadata.p 	3061      23937     23937     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/65-MediaMetadata.p 	3118      24509     24509     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/66-MediaMetadata.p 	3170      24762     24762     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/67-MediaMetadata.p 	3221      25099     25099     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/68-MediaMetadata.p 	3266      25543     25543     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/69-MediaMetadata.p 	3313      25988     25988     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/7-MediaMetadata.p 	3362      26414     26414     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/70-MediaMetadata.p 	3397      26658     26658     
/Volumes/Piggy/Discog/artists-cdandlp-db/metadata/71-MediaMetadata.p 	3442      27097     27

Creating Pandas DataFrame for 187 Artists
	Shape --> (187, 1)
  Finding Real Artist Name
	Shape --> (187, 3)
  Removing None Artist
	Shape --> (187, 3)
  Finding Disc Artist Name
	Shape --> (187, 4)
DataFrame Shape is (187, 4)
Current Time is Wed May 27, 2020 20:25:13 for Done with 
Process [Done with 
Saving Master Artist DB File: /Volumes/Piggy/Discog/db-musicstack/MasterSlimArtistDB.p
  --> This file is 4.8kB.
All   --> (187, 4)
Current Time is Wed May 27, 2020 20:25:13 for 
DataFrame Shape is (187, 4)
Found 4084 artist entries
Counter({'Discogs': 3919, 'AllMusic': 3887, 'MusicBrainz': 3674, 'LastFM': 2777, 'RockCorner': 466, 'DatPiff': 458, 'AceBootlegs': 175, 'CDandLP': 80, 'RateYourMusic': 30, 'MusicStack': 5, 'MetalStorm': 0})
DataFrame Shape is (5, 4)
Saving Master Known Artist DB File: /Volumes/Piggy/Discog/db-musicstack/MasterKnownSlimArtistDB.p
  --> This file is 879B.
Known --> (5, 4)
Current Time is Wed May 27, 2020 20:25:13 for Creating Artist DBs
/Volumes/Piggy/Discog/ar

/Volumes/Piggy/Discog/artists-musicstack-db/metadata/58-MediaMetadata.p 	100       0         393       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/59-MediaMetadata.p 	103       0         397       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/6-MediaMetadata.p 	105       0         399       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/60-MediaMetadata.p 	106       0         400       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/61-MediaMetadata.p 	107       0         401       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/62-MediaMetadata.p 	109       0         409       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/63-MediaMetadata.p 	111       0         415       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/64-MediaMetadata.p 	116       0         420       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/65-MediaMetadata.p 	118       0         422       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/66-MediaMeta

/Volumes/Piggy/Discog/artists-musicstack-db/metadata/7-MediaMetadata.p 	127       0         486       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/70-MediaMetadata.p 	130       0         489       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/71-MediaMetadata.p 	133       0         493       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/72-MediaMetadata.p 	134       0         494       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/73-MediaMetadata.p 	139       0         508       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/74-MediaMetadata.p 	142       0         513       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/75-MediaMetadata.p 	144       0         515       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/76-MediaMetadata.p 	145       0         517       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/78-MediaMetadata.p 	146       0         524       
/Volumes/Piggy/Discog/artists-musicstack-db/metadata/79-MediaMeta

In [None]:
# Rebind mdb to a fresh myMusicDBMap created with debug=False, then keep the
# result of mdb.get() as musicmap.
mdb = myMusicDBMap(debug=False)
musicmap = mdb.get()

In [None]:
# Display knownSlimArtistDB. It is assigned inside the "Loop over DBs" cell, so
# this shows whatever the most recently executed iteration left behind.
knownSlimArtistDB

In [None]:

    
    

########################################################################################################################
#
# Artist DB
#
########################################################################################################################
def getArtistDB(disc, force=False, realName=None):
    """
    Return the master artist DataFrame, either the stored one or a rebuilt one.

    Parameters
    ----------
    disc : object
        Must provide getMasterArtistDiscogsDB(), getArtistIDToNameData(),
        getArtistIDToRefData(), getArtistIDToVariationsData() and
        getMasterArtistDiscogsDBFilename().
    force : bool, default False
        When exactly False, the stored frame from
        disc.getMasterArtistDiscogsDB() is returned. Any other value rebuilds
        the frame and saves it.
    realName : callable, optional
        Applied to every value of the "Name" column; each result must have two
        elements, which fill the "Artist" and "Num" columns. Required when
        rebuilding. The original code referred to ``self.realName`` although
        this is a free function, so a rebuild always failed with NameError.

    Returns
    -------
    pandas.DataFrame
        On rebuild: columns "Name", "Ref", "Variations", "Known", "Artist" and
        "Num", indexed like the artist-ID-to-name data.

    Raises
    ------
    ValueError
        If a rebuild is requested without a realName callable.
    """
    # Local import, matching the sibling functions that use pandas.
    from pandas import Series, DataFrame

    # Fail before any data is loaded rather than halfway through the rebuild.
    if force is not False and realName is None:
        raise ValueError("getArtistDB(force=True) needs a realName callable to build the Artist/Num columns")

    start, cmt = clock("\n=================================== Creating Artist DB ===================================")
    if force is False:
        print("Using previously created Artist DB")
        discdf = disc.getMasterArtistDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoName  = Series(disc.getArtistIDToNameData())
    artistIDtoRef   = Series(disc.getArtistIDToRefData())
    artistIDToVariations  = Series(disc.getArtistIDToVariationsData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoName.shape[0]))
    cols = ["Name"]
    discdf = DataFrame(artistIDtoName)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    # Each join is a left join on the index; the joined column is then renamed
    # by re-assigning the full column list.
    print("  Joining Ref")
    discdf = discdf.join(DataFrame(artistIDtoRef))
    cols += ["Ref"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("  Joining Variations")
    discdf = discdf.join(DataFrame(artistIDToVariations))
    cols += ["Variations"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    discdf["Known"] = True

    print("  Finding Real Artist Name")
    discdf[["Artist", "Num"]] = DataFrame(discdf['Name'].apply(realName).tolist(), index=discdf.index)
    print("\tShape --> {0}".format(discdf.shape))



    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    print("Saving Master Artist DB File")
    saveFilename = disc.getMasterArtistDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    return discdf
    
    
    

########################################################################################################################
#
# Artist Metadata DB
#
########################################################################################################################
def getArtistMetadataDB(disc, force=True):
    """
    Return the artist metadata DataFrame, either the stored one or a rebuilt one.

    Parameters
    ----------
    disc : object
        Must provide getMasterArtistMetadataDiscogsDB(),
        getArtistIDToGenreData(), getArtistIDToStyleData(),
        getArtistIDToCollaborationData() and
        getMasterArtistMetadataDiscogsDBFilename().
    force : bool, default True
        When exactly False, the stored frame from
        disc.getMasterArtistMetadataDiscogsDB() is returned. Any other value
        rebuilds the frame and saves it. Note the default differs from the
        sibling builders, which default to False.

    Returns
    -------
    pandas.DataFrame
        On rebuild: columns "Genre", "Style" and "Collaboration", indexed like
        the artist-ID-to-genre data.
    """
    # Series/DataFrame were used without being imported; import them locally
    # like the sibling functions do.
    from pandas import Series, DataFrame

    start, cmt = clock("\n=================================== Creating Artist Metadata DB ===================================")
    if force is False:
        print("Using previously created Artist Metadata DB")
        discdf = disc.getMasterArtistMetadataDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoGenre          = Series(disc.getArtistIDToGenreData())
    artistIDtoStyle          = Series(disc.getArtistIDToStyleData())
    artistIDToCollaboration  = Series(disc.getArtistIDToCollaborationData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoGenre.shape[0]))
    cols = ["Genre"]
    discdf = DataFrame(artistIDtoGenre)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    # Left joins on the index; the new column is named by re-assigning the
    # full column list after each join.
    print("  Joining Style")
    discdf = discdf.join(DataFrame(artistIDtoStyle))
    cols += ["Style"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("  Joining Collaboration")
    discdf = discdf.join(DataFrame(artistIDToCollaboration))
    cols += ["Collaboration"]
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    print("Saving Master Artist Metadata DB File")
    saveFilename = disc.getMasterArtistMetadataDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    return discdf




########################################################################################################################
#
# Artist Albums DB
#
########################################################################################################################
def getArtistAlbumsDB(disc, loadRefs=False, force=False):
    """
    Return the artist albums DataFrame, either the stored one or a rebuilt one.

    Parameters
    ----------
    disc : object
        Must provide getMasterArtistAlbumsDiscogsDB(),
        getArtistIDToAlbumNamesData(), getArtistIDToAlbumRefsData() (only
        called when loadRefs is true) and
        getMasterArtistAlbumsDiscogsDBFilename().
    loadRefs : bool, default False
        When true, the album-ref data is loaded as well. It is not added to
        the returned frame.
    force : bool, default False
        When exactly False, the stored frame from
        disc.getMasterArtistAlbumsDiscogsDB() is returned. Any other value
        rebuilds the frame and saves it.

    Returns
    -------
    pandas.DataFrame
        On rebuild: a single "Albums" column indexed like the
        artist-ID-to-album-names data.
    """
    # Series/DataFrame were used without being imported; import them locally
    # like the sibling functions do.
    from pandas import Series, DataFrame

    start, cmt = clock("\n=================================== Creating Artist Albums DB ===================================")
    if force is False:
        print("Using previously created Artist Albums DB")
        discdf = disc.getMasterArtistAlbumsDiscogsDB()
        elapsed(start, cmt)
        return discdf

    print("Loading ArtistID Data")
    artistIDtoAlbumNames  = Series(disc.getArtistIDToAlbumNamesData())
    if loadRefs:
        # NOTE(review): loaded but never joined into discdf; the call is kept
        # so that loadRefs=True still triggers the same load as before.
        artistIDtoAlbumRefs   = Series(disc.getArtistIDToAlbumRefsData())

    print("Creating Pandas DataFrame for {0} Artists".format(artistIDtoAlbumNames.shape[0]))
    cols = ["Albums"]
    discdf = DataFrame(artistIDtoAlbumNames)
    discdf.columns = cols
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))

    print("Saving Master Artist Albums DB File")
    saveFilename = disc.getMasterArtistAlbumsDiscogsDBFilename()
    saveFile(ifile=saveFilename, idata=discdf, debug=False)

    elapsed(start, cmt)

    return discdf






########################################################################################################################
#
# Artist Album Known DB
#
########################################################################################################################
def getArtistAlbumKnownDB(discAlbumDB, discArtistAlbumsDB):
    """
    Build a per-artist summary of which albums appear in the album DB.

    Parameters
    ----------
    discAlbumDB : pandas.DataFrame
        Only its index is used; an album counts as known when its ID is found
        in that index.
    discArtistAlbumsDB : pandas.DataFrame
        Must contain an "Albums" column. Each value is passed to
        isKnownAlbum, which reads it as {mediaType: {albumID: albumName}}.

    Returns
    -------
    pandas.DataFrame
        Columns "Known Albums", "All Albums" and "Albums", indexed like
        discArtistAlbumsDB["Albums"].
    """
    start, cmt = clock("\n=================================== Creating Artist Album DB ===================================")
    from pandas import DataFrame

    idx=discAlbumDB.index

    tmpdb = discArtistAlbumsDB["Albums"].copy()
    print("Creating Pandas DataFrame for {0} Arist Albums".format(tmpdb.shape[0]))
    # Name the columns in the constructor: with no rows the frame would
    # otherwise have zero columns and assigning three names afterwards raises
    # ValueError (length mismatch).
    discdf = DataFrame(tmpdb.apply(isKnownAlbum, idx=idx).tolist(),
                       index=tmpdb.index,
                       columns=["Known Albums", "All Albums", "Albums"])
    print("\tShape --> {0}".format(discdf.shape))

    print("DataFrame Shape is {0}".format(discdf.shape))
    elapsed(start, cmt)

    return discdf
    
def isKnownAlbum(x, **kwargs):
    """Flag each album of one artist as known or unknown.

    Args:
        x: Mapping of media type -> {albumID: albumName}.
        **kwargs: Must provide 'idx', a container of known album IDs that
            supports the `in` operator (only looked up once an album is seen).

    Returns:
        A three-item list: [number of known albums, total number of albums,
        {albumID: [albumName, mediaType, known]}].
    """
    albums = {}
    nKnown = 0
    nTotal = 0
    for mediaType, mediaAlbums in x.items():
        for albumID, albumName in mediaAlbums.items():
            found = albumID in kwargs['idx']
            albums[albumID] = [albumName, mediaType, found]
            nKnown += found
            nTotal += 1

    return [nKnown, nTotal, albums]





########################################################################################################################
#
# Album DB
#
########################################################################################################################
def getAlbumDB(disc):
    """Assemble the album DataFrame (Name, Ref, Artists) keyed by album ID.

    Args:
        disc: Object providing getAlbumIDToNameData(), getAlbumIDToRefData()
            and getAlbumIDToArtistsData(); each result is wrapped in a Series.

    Returns:
        DataFrame indexed by the album IDs of the name data, with the columns
        "Name", "Ref" and "Artists". Ref and Artists are joined onto the name
        index, so IDs missing from them end up as NaN.
    """
    from pandas import Series

    timer, header = clock("\n=================================== Creating Artist Album DB ===================================")

    def reportShape(frame):
        print("\tShape --> {0}".format(frame.shape))

    print("Loading AlbumID Data")
    names = Series(disc.getAlbumIDToNameData())
    refs = Series(disc.getAlbumIDToRefData())
    artists = Series(disc.getAlbumIDToArtistsData())

    print("Creating Pandas DataFrame for {0} Albums".format(len(names)))
    albumdf = names.to_frame("Name")
    reportShape(albumdf)

    print("  Joining Ref")
    albumdf = albumdf.join(refs.to_frame("Ref"))
    reportShape(albumdf)

    print("  Joining Artists")
    albumdf = albumdf.join(artists.to_frame("Artists"))
    reportShape(albumdf)

    print("DataFrame Shape is {0}".format(albumdf.shape))
    elapsed(timer, header)

    return albumdf





########################################################################################################################
#
# Master DB Join
#
########################################################################################################################
def createMasterDB(disc, discArtistDB, discArtistMetadataDB, discArtistAlbumKnownDB):
    """Join the per-artist DataFrames and save the result as the master DB.

    Args:
        disc: Object providing getMasterDiscogsDBFilename() for the output path.
        discArtistDB: Base artist DataFrame; the other frames are joined onto it.
        discArtistMetadataDB: Artist metadata DataFrame (joined first).
        discArtistAlbumKnownDB: Artist known-albums DataFrame (joined second).

    Returns:
        None. The joined DataFrame is passed to saveFile.
    """
    timer, header = clock("\n=================================== Creating Artist ID DB ===================================")
    print("Creating Pandas DataFrame for {0} Arist IDs".format(discArtistDB.shape[0]))

    masterdf = discArtistDB
    joinSteps = (
        ("  Joining Artist Metadata", discArtistMetadataDB),
        ("  Joining Artist Albums", discArtistAlbumKnownDB),
    )
    for message, other in joinSteps:
        print(message)
        masterdf = masterdf.join(other)
        print("\tShape --> {0}".format(masterdf.shape))
    elapsed(timer, header)

    saveFile(idata=masterdf, ifile=disc.getMasterDiscogsDBFilename(), debug=True)
    
    
    
    
    
    
########################################################################################################################
#
# Associated Functions
#
########################################################################################################################
def directoryName(x):
    """Remove "..." and turn "/" into "-" in a name; None is passed through.

    Args:
        x: The name to clean, or None.

    Returns:
        The cleaned name, or None when x is None.
    """
    if x is None:
        return x
    # Applied in order: first drop ellipses, then replace path separators.
    for target, substitute in (("...", ""), ("/", "-")):
        if target in x:
            x = x.replace(target, substitute)
    return x

def realName(x):
    """Split a trailing "(N)" numeric suffix off a name.

    Handles suffixes whose parenthesised part is one, two or three characters
    long, e.g. "Nirvana (2)", "Nirvana (12)" and "Nirvana (123)". The
    three-character case was previously unreachable because its branch
    re-tested the two-character position.

    Args:
        x: The name to split, or None.

    Returns:
        A two-item list [name, number]:
          * [None, -1] when x is None and [x, -1] when x is empty;
          * [stripped name, int] when a numeric suffix is found;
          * [x, None] otherwise (no suffix, or the suffix is not a number).
    """
    if x is None:
        return [None, -1]

    lenx = len(x)
    if lenx < 1:
        return [x, -1]

    if x[-1] != ")":
        return [x, None]

    for width in (1, 2, 3):
        # Distance of the "(" from the end of the string: "(" + width chars + ")".
        openOffset = width + 2
        # Require at least two characters in front of the "(", as the original
        # length thresholds (5 and 6) did.
        if lenx < openOffset + 2:
            break
        if x[-openOffset] != "(":
            continue
        try:
            num = int(x[-openOffset + 1:-1])
        except ValueError:
            # Parenthesised text that is not a number, e.g. "Nirvana (UK)".
            return [x, None]
        return [x[:-openOffset].strip(), num]

    return [x, None]

def discConv(x):
    """Normalise a name: "/" becomes "-", "¡" is removed, leading dots are dropped.

    Args:
        x: The name to convert, or None.

    Returns:
        "" when x is None; otherwise the converted name with surrounding
        whitespace stripped. A name made only of dots keeps a single ".".
    """
    if x is None:
        return ""
    cleaned = x.replace("/", "-").replace("¡", "")
    # Drop every leading dot, but never shrink an all-dots name below one ".".
    cleaned = cleaned.lstrip(".") or cleaned[:1]
    return cleaned.strip()

def cleanMB(x):
    """Cut a name off at its last parenthesised part, e.g. "Nirvana (UK band)".

    Fixes relative to the previous version:
      * the cut is made exactly at the "(", so "Foo(bar)" yields "Foo"
        instead of losing the character in front of the parenthesis;
      * a name that starts with its only "(" is returned unchanged instead of
        losing its final character;
      * the ")" must come after the last "(" for the pair to count;
      * None is passed through instead of raising.

    Args:
        x: The name to clean, or None.

    Returns:
        The text in front of the last "(" with whitespace stripped, or x
        unchanged when there is no such parenthesised part or nothing would
        remain in front of it.
    """
    if x is None:
        return x
    openPos = x.rfind("(")
    closePos = x.rfind(")")
    if openPos < 0 or closePos <= openPos:
        return x
    name = x[:openPos].strip()
    # Never reduce a fully parenthesised name to an empty string.
    return name if name else x

# Create Dictionary Lookup Files

## Artist ID --> Ref and Name

In [None]:
%load_ext autoreload
%autoreload

from lookup import createArtistIDMap
from masterdb import getSlimArtistDB
from lookup import createArtistAlbumIDMap
from masterdb import getArtistAlbumsDB

# Candidate DBs are all keys of `dbdata`, but the next line immediately
# overrides that and restricts this run to AllMusic only.
# NOTE(review): `dbdata` is not defined in this cell — it must already exist in the kernel.
dbs = dbdata.keys()
dbs = ["AllMusic"]

########################################################################################################
## Loop over DBs: for each DB, rebuild the lookup maps and force-recreate the DataFrames
########################################################################################################
for db in dbs:
    # Banner announcing which DB is being processed
    print("\n")
    print("="*125)
    print("="*125)
    print("=",db)
    print("="*125)
    print("="*125)
    print("\n")
    
    
    ####################################################################################################
    ## Create Artist ID Lookup
    ####################################################################################################
    createArtistIDMap(dbdata[db]["Disc"])
    
    
    ####################################################################################################
    ## Create Artist ID Pandas DataFrame
    ####################################################################################################
    # NOTE(review): `mydb` is assigned in the following notebook cell (`mydb = mdb.get()`) and
    # `getKnownSlimArtistDB` is not imported in this cell; both must already be in the kernel
    # namespace, so this cell fails on a fresh Restart & Run All.
    discSlimArtistAMDB      = getSlimArtistDB(dbdata[db]["Disc"], force=True)
    discKnownSlimArtistAMDB = getKnownSlimArtistDB(dbdata[db]["Disc"], mydb, force=True)


    ####################################################################################################
    ## Create Artist ID to Album ID Lookup
    ####################################################################################################
    createArtistAlbumIDMap(dbdata[db]["Disc"])


    ####################################################################################################
    ## Create Artist Albums Pandas DataFrame
    ####################################################################################################
    discArtistAMAlbumsDB = getArtistAlbumsDB(dbdata[db]["Disc"], force=True)

In [None]:
%load_ext autoreload
%autoreload
from myMusicDBMap import myMusicDBMap
from artistDB import artistDB

mdb = myMusicDBMap(debug=True)
mydb = mdb.get()

***
***
***

## Artist ID --> Genre, Style, Artists Lookup Table

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createArtistMetadataMap
createArtistMetadataMap(discAM)

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createArtistMetadataMap
createArtistMetadataMap(discDC)

## Album ID --> Name, Ref, Artists Lookup Table

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from lookup import createAlbumIDMap
createAlbumIDMap(disc)

# Master Lookup Tests

In [None]:
%load_ext autoreload
%autoreload
from lookup import testLookupMaps
testLookupMaps(disc)

In [None]:

discArtistLMAlbumsDB = getArtistAlbumsDB(discRM, force=False)

In [None]:
from collections import Counter
# Tally, over every row of the "Albums" column, how many rows contain each
# top-level key (presumably the media types — verify against the album data).
cntr = Counter()
for artistData in discArtistLMAlbumsDB["Albums"]:
    for key in artistData.keys():
        cntr[key] += 1
# Bare expression so the notebook displays the resulting counts.
cntr

# Pandas DB

## Slim Artist DB

In [None]:
from masterdb import getSlimArtistDB
discSlimArtistDB = getSlimArtistDB(disc)
discSlimArtistDB.head()

## Artist DB

In [None]:
from masterdb import getArtistDB

In [None]:
discArtistDB = getArtistDB(disc)
discArtistDB.head()

## Artist Metadata DB

In [None]:
from masterdb import getArtistMetadataDB

In [None]:
discArtistMetadataDB = getArtistMetadataDB(disc)
discArtistMetadataDB.head()

## Artist Albums DB

In [None]:
from masterdb import getArtistAlbumsDB

In [None]:
discArtistAlbumsDB = getArtistAlbumsDB(disc, force=False)
discArtistAlbumsDB.head()

In [None]:
discArtistAlbumsDB = getArtistAlbumsDB(disc)
discArtistAlbumsDB.head()

## Albums DB

In [None]:
from masterdb import getAlbumDB

In [None]:
discAlbumDB = getAlbumDB(disc)
discAlbumDB.head()

## Artist Album ID --> Known Albums

In [None]:
from masterdb import getArtistAlbumKnownDB

In [None]:
discArtistAlbumKnownDB = getArtistAlbumKnownDB(discAlbumDB, discArtistAlbumsDB)
discArtistAlbumKnownDB.head()

# Joining Artist ID DataFrame

In [None]:
from masterdb import createMasterDB

In [None]:
createMasterDB(disc, discArtistDB, discArtistMetadataDB, discArtistAlbumKnownDB)

In [None]:
from random import random

# Download Artist Data

In [None]:
artistName = "Killin' Baudelaire"

In [None]:
# Full configuration: six DBs and, in the same order, the artist-search method of each.
# Both names are reassigned below, so these lists are not the ones left in effect.
dbsToGet = ["AllMusic", "Discogs", "MusicBrainz", "LastFM", "RockCorner", "CDandLP"]
searches = [dbdata["AllMusic"]["Artists"].searchAllMusicForArtist, dbdata["Discogs"]["Artists"].searchDiscogForArtist,
            dbdata["MusicBrainz"]["Artists"].searchMusicBrainzForArtist, dbdata["LastFM"]["Artists"].searchLastFMForArtist,
            dbdata["RockCorner"]["Artists"].searchRockCornerForArtist, dbdata["CDandLP"]["Artists"].searchCDandLPForArtist]

# Active configuration: AllMusic only. `searches` is the module-level list that
# searchAll() iterates over.
dbsToGet = ["AllMusic"]
searches = [dbdata["AllMusic"]["Artists"].searchAllMusicForArtist]

In [None]:
# Print the position of "Riff Raff" in mdb.getArtists() and stop at the first match.
# NOTE(review): the loop variable rebinds the notebook-level `artistName`.
for i,artistName in enumerate(mdb.getArtists()):
        if artistName == "Riff Raff":
            print(i)
            break

In [None]:
def searchAll(mdb, minI=-1):
    """Run the configured searches for every artist missing from a core DB.

    Artists that already have LastFM, Discogs, AllMusic and MusicBrainz entries
    are skipped. For every other artist each callable in the module-level
    `searches` list is called with the artist name.

    A failing search is reported and followed by a 3 second pause before the
    next search. Only `Exception` subclasses are handled, so KeyboardInterrupt
    and SystemExit still stop the loop (the previous bare `except:` swallowed
    them, which made the run impossible to interrupt).

    Args:
        mdb: Object providing getArtists() and getArtistData(artistName).
        minI: Artists at positions <= minI are skipped, which allows resuming
            an earlier run. The default of -1 processes every artist.

    Returns:
        None.
    """
    coreDBs = ("LastFM", "Discogs", "AllMusic", "MusicBrainz")
    artistNames = mdb.getArtists()
    nArtists = len(artistNames)
    for i, artistName in enumerate(artistNames):
        if i <= minI:
            continue

        print('\n\n\n')
        print('-'*130)
        print(i,'/',nArtists,'===>\t',artistName)
        print('-'*130)
        print('\n')

        # Local name chosen so the notebook-level `dbdata` is not shadowed.
        artistData = mdb.getArtistData(artistName)
        if all(artistData.get(db) for db in coreDBs):
            print("{0} is well known.".format(artistName))
            continue
        print("{0} get it!".format(artistName))

        for search in searches:
            print("====>",search)
            try:
                search(artistName)
            except Exception as err:
                print("Search failed for {0}: {1}".format(artistName, err))
                sleep(3)

In [None]:
searchAll(mdb, 2383)

In [None]:
# 3404