# Global

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))
#IPython.Cell.options_default.cm_config.lineNumbers = true;

################################################################################
## Python Version
################################################################################
import sys


################################################################################
## General Stuff
################################################################################
from multiprocessing import Pool
from tqdm import tqdm


################################################################################
## Util Stuff
################################################################################
from timeUtils import clock, elapsed
from ioUtils import saveFile, getFile


################################################################################
## Music DB
################################################################################
from mainDB import mainDB
from musicDBMap import musicDBMap
from masterDBMatchClass import masterDBMatchClass
from matchDBArtist import matchDBArtist


################################################################################
## Music Names
################################################################################
from masterArtistNameDB import masterArtistNameDB


################################################################################
## Chart Stuff
################################################################################
from artistIgnores import getArtistIgnores
from billboardData import billboardData
from top40Data import top40Data
from spotifyData import spotifyData
from chartArtistAlbumData import chartArtistAlbumData
from chartUtils import *
from extraArtists import extraKnownArtists


################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
from pandas import DataFrame
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

chartType = None

print("Python: {0}".format(sys.version))
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))



Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2021-04-10 20:20:37.319402


# Charts To Merge

In [2]:
chartsToMerge = ["RYMList", "RYMList2", "RYMAlbum", "RYMSong", "Billboard", "BillboardYE", "MusicVF"] #, "Spotify", "SpotifyViral"]

In [3]:
mdbmaps = {}
for chartType in chartsToMerge:
    mdbmaps[chartType] = musicDBMap(chartType)

Could not determine music db source type for db [RYMList]
  Valid DBs: ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm', 'Deezer', 'AppleMusic', 'AlbumOfTheYear', 'Genius', 'IHeart', 'KWorbSpotify', 'KWorbiTunes', 'KWorbYouTube']
  Loaded 31190 previously matched entries
Could not determine music db source type for db [RYMList2]
  Valid DBs: ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm', 'Deezer', 'AppleMusic', 'AlbumOfTheYear', 'Genius', 'IHeart', 'KWorbSpotify', 'KWorbiTunes', 'KWorbYouTube']
  Loaded 22237 previously matched entries
Could not determine music db source type for db [RYMAlbum]
  Valid DBs: ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm', 'Deezer', 'AppleMusic', 'AlbumOfTheYear', 'Geniu

In [4]:
dbs = ['Discogs', 'AllMusic', 'MusicBrainz', 'AceBootlegs', 'RateYourMusic', 'LastFM', 'DatPiff', 'RockCorner', 'CDandLP', 'MusicStack', 'MetalStorm', 'Deezer', 'AppleMusic', 'AlbumOfTheYear', 'Genius', 'IHeart', 'KWorbSpotify', 'KWorbiTunes', 'KWorbYouTube']

In [7]:
dbToChartMap   = {db: {} for db in dbs}
nameToChartMap = {}
for chartType, mdbmap in mdbmaps.items():
    for primaryKey, primaryData in mdbmap.get().items():
        artistName = primaryData.artistName
        for db,dbData in primaryData.get().items():
            
            if dbToChartMap.get(db) is None:
                raise ValueError("Unknown DB [{0}]".format(db))
                
            dbID,name = dbData.get()
            if dbID is None:
                continue
            
            if dbToChartMap[db].get(dbID) is None:
                dbToChartMap[db][dbID] = {}
            
            if dbToChartMap[db][dbID].get(chartType) is None:
                dbToChartMap[db][dbID][chartType] = {}
            dbToChartMap[db][dbID][chartType][primaryKey] = artistName

In [21]:
## Check For Duplicates
dbToChartReduceMap = {}
for db,dbData in dbToChartMap.items():
    dbToChartReduceMap[db] = {}
    for dbID,dbIDData in dbData.items():
        dbToChartReduceMap[db][dbID] = {"Artist": None, "Charts": None}
        artistNames  = []
        chartData    = {}
        for chartType,chartTypeData in dbIDData.items():
            if len(chartTypeData) > 1:
                print("MultiMatches [{0} , {1}, {2}] --> {3}".format(db, dbID, chartType, chartTypeData))
                continue
            artistNames += chartTypeData.values()
            chartData[chartType] = list(chartTypeData.keys())[0]
            
        artistNames = list(set(artistNames))
        if len(chartTypeData) > 1:
            print("MultiMatches [{0} , {1}] --> {2}".format(db, dbID, [x.encode('UTF-8') for x in artistNames]))
            continue
            
        artistName = artistNames[0]
        dbToChartReduceMap[db][dbID]["Artist"] = artistName
        dbToChartReduceMap[db][dbID]["Charts"] = chartData
        
#dbToChartMap["Discogs"]

MultiMatches [RateYourMusic , 780487, MusicVF] --> {'63a10f4666a92c1070800215eefc5a55': 'Mike Will Made-It', '6d693f3875f83d8c0bcb1c7e405a51ef': 'Mike Will Made‐It'}
MultiMatches [RateYourMusic , 780487] --> [b'Mike Will Made\xe2\x80\x90It']


In [42]:
artistNameToChartData = {}
dbIDMap     = {}
dbIDNameMap = {}
for db,dbData in dbToChartReduceMap.items():
    
    for dbID,dbIDData in dbData.items():
        artistName = dbIDData["Artist"]
        chartData  = dbIDData["Charts"]
        
        if artistNameToChartData.get(artistName) is None:            
            artistNameToChartData[artistName] = []
                    
        key   = tuple([db,dbID])
        value = chartData
        if dbIDMap.get(key) is None:
            dbIDMap[key] = value
        else:
            if dbIDMap[key] != chartData:
                raise ValueError("Multiple Matches: {0} --> {1}".format(key,chartData))
                
        if dbIDNameMap.get(key) is None:
            dbIDNameMap[key] = artistName
        else:
            if dbIDNameMap[key] != artistName:
                raise ValueError("Multiple Matches: {0} --> {1}".format(key,chartData))
                
        artistNameToChartData[artistName].append(key)

In [31]:
artistNameToChartData["Miles Davis"]

{('Discogs', '23755'): True,
 ('AllMusic', '0000423829'): True,
 ('MusicBrainz', '141055994295392496176880148870057252838'): True,
 ('RateYourMusic', '785'): True,
 ('AlbumOfTheYear', '3360'): True}

In [47]:
from pandas import Series
dbIDToNameDF = DataFrame(Series(dbIDNameMap))
dbIDToNameDF.columns = ["Artist"]
dbIDToNameDF.head()

Unnamed: 0,Unnamed: 1,Artist
Discogs,23755,Miles Davis
Discogs,59792,Bob Dylan
Discogs,82730,The Beatles
Discogs,70829,The Beach Boys
Discogs,252864,Belle and Sebastian


In [46]:
dbIDToChartDF = DataFrame(dbIDMap).T
dbIDToChartDF.head()

Unnamed: 0,Unnamed: 1,RYMList,RYMList2,RYMAlbum,Billboard,BillboardYE,RYMSong,MusicVF
Discogs,23755,e7f9fe3b71b3474b737a64c667d74530,07530386a37deac80e1c491ba603a5b6,f4999f60ec916c12891eff40c31d4a69,60d5ae12da0733ebc2c9497dba186951,4a066a51fdf9a2cdb5d640837f3ed3fb,,
Discogs,59792,aabf82fcd82cc26cbf77e9df8128a349,aff0a251e573e22e65a4b5652f4c0dbe,42b35a0818fa535e8aee4c4de318ca70,e1c3f13077e0ab8acde48ec3b255af01,92b1f56b2df3c141b4ebe1e03441051e,4311114ef8d1ee811ca7b5136b8fb734,224101aa64a5fe6747a9b4f261240bed
Discogs,82730,27b3126f47f9f3259f6bb47cea760c6c,e4396ada3e287b81ed9e555d1ff6adf4,b41739197511c409ef71de5fc249dc2d,7dd7a97eae84db630d40c1e78383c026,7f89ac98a1e144863c7f31c7fa95833f,782f0b2b5030b173f4bd2607129a2b9b,59bd3ca73f5b22fabdcccb6295eddd3b
Discogs,70829,289bf386e6b6143b624d42546df76031,9a36a9b7c9e7c223465142fc4420be3c,a9f079f98afa68efc020414910c66e80,588d4154ba2a18f00aadf72efac2300e,eb7ecdd5b4d21d153fa0d92e4de79bbb,be67c61f4672f11074fca6bc7b828894,85571ff52d8de582194608a12451509d
Discogs,252864,25850d1d7a0e23995adae8b91dea4fa6,1b7a136be23d384f31f1a29abecb976c,f576a477dd2d2928b8651775cfb3b0cf,964e5edd7ec2f8d46cdefd7986f8fe24,,ca6bb057f0430258311420182f234ada,


In [48]:
testDF = dbIDToNameDF.join(dbIDToChartDF)

In [69]:
finalDF = {}
for i,(key,df) in enumerate(testDF.groupby("Artist")):
    print("Name ======>",key)
    for key,val in df.to_dict().items():
        print(key,val)
        break
    #finalDF[key] = df

Artist {('Discogs', '832951'): ' Sir Charles Mackerras'}
Artist {('Discogs', '207714'): '!!!'}
Artist {('Discogs', '1089410'): '!T.O.O.H.!', ('MusicBrainz', '283975264308323537535561499987595197088'): '!T.O.O.H.!'}
Artist {('LastFM', '80175561397'): '"14"'}
Artist {('Discogs', '294084'): '"Blue" Gene Tyranny', ('AllMusic', '0000252395'): '"Blue" Gene Tyranny', ('LastFM', '67256244140'): '"Blue" Gene Tyranny'}
Artist {('LastFM', '93498257961'): '"Jack" Haney'}
Artist {('Discogs', '866294'): '"Jojo" Takayanagi Second Concept'}
Artist {('Discogs', '86339'): '"Little Esther" Phillips', ('AllMusic', '0000271029'): '"Little Esther" Phillips', ('MusicBrainz', '190860821552551031001723278987761866026'): '"Little Esther" Phillips', ('AlbumOfTheYear', '6646'): '"Little Esther" Phillips'}
Artist {('AllMusic', '0000391908'): '"New G" New Generation Chorale'}
Artist {('LastFM', '67856307011'): '"Nikiter" Armstrong'}
Artist {('Discogs', '307314'): '"Texas" Alexander'}
Artist {('Discogs', '259422'): 

Artist {('MusicBrainz', '158949027472524805519613719066648547611'): '24kGoldn', ('LastFM', '29117329801'): '24kGoldn', ('AlbumOfTheYear', '69466'): '24kGoldn', ('KWorbSpotify', '110999936677'): '24kGoldn', ('KWorbiTunes', '549631774237'): '24kGoldn'}
Artist {('AllMusic', '0002932256'): '25 Yard Screamer', ('AlbumOfTheYear', '6055'): '25 Yard Screamer'}
Artist {('Discogs', '3678307'): '25,000 Kittens', ('MusicBrainz', '60973970084785860059193779019408892688'): '25,000 Kittens', ('LastFM', '5901680847'): '25,000 Kittens', ('AlbumOfTheYear', '42980'): '25,000 Kittens'}
Artist {('Discogs', '845246'): '2562', ('AllMusic', '0000984874'): '2562'}
Artist {('MusicBrainz', '337388777924509155166871393767170111720'): '257ers'}
Artist {('Discogs', '73103'): '27', ('MusicBrainz', '157433986779275525195684927656843676432'): '27', ('RateYourMusic', '6517'): '27'}
Artist {('Discogs', '273374'): '28 Days', ('MusicBrainz', '288779373034910373211706961404926897847'): '28 Days'}
Artist {('Discogs', '44170

Artist {('LastFM', '26817026879'): '5ive O Posse'}
Artist {('AllMusic', '0000568639'): '5ive Style'}
Artist {('LastFM', '58702540390'): '5lack', ('Deezer', '5198408'): '5lack'}
Artist {('RateYourMusic', '654350'): '5th Avenue Presbyterian Church Choir'}
Artist {('Discogs', '213498'): '5th Ward Boyz', ('AllMusic', '0000572577'): '5th Ward Boyz'}
Artist {('Discogs', '767240'): '5th Ward Juvenilez', ('MusicBrainz', '223034696292726214223993078969222743936'): '5th Ward Juvenilez'}
Artist {('MusicBrainz', '307212029285399366059949615405615723548'): '5th Ward Weebie'}
Artist {('AllMusic', '0000573065'): "5uu's", ('MusicBrainz', '251102338457633047957679218627513444286'): "5uu's", ('LastFM', '84429005729'): "5uu's"}
Artist {('RateYourMusic', '467271'): '6 Tre G'}
Artist {('Discogs', '5676160'): '6 dogs'}
Artist {('RateYourMusic', '752927'): '6.3'}
Artist {('Discogs', '151835'): '60 Second Assassin', ('AllMusic', '0000481155'): '60 Second Assassin'}
Artist {('Discogs', '6568698'): "60's", ('De

Artist {('Discogs', '1346892'): 'A Piedi Nudi'}
Artist {('Discogs', '875613'): 'A Place to Bury Strangers', ('AllMusic', '0000732870'): 'A Place to Bury Strangers'}
Artist {('Discogs', '1152274'): 'A Plea for Purging'}
Artist {('Discogs', '44618'): 'A Primary Industry', ('AllMusic', '0001356722'): 'A Primary Industry', ('MusicBrainz', '230136278466131075272954096623569487681'): 'A Primary Industry'}
Artist {('Discogs', '1636428'): 'A Raincoat', ('AllMusic', '0001375523'): 'A Raincoat'}
Artist {('Discogs', '1163548'): 'A Sei Voci', ('AllMusic', '0002153470'): 'A Sei Voci'}
Artist {('Discogs', '732454'): 'A Shoreline Dream', ('AllMusic', '0001918900'): 'A Shoreline Dream', ('MusicBrainz', '81904624480186207284499881643813445965'): 'A Shoreline Dream'}
Artist {('Discogs', '15411'): 'A Small Good Thing', ('AllMusic', '0000575761'): 'A Small Good Thing'}
Artist {('Discogs', '3201440'): 'A Small World', ('AllMusic', '0002116928'): 'A Small World', ('LastFM', '50872814661'): 'A Small World'}


Artist {('Deezer', '490945'): 'AP.9'}
Artist {('Discogs', '1551686'): 'APB', ('AllMusic', '0003283400'): 'APB', ('MusicBrainz', '272539449819852041977203731211055316274'): 'APB'}
Artist {('LastFM', '13130926421'): 'APB-All Points Bulletin'}
Artist {('AllMusic', '0003867581'): 'AQXDM', ('MusicBrainz', '273093718482364702897409663193132130162'): 'AQXDM', ('Deezer', '14539137'): 'AQXDM', ('AlbumOfTheYear', '43984'): 'AQXDM'}
Artist {('AlbumOfTheYear', '78423'): 'ARC'}
Artist {('Discogs', '7783278'): 'ARIAZ', ('MusicBrainz', '169392138107614293215770905309513264845'): 'ARIAZ', ('LastFM', '53544006404'): 'ARIAZ', ('AlbumOfTheYear', '63642'): 'ARIAZ'}
Artist {('MusicBrainz', '54239465042997003563476775222009423249'): 'ARRM', ('AlbumOfTheYear', '49030'): 'ARRM'}
Artist {('MusicBrainz', '144097712234785347207074846564808009223'): 'ASA-CHANG'}
Artist {('Discogs', '13667'): 'ASC', ('AllMusic', '0000923614'): 'ASC'}
Artist {('MusicBrainz', '185192647514767128106471684661522145261'): 'ASCA', ('Dee

Artist {('AllMusic', '0001713318'): 'Abraham Adzenyah', ('LastFM', '94635672254'): 'Abraham Adzenyah'}
Artist {('Discogs', '3546663'): 'Abraham Mateo', ('AllMusic', '0002599597'): 'Abraham Mateo', ('MusicBrainz', '44351770390428033814274277988131796353'): 'Abraham Mateo', ('Deezer', '320004'): 'Abraham Mateo', ('AlbumOfTheYear', '74325'): 'Abraham Mateo', ('KWorbSpotify', '232529438773'): 'Abraham Mateo', ('KWorbYouTube', '779469293781'): 'Abraham Mateo'}
Artist {('Discogs', '1965741'): "Abraham's Children", ('MusicBrainz', '333232751906096515293437344411000276668'): "Abraham's Children"}
Artist {('Deezer', '14284853'): 'Abrahamm'}
Artist {('MusicBrainz', '17340713794754252378798343719198368536'): 'Abrasive Wheels'}
Artist {('AllMusic', '0001761001'): 'Abrax'}
Artist {('RateYourMusic', '39485'): 'Abraxas'}
Artist {('Discogs', '1430605'): 'Abraxas Pool', ('AllMusic', '0000591217'): 'Abraxas Pool', ('RateYourMusic', '54819'): 'Abraxas Pool'}
Artist {('AllMusic', '0001369327'): 'Abraxis',

Artist {('Discogs', '1365711'): 'Ad Noctum', ('MusicBrainz', '13062651870279183714139504088150408207'): 'Ad Noctum', ('RateYourMusic', '121729'): 'Ad Noctum', ('LastFM', '15672026561'): 'Ad Noctum', ('Deezer', '56708992'): 'Ad Noctum'}
Artist {('Discogs', '84791'): 'Ad Rock'}
Artist {('Discogs', '153641'): 'Ad Visser', ('MusicBrainz', '149861856933312302293069309815009719892'): 'Ad Visser', ('RateYourMusic', '26494'): 'Ad Visser'}
Artist {('AllMusic', '0000848214'): 'Ada'}
Artist {('LastFM', '59682769856'): 'Ada Jones & Walter Van Brunt'}
Artist {('Discogs', '4076459'): 'Ada Jones and Billy Murray', ('RateYourMusic', '243239'): 'Ada Jones and Billy Murray'}
Artist {('RateYourMusic', '222882'): 'Ada Jones and Len Spencer'}
Artist {('Discogs', '1154119'): 'Ada Milea', ('MusicBrainz', '203491786426686838312436342125491299657'): 'Ada Milea', ('LastFM', '13092961997'): 'Ada Milea'}
Artist {('Discogs', '1320627'): 'Ada Moore'}
Artist {('MusicBrainz', '320219849315329967243728113636348504638'

Artist {('LastFM', '61897911361'): 'Adrián Milena'}
Artist {('Discogs', '3685318'): 'Adult Jazz', ('AllMusic', '0003270019'): 'Adult Jazz', ('MusicBrainz', '153470860893522743244627609950107335890'): 'Adult Jazz', ('LastFM', '46606565138'): 'Adult Jazz', ('Deezer', '5898139'): 'Adult Jazz', ('AlbumOfTheYear', '7506'): 'Adult Jazz'}
Artist {('Discogs', '474978'): 'Adult Net', ('AllMusic', '0000599608'): 'Adult Net'}
Artist {('Discogs', '2374812'): 'Advance Base', ('AllMusic', '0002893429'): 'Advance Base', ('MusicBrainz', '299574860002915625234509673058259747060'): 'Advance Base'}
Artist {('Discogs', '1685100'): 'Advantage Lucy', ('AllMusic', '0002078338'): 'Advantage Lucy', ('MusicBrainz', '304541090067623160258770372989948925693'): 'Advantage Lucy', ('LastFM', '59349747227'): 'Advantage Lucy', ('Deezer', '13772879'): 'Advantage Lucy', ('AlbumOfTheYear', '47691'): 'Advantage Lucy'}
Artist {('Discogs', '139004'): 'Adventures'}
Artist {('Discogs', '2172350'): 'Adversarial'}
Artist {('Dis

Artist {('Discogs', '2117781'): 'Agentz'}
Artist {('Discogs', '460925'): 'Agepê', ('AllMusic', '0000601254'): 'Agepê'}
Artist {('Discogs', '4003735'): 'Ager Sonus', ('AllMusic', '0003668717'): 'Ager Sonus', ('MusicBrainz', '9814488176010569833350714005711175815'): 'Ager Sonus'}
Artist {('AllMusic', '0002615582'): 'Ages and Ages', ('MusicBrainz', '149764824100573561061478007453286883046'): 'Ages and Ages'}
Artist {('Discogs', '689755'): 'Aggie Dukes', ('MusicBrainz', '103305000110857732384222397300194285883'): 'Aggie Dukes'}
Artist {('Discogs', '7490911'): 'Aggression Tales', ('LastFM', '78888898377'): 'Aggression Tales', ('Deezer', '4732552'): 'Aggression Tales'}
Artist {('LastFM', '25293517325'): 'Aggressive'}
Artist {('Discogs', '741616'): 'Aggro Knuckle'}
Artist {('Discogs', '1858461'): 'Aggro Santos', ('MusicBrainz', '224268910826951897433719380480819007536'): 'Aggro Santos'}
Artist {('RateYourMusic', '1408187'): 'Aggromance'}
Artist {('AllMusic', '0002006263'): 'Agharta', ('LastFM

Artist {('Discogs', '202206'): 'Akio Suzuki'}
Artist {('RateYourMusic', '1351205'): 'Akira Fujiwara'}
Artist {('Discogs', '1086917'): 'Akira Fuse', ('RateYourMusic', '346947'): 'Akira Fuse'}
Artist {('Discogs', '262765'): 'Akira Ifukube', ('AllMusic', '0000125752'): 'Akira Ifukube'}
Artist {('Discogs', '3079889'): 'Akira Imamura', ('LastFM', '7294929093'): 'Akira Imamura'}
Artist {('RateYourMusic', '64642'): 'Akira Inoue'}
Artist {('MusicBrainz', '165327424659277358839237847520176837525'): 'Akira Ishii', ('LastFM', '27204915484'): 'Akira Ishii', ('Deezer', '1397588'): 'Akira Ishii'}
Artist {('Discogs', '259139'): 'Akira Ishikawa', ('AllMusic', '0001624183'): 'Akira Ishikawa'}
Artist {('AllMusic', '0003950383'): 'Akira Ishikawa & the Jazz Rock Band'}
Artist {('Discogs', '729957'): 'Akira Ito'}
Artist {('Discogs', '946235'): 'Akira Miyazawa', ('RateYourMusic', '775922'): 'Akira Miyazawa'}
Artist {('Discogs', '546128'): 'Akira Okazawa'}
Artist {('Discogs', '53823'): 'Akira Rabelais', ('Al

Artist {('Discogs', '150767'): 'Alan Cook'}
Artist {('Discogs', '40072'): 'Alan Coulthard', ('AllMusic', '0002391124'): 'Alan Coulthard', ('MusicBrainz', '112930550071548156311177407482905454851'): 'Alan Coulthard', ('LastFM', '36484540346'): 'Alan Coulthard'}
Artist {('Discogs', '1380414'): 'Alan Dale'}
Artist {('Discogs', '396572'): 'Alan David'}
Artist {('MusicBrainz', '172188801050529948096513035323351121479'): 'Alan Dean & His Problems', ('LastFM', '64665324079'): 'Alan Dean & His Problems', ('Deezer', '10944990'): 'Alan Dean & His Problems'}
Artist {('RateYourMusic', '188670'): 'Alan Drew'}
Artist {('Discogs', '355859'): 'Alan Feanch', ('MusicBrainz', '131202367889383536892536745578763843298'): 'Alan Feanch'}
Artist {('Deezer', '8049965'): 'Alan Franklin'}
Artist {('Discogs', '598246'): 'Alan Freed'}
Artist {('Discogs', '299211'): 'Alan Gowen'}
Artist {('Discogs', '171886'): 'Alan Howarth', ('MusicBrainz', '137591329083763912205977780793194244145'): 'Alan Howarth'}
Artist {('Disc

Artist {('Discogs', '226672'): 'Alboth!'}
Artist {('Discogs', '577434'): 'Albrecht/d.', ('MusicBrainz', '191808748944063756793082472942255291209'): 'Albrecht/d.', ('RateYourMusic', '252867'): 'Albrecht/d.'}
Artist {('Discogs', '652302'): 'Albro T. Gaul'}
Artist {('Discogs', '5027782'): 'Albuen', ('MusicBrainz', '288143545507931420953159827437253554265'): 'Albuen', ('LastFM', '49698500324'): 'Albuen'}
Artist {('RateYourMusic', '1253776'): 'Albums in One Minute'}
Artist {('Deezer', '2633'): 'Alcatraz'}
Artist {('Discogs', '252174'): 'Alcatrazz'}
Artist {('Discogs', '52222'): 'Alcazar', ('MusicBrainz', '16597937954995229294914573728505120384'): 'Alcazar', ('AlbumOfTheYear', '54430'): 'Alcazar', ('KWorbSpotify', '761029405228'): 'Alcazar'}
Artist {('Discogs', '4801975'): 'Alceo Bocchino', ('AllMusic', '0002203914'): 'Alceo Bocchino', ('MusicBrainz', '193399169876683606922374815348872046441'): 'Alceo Bocchino', ('RateYourMusic', '917260'): 'Alceo Bocchino'}
Artist {('MusicBrainz', '12587407

Artist {('Discogs', '576202'): 'Alex Riel Trio'}
Artist {('Discogs', '2668422'): 'Alex Sanders'}
Artist {('Discogs', '4697914'): 'Alex Sensation', ('AllMusic', '0002105579'): 'Alex Sensation', ('KWorbSpotify', '992493654964'): 'Alex Sensation', ('KWorbYouTube', '374182617853'): 'Alex Sensation'}
Artist {('Discogs', '408091'): 'Alex Turner', ('AllMusic', '0000547435'): 'Alex Turner', ('AlbumOfTheYear', '1639'): 'Alex Turner'}
Artist {('Discogs', '474208'): 'Alex Ubago'}
Artist {('Discogs', '3018484'): 'Alex Ward'}
Artist {('Discogs', '505486'): 'Alex Welsh'}
Artist {('Discogs', '1302196'): 'Alex da Kid', ('AllMusic', '0001015485'): 'Alex da Kid', ('MusicBrainz', '168247935094346006430946520409007415096'): 'Alex da Kid'}
Artist {('Discogs', '201633'): 'Alex de Grassi', ('AllMusic', '0000081836'): 'Alex de Grassi', ('MusicBrainz', '47599107888675147527655300108044895420'): 'Alex de Grassi'}
Artist {('RateYourMusic', '158658'): 'Alexa Leclère'}
Artist {('Discogs', '4078403'): 'Alexanco', (

Artist {('Discogs', '756646'): 'Ali Project', ('AllMusic', '0002328040'): 'Ali Project'}
Artist {('Discogs', '10378'): 'Ali Shaheed Muhammad', ('AllMusic', '0000389128'): 'Ali Shaheed Muhammad', ('MusicBrainz', '209038770288544595544800236849805180321'): 'Ali Shaheed Muhammad'}
Artist {('AlbumOfTheYear', '78619'): 'Ali Spagnola'}
Artist {('Discogs', '253500'): 'Ali Thomson'}
Artist {('Discogs', '396227'): 'Alias', ('Discogs', '299130'): 'Alias', ('AllMusic', '0000006987'): 'Alias', ('RateYourMusic', '28078'): 'Alias'}
Artist {('RateYourMusic', '940461'): 'Alias Conrad Coldwood'}
Artist {('Discogs', '345919'): 'Alice', ('AllMusic', '0000004673'): 'Alice', ('MusicBrainz', '64502234926189653110679042241881520819'): 'Alice', ('LastFM', '31926153489'): 'Alice', ('AlbumOfTheYear', '92289'): 'Alice'}
Artist {('RateYourMusic', '105292'): 'Alice & Ellen Kessler'}
Artist {('Discogs', '322142'): 'Alice Babs', ('AllMusic', '0000743135'): 'Alice Babs', ('MusicBrainz', '25604035968169950527208129357

Artist {('Discogs', '723073'): 'Allan Mortensen', ('RateYourMusic', '259150'): 'Allan Mortensen'}
Artist {('MusicBrainz', '226741647254942653800326211450499721074'): 'Allan Nicholls'}
Artist {('MusicBrainz', '42062769040764064435693552291217645973'): 'Allan Olsen'}
Artist {('Discogs', '431545'): 'Allan Sherman', ('AllMusic', '0000002273'): 'Allan Sherman', ('MusicBrainz', '19006999638440883769447936068845843929'): 'Allan Sherman'}
Artist {('MusicBrainz', '145805464855990934035947719171020435609'): 'Allan Theo'}
Artist {('AllMusic', '0003102966'): 'Allan Toniks'}
Artist {('Discogs', '635795'): 'Allan Vegenfeldt', ('AllMusic', '0001415776'): 'Allan Vegenfeldt', ('MusicBrainz', '183803282375397685917035373173492703139'): 'Allan Vegenfeldt', ('RateYourMusic', '128975'): 'Allan Vegenfeldt', ('LastFM', '91204548347'): 'Allan Vegenfeldt', ('Deezer', '90571'): 'Allan Vegenfeldt'}
Artist {('RateYourMusic', '1192130'): 'Allan Wilson'}
Artist {('Discogs', '405150'): 'Allan Zane'}
Artist {('Discog

Artist {('Discogs', '552011'): 'Alvin Dinkin'}
Artist {('Discogs', '309338'): 'Alvin Lee', ('AllMusic', '0000011620'): 'Alvin Lee', ('MusicBrainz', '265030080117180129850411171824863575094'): 'Alvin Lee', ('LastFM', '59470743094'): 'Alvin Lee', ('Deezer', '97124'): 'Alvin Lee'}
Artist {('Discogs', '9297'): 'Alvin Lucier', ('AllMusic', '0000014981'): 'Alvin Lucier', ('MusicBrainz', '128861554275475377162990480763251599991'): 'Alvin Lucier', ('RateYourMusic', '24067'): 'Alvin Lucier'}
Artist {('Discogs', '2230479'): 'Alvin Risk', ('AllMusic', '0002717108'): 'Alvin Risk'}
Artist {('Discogs', '506845'): 'Alvin Robinson', ('MusicBrainz', '227758704776900047984554758054975650005'): 'Alvin Robinson'}
Artist {('Discogs', '227471'): 'Alvin Stardust', ('AllMusic', '0000011207'): 'Alvin Stardust', ('MusicBrainz', '254214375432925825660186707130298877367'): 'Alvin Stardust'}
Artist {('Discogs', '258467'): 'Alvin Stoller', ('MusicBrainz', '50996653444731363588550196361776726467'): 'Alvin Stoller'}


KeyboardInterrupt: 

In [66]:
df

Unnamed: 0,Unnamed: 1,Artist,RYMList,RYMList2,RYMAlbum,Billboard,BillboardYE,RYMSong,MusicVF
LastFM,25657663576,황보령=SmackSoft,21246d964086b7caab1923abfd35efe9,,,,,,


In [14]:
tmp1 = b'Mike Will Made-It'
tmp2 = b'Mike Will Made\xe2\x80\x90It'
#s=s.replace(b'PatientName',name)

In [18]:
tmp1.replace(b"\xe2\x80\x90", b"-")
tmp2.replace(b"\xe2\x80\x90", b"-")

b'Mike Will Made-It'

In [28]:
def checkForMultipleMatches(mdbmap):
    dbChartMap = {}
    for primaryKey, primaryData in mdbmap.get().items():
        artistName = primaryData.artistName
        for db,dbData in primaryData.get().items():
            if dbChartMap.get(db) is None:
                dbChartMap[db] = {}
            dbID,name = dbData.get()
            if dbID is None:
                continue
            if dbChartMap[db].get(dbID) is None:
                dbChartMap[db][dbID] = {}
            dbChartMap[db][dbID][primaryKey] = artistName
            
    retval = {}
    for db,dbData in dbChartMap.items():
        for dbID,dbIDData in dbData.items():
            if len(dbIDData) > 1:
                if retval.get(db) is None:
                    retval[db] = {}
                retval[db][dbID] = dbIDData
                print("{0: <20}{1}".format(db,dbID))
                print("{0: <20}{1}".format("", dbIDData))
                
    return retval

In [32]:
mdf = DataFrame(multiMatches).T

Unnamed: 0,AlbumOfTheYear,AllMusic,Deezer,Discogs,KWorbSpotify,KWorbYouTube,KWorbiTunes,LastFM,MusicBrainz,RateYourMusic
RYMList,{'34928': {'fad0895a39f230600c8d9bfe8946432e':...,{'0000236077': {'56e01f85c492e6e0e6d845c1a7ab7...,,{'48822': {'613b4001dd5cdade9a905f95d448abec':...,,,,,{'179511679268020170632435938696355720266': {'...,{'2908': {'613b4001dd5cdade9a905f95d448abec': ...
RYMList2,,{'0001934706': {'22abd25bd9ed4822187ea44b45ed0...,,,,,,,,
RYMAlbum,,,,,,,,,,{'491749': {'4180ebc66727addb8e2f118b099c9083'...
RYMSong,{'43486': {'9789441c9561eedf4d371a0b10828b82':...,,,{'2752': {'26fde1adb1d2574c7ac83c66ab7c9fcb': ...,,,,{'29051186523': {'4f42f062997510fb306f329531ca...,{'7252895589662777471698200102759384100': {'54...,{'5452': {'e1fe143a86c46aaf2219f66a5d4eec4c': ...
RYMScript,,,,,,,,,,{'838414': {'6250e264b43eff1ae2f2332981f3cd88'...
Billboard,,,,,,,,,,
BillboardYE,,,,,,,,,,
MusicVF,,,,{'1520817': {'203c4f4b945d1925cd010f91dfda7a6c...,,,,,{'322641216138480833656024574784517733791': {'...,{'780487': {'63a10f4666a92c1070800215eefc5a55'...
Spotify,{'10549': {'d29258ce7756cdf636a20961ca87ad4d':...,{'0000883318': {'82cd344572b6e83f9315e2357ecff...,{'7471': {'d29258ce7756cdf636a20961ca87ad4d': ...,{'92476': {'82cd344572b6e83f9315e2357ecff52d':...,{'184614649763': {'d29258ce7756cdf636a20961ca8...,{'48188217063': {'fe436126c10d8789ebc9a340e898...,{'711867872134': {'bd708bcaac546f7788eab3810a6...,{'10567061175': {'d29258ce7756cdf636a20961ca87...,{'197640539869093828781529494250485905073': {'...,
SpotifyViral,,,,{'2294226': {'23ca063597971ab88144e355c33915ba...,,,{'718646908300': {'ecf0aa97f932cceeed6c3306cf8...,{'61475981353': {'a67cc45a6f8c0c0cee98dfc732ab...,{'285981193072991206153544611149585335162': {'...,


In [49]:
from pandas import Series
for colname in mdf.columns:
    mdfData = mdf[colname].dropna()
    print(colname)
    for key,value in mdfData.iteritems():
        vd = {dbID: list(dbData.values()) for dbID,dbData in value.items()}
        vd = [list(dbData.values()) for dbID,dbData in value.items()]
        print("\t",key,vd)

AlbumOfTheYear
	 RYMList [['Carmel', 'Caramel']]
	 RYMSong [['Tomorrow', '4Tomorrow']]
	 Spotify [['Vince Guaraldi Trio', 'The Vince Guaraldi Trio'], ['G Herbo', "Lil' Herb"], ['Ukendt Kunstner', 'Ukendt kunstner']]
AllMusic
	 RYMList [['Ritual', 'Rituaal']]
	 RYMList2 [['Super Junior', 'Super Junior-H']]
	 Spotify [['Red Hot Chili Peppers', 'The Red Hot Chili Peppers'], ['Vince Guaraldi Trio', 'The Vince Guaraldi Trio'], ['Walk Off the Earth', 'Walk Off The Earth'], ['girl in red', 'Girl in Red'], ['Raaka-Aine', 'Raaka-aine'], ['Dan Balan', 'Dan Bălan'], ['Ania Wyszkoni', 'Anna Wyszkoni'], ['Inti-Illimani', 'Inti‐Illimani'], ['Elis Regina', 'Ellis Regina'], ['Naif', 'Naïf']]
Deezer
	 Spotify [['Vince Guaraldi Trio', 'The Vince Guaraldi Trio'], ['girl in red', 'Girl in Red'], ['Lous and The Yakuza', 'Lous And The Yakuza'], ['Ukendt Kunstner', 'Ukendt kunstner'], ['Dzharakhov', 'Dzharahov'], ['Samanta Tina', 'Samanta Tīna'], ['Café Tacvba', 'Café Tacuba'], ['Patricio Rey y sus Redondito

In [None]:
## AllMusic
Ritual


In [30]:
multiMatches = {}
for chartType,mdbmap in mdbmaps.items():
    print("="*15,chartType,"="*15)
    multiMatches[chartType] = checkForMultipleMatches(mdbmap)

Discogs             48822
                    {'613b4001dd5cdade9a905f95d448abec': 'Bobby Darin', '0274493a0efdeaf23c2973567fa90ec2': 'Bob Darin'}
AllMusic            0000236077
                    {'56e01f85c492e6e0e6d845c1a7ab78b9': 'Ritual', '4c938bf83161be08afa330414e97c0c6': 'Rituaal'}
MusicBrainz         179511679268020170632435938696355720266
                    {'56e01f85c492e6e0e6d845c1a7ab78b9': 'Ritual', '4c938bf83161be08afa330414e97c0c6': 'Rituaal'}
RateYourMusic       2908
                    {'613b4001dd5cdade9a905f95d448abec': 'Bobby Darin', '0274493a0efdeaf23c2973567fa90ec2': 'Bob Darin'}
AlbumOfTheYear      34928
                    {'fad0895a39f230600c8d9bfe8946432e': 'Carmel', '2137c4880dea3f36d77f58f36b9c21cb': 'Caramel'}
AllMusic            0001934706
                    {'22abd25bd9ed4822187ea44b45ed0b11': 'Super Junior', 'a6fb118c3a1491919cce7ca3762684cd': 'Super Junior-H'}
RateYourMusic       491749
                    {'4180ebc66727addb8e2f118b099c9083': 'Lil B

Discogs             2294226
                    {'23ca063597971ab88144e355c33915ba': 'Oscar And The Wolf', '19b2a93e8b2c47c3810ad7dc5ffd618b': 'Oscar and The Wolf'}
MusicBrainz         285981193072991206153544611149585335162
                    {'ecf0aa97f932cceeed6c3306cf848a29': 'Feride Hilal Akın', 'a45f575421b96ebefd6d627778ec7d75': 'Feride Hilal AkÄ±n'}
LastFM              61475981353
                    {'a67cc45a6f8c0c0cee98dfc732abcb85': 'Janelle Monáe', 'edd8661ac4acc3810fe9e927cc660c6e': 'Janelle MonÃe'}
LastFM              74405453254
                    {'ecf0aa97f932cceeed6c3306cf848a29': 'Feride Hilal Akın', 'a45f575421b96ebefd6d627778ec7d75': 'Feride Hilal AkÄ±n'}
KWorbiTunes         718646908300
                    {'ecf0aa97f932cceeed6c3306cf848a29': 'Feride Hilal Akın', 'a45f575421b96ebefd6d627778ec7d75': 'Feride Hilal AkÄ±n'}


In [7]:
def artistChartInfo
dbChartMap = {}
for chartType in chartsToMerge:
    for primaryKey, primaryData in mdbmaps[chartType].get().items():
        artistName = primaryData.artistName
        for db,dbData in primaryData.get().items():
            if dbChartMap.get(db) is None:
                dbChartMap[db] = {}
            dbID,name = dbData.get()
            if dbID is not None:
                if dbChartMap[db].get(dbID) is None:
                    dbChartMap[db][dbID] = {}
                if dbChartMap[db][dbID][chartType] = {} is None:
                    dbChartMap[db][dbID][artistName] = {}
                dbChartMap[db][dbID][artistName][chartType] = primaryKey

In [8]:
dbChartMap["LastFM"]

{'72021401638': {'Joanna Newsom': {'RYMList': '8a1ff3d986f6af76604de879a492152d',
   'RYMList2': '1fe5f3e855f8b976f586bbe5d2c56ef1'}},
 '60046817186': {'Beach House': {'RYMList': '8982f612e59fd5ccd998970becd3607f',
   'RYMList2': '34d02d52e048038efa802e569ed88aeb'}},
 '12812722644': {'The Wailers': {'RYMList': '6848f81bf8ce942ef42cad07f0ad663b'}},
 '77434063941': {'Kraftwerk': {'RYMList': 'e65b9700aa3213f8b62f4d5611ba57b2'}},
 '75599929303': {'Erykah Badu': {'RYMList': 'e0eacc388bcafe72c12173c863f2e7cf',
   'RYMList2': '0c9fff00bf4b6fa9cf50b378799ffc61'}},
 '58657169937': {'Camera Obscura': {'RYMList': '9231b6f5ab522d2bf502c8e5feeea578',
   'RYMList2': 'bd3b202217f9825be327de0a7d1654d6'}},
 '50668659609': {'Beyoncé': {'RYMList': '8b0cb9736e7433891b9d4b95c71e46bf',
   'RYMList2': 'c07151985edd73a355b5929de6de5c38'}},
 '89127449252': {'Johnny Cash': {'RYMList': 'c7a0ea00412936c21ed47178ec5bccdf',
   'RYMList2': '191f1883eb100b56cc80170613b88634'}},
 '87993096124': {'TV On The Radio': {'R

In [None]:
primaryData.get()