# Chart Analysis Functions

In [119]:
## Basic stuff
%load_ext autoreload
%autoreload

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))
#IPython.Cell.options_default.cm_config.lineNumbers = true;

################################################################################
## Python Version
################################################################################
import sys


################################################################################
## General Stuff
################################################################################
from multiprocessing import Pool
from tqdm import tqdm


################################################################################
## Util Stuff
################################################################################
from timeUtils import clock, elapsed
from ioUtils import saveFile, getFile


################################################################################
## Music DB
################################################################################
from mainDB import mainDB
from musicDBMap import musicDBMap
from masterDBMatchClass import masterDBMatchClass
from matchDBArtist import matchDBArtist


################################################################################
## Music Names
################################################################################
from masterArtistNameDB import masterArtistNameDB


################################################################################
## Chart Stuff
################################################################################
from artistIgnores import getArtistIgnores
from billboardData import billboardData
from top40Data import top40Data
from spotifyData import spotifyData
from chartArtistAlbumData import chartArtistAlbumData
from chartUtils import *
from extraArtists import extraKnownArtists


################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
from pandas import DataFrame
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

print("Python: {0}".format(sys.version))
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2021-02-04 14:31:26.353869


# Rename Input Data

In [None]:
updateMultiDB(2)

In [120]:
manDB      = masterArtistNameDB("main", init=False)
multimanDB = masterArtistNameDB("multi", init=False)

def updateMultiDB(step=1):
    """
    Reload the "multi" artist-name DB from multi.yaml.

    step == 1: reload into a freshly initialised DB and call checkForRecursives().
    step == 2: reload, save(), then re-open the DB and write its getRenames()
               back to multi.yaml.
    Any other step is a no-op.
    """
    if step not in (1, 2):
        return

    # Both steps begin with the same reload of the YAML file.
    reloadedDB = masterArtistNameDB("multi", init=True)
    reloadedDB.forceReload(getFile("multi.yaml"))

    if step == 1:
        reloadedDB.checkForRecursives()
    elif step == 2:
        reloadedDB.save()

        # NOTE(review): this handle is local to the function; the module-level
        # `multimanDB` is not refreshed by this call -- confirm that is intended.
        savedDB = masterArtistNameDB("multi", init=False)
        saveFile(idata=savedDB.getRenames(), ifile="multi.yaml")

def updateManDB(step=1):
    """
    Maintain the "main" artist-name DB.

    step 1/2 start from a freshly initialised DB reloaded from main.yaml;
    step 3/4 start from the existing DB and add the renames in relDBRenames2.yaml.
    Odd steps (1, 3) only call checkForRecursives(); even steps (2, 4) save(),
    re-open the DB and write its getRenames() back to main.yaml.
    Whatever the step, the call ends with clock("Last Ran").
    """
    workingDB = None
    if step in (1, 2):
        workingDB = masterArtistNameDB("main", init=True)
        workingDB.forceReload(getFile("main.yaml"))
    elif step in (3, 4):
        workingDB = masterArtistNameDB("main", init=False)
        workingDB.addRenames(getFile("relDBRenames2.yaml"))

    if step in (1, 3):
        workingDB.checkForRecursives()
    elif step in (2, 4):
        workingDB.save()

        # NOTE(review): local handle only; the module-level `manDB` is not
        # rebound by this function -- confirm that is intended.
        savedDB = masterArtistNameDB("main", init=False)
        saveFile(idata=savedDB.getRenames(), ifile="main.yaml")

    _, _ = clock("Last Ran")

  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7222 artist keys.
  There are currently 8420 renamed artist keys.
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/multiArtistNameDB.p
  There are currently 136 artist keys.
  There are currently 142 renamed artist keys.


In [None]:
updateMultiDB(2)

# Reverse Chart Lookup

In [None]:
updateMultiDB(1)

In [None]:
updateMultiDB(2)

In [None]:
updateManDB(1)

In [None]:
updateManDB(2)

In [4]:
if False:
    from multiArtist import multiartist
    mularts  = multiartist(cutoff=0.9, discdata=None, exact=False)
    mularts.setKnownMultiDelimArtists(getFile("../multiartist/knownMultiArtists.yaml"))
    knownMultiArtists = getFile("../multiartist/knownMultiArtists.yaml")
    print(len(knownMultiArtists),"Known Artists")

    ignoresComp = ['Soundtrack', 'Various Artists', 'Original Broadway Cast Recording', 'Read-Along','Veggietales', 'Pokemon', 
                   'Barney', 'Walt Disney Read-Along', 'The Powerpuff Girls',
                   'Superstar Kidz', 'Original Cast', 'Original Cast Recording']
    ignores = ignoresComp    

    def matchArtist(artistName):
        subNames = mularts.getArtistNames(artistName)
        result = {subName: mdbmap.isKnownByName(subName) for subName in subNames}
        for subName in result.keys():
            if subName in ignores:
                result[subName] = True
        return result


    for chartName in chartDFs.keys():
        try:
            chartDFs[chartName]['DB'] = chartDFs[chartName]['RenamedArtist'].apply(matchArtist)
        except:
            print("Error with {0}".format(chartName))

In [None]:
from pandas import Series

# For every chart, count the rows whose "DB" dict contains at least one falsy
# value (i.e. not every looked-up name was known).
vals = {}
for chartName in chartDFs.keys():
    try:
        vals[chartName] = chartDFs[chartName][~chartDFs[chartName]["DB"].apply(lambda x: all(x.values()))].shape[0]
    except Exception as err:
        # Still best-effort (a chart without a usable "DB" column is skipped),
        # but the reason is reported and KeyboardInterrupt is no longer swallowed.
        print("Skipping {0}: {1!r}".format(chartName, err))

In [None]:
Series(vals).sort_values(ascending=False)

In [None]:
mdbmap.isKnownByName("Salt-N-Pepa")

In [None]:
chartDF = chartDFs["vinyl-albums"]
def getMissingArtists(chartDF):
    """
    Return the de-duplicated names whose "DB" lookup value is exactly False.

    Only rows whose "DB" dict is not all-truthy are inspected; the per-row name
    lists are flattened with getFlatList and passed through set(), so the order
    of the returned list is not defined.
    """
    def _allKnown(lookup):
        return all(lookup.values())

    def _unknownNames(lookup):
        return [name for name, known in lookup.items() if known is False]

    incompleteRows = chartDF[~chartDF["DB"].apply(_allKnown)]
    namesPerRow = incompleteRows["DB"].apply(_unknownNames)
    return list(set(getFlatList(namesPerRow)))
getMissingArtists(chartDF)

In [None]:
def getMissingArtists(chartDF):
    """
    Return the rows of chartDF whose "DB" dict has at least one falsy value.

    The previous body indexed an undefined name (`ichartDF`) and discarded the
    result, so any call raised NameError. Note that running this cell rebinds
    `getMissingArtists` in the kernel, replacing any earlier definition.
    """
    hasUnmatched = ~chartDF["DB"].apply(lambda x: all(x.values()))
    return chartDF[hasUnmatched]

In [None]:
chartName = "independent-albums"
chartDFs[chartName][~chartDFs[chartName]["DB"].apply(lambda x: all(x.values()))]

In [None]:
bMap = {}
for chartTitle,chartName in initMap.items():
    newName = newMap.get(chartTitle)
    if newName is not None:
        bMap[newName] = chartName


In [None]:
saveFile(ifile="billboardMapping.p", idata=bMap)

In [None]:
bMap = getFile("billboardMapping.p")

# Top40 Data

In [121]:
%load_ext autoreload
%autoreload
# Top40 pipeline: configure a top40Data object with the main and multi rename
# DBs, run its set* steps, then its save* steps. The statements are kept in
# this order; what each step requires of the previous one is defined in
# top40Data, not here.
from top40Data import top40Data
td = top40Data(minYear=1, maxYear=2021)
td.setChartUsage(rank=[0,1,2,3,4])
td.setDBRenames(manDB)
td.setMultiDBRenames(multimanDB)
td.setFullChartData()
td.setArtistAlbumData()
td.saveArtistAlbumData()
td.saveFullChartData()

# Timestamp the run in the cell output.
_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  Getting Chart For usa
  Using 1 Charts
  Using 1 Charts
  Getting Chart For usaSingles
  Using 1 Charts
  Using 1 Charts
  Getting Chart For uk
  Using 4 Charts
  Using 4 Charts
  Getting Chart For denmark
  Using 2 Charts
  Using 2 Charts
  Getting Chart For europe
  Using 10 Charts
  Using 10 Charts
  Using Charts (None): ['USA Albums', 'USA Singles Top 40', 'UK Singles Top 40', 'UK Top 20 Albums', 'Canada Top 20', 'Ireland Top 20', 'Denmark Top 20', 'German Top 40', 'Belgium Top 20', 'France Top 20', 'Sweden Top 20', 'Netherlands Top 20', 'Europe Official Top 100', 'Norway Top 20', 'Switzerland Top 20', 'Spain Top 20', 'Italy Top 20', 'Austria Top 20']
Found 49 files.
==> USA Singles Top 40                      	1740
==> UK Singles Top 40                       	3518
==> German Top 40                           	4352
==> Austria Top 20                          	4943
==> Belgium Top 20            

# Billboard Data

In [41]:
%load_ext autoreload
%autoreload
# Billboard pipeline: same sequence as the Top40 cell, with ranks 0-5 enabled.
from billboardData import billboardData
bd = billboardData(minYear=1, maxYear=2021)
bd.setChartUsage(rank=[0,1,2,3,4,5])
bd.setDBRenames(manDB)
bd.setMultiDBRenames(multimanDB)
bd.setFullChartData()
bd.setArtistAlbumData()

# Persist both derived data sets.
bd.saveArtistAlbumData()
bd.saveFullChartData()

# Timestamp the run in the cell output.
_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Found 62 files.
  Getting Chart For top
name = top
  Using 6 Charts
  Using 6 Charts
  Getting Chart For hot
name = hot
  Using 12 Charts
  Getting Chart For adult
name = adult
  Using 4 Charts
  Using 16 Charts
  Getting Chart For alternative
name = alternative
  Using 4 Charts
  Getting Chart For countryMusic
name = countryMusic
  Using 10 Charts
  Getting Chart For rock
name = rock
  Using 14 Charts
  Using 28 Charts
  Getting Chart For christian
name = christian
  Using 21 Charts
  Getting Chart For rnb
name = rnb
  Using 18 Charts
  Using 39 Charts
  Getting Chart For canadian
name = canadian
  Using 4 Charts
  Getting Chart For comedy
name = comedy
  Using 2 Charts
  Getting Chart For general
name = general
  Using 4 Charts
  Getting Chart For twitter
name = twitter
  Using 4 Charts
  Using 14 Charts
  Getting Chart For folkblue
name = folkblue
  Using 6 Charts
  Getting Chart For classical
na

# BillboardYE Data

In [None]:
%load_ext autoreload
%autoreload
# Billboard year-end pipeline, with ranks 0-6 enabled.
# NOTE(review): unlike the Top40, Billboard and Spotify cells, this one never
# calls setMultiDBRenames(multimanDB) -- confirm whether billboardYE needs it.
from billboardYE import billboardYE
bYE = billboardYE(minYear=1, maxYear=2021)
bYE.setChartUsage(rank=[0,1,2,3,4,5,6])
bYE.setDBRenames(manDB)
bYE.setFullChartData()
bYE.setArtistAlbumData()

# Persist both derived data sets (full chart data first in this cell).
bYE.saveFullChartData()
bYE.saveArtistAlbumData()

# Timestamp the run in the cell output.
_, _ = clock("Last Run")

# Spotify Code

In [63]:
%load_ext autoreload
%autoreload
from spotifyData import spotifyData
sd = spotifyData(minYear=1, maxYear=2021)
sd.parse()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Volumes/Piggy/Charts/data/spotify/categories
Found 993 files.


In [83]:
%load_ext autoreload
%autoreload
# Spotify pipeline: here the rename DBs are registered before the chart usage
# is selected (ranks 0-8); the set*/save* steps then run as in the other cells.
from spotifyData import spotifyData
sd = spotifyData(minYear=1, maxYear=2021)
sd.setDBRenames(manDB)
sd.setMultiDBRenames(multimanDB)
sd.setChartUsage(rank=[0,1,2,3,4,5,6,7,8])
sd.setFullChartData()
sd.setArtistAlbumData()
sd.saveArtistAlbumData()
sd.saveFullChartData()
# Timestamp the run in the cell output.
_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  Getting Chart For us
name = us
  Using 1 Charts
  Using 1 Charts
  Getting Chart For england
name = england
  Using 1 Charts
  Getting Chart For australia
name = australia
  Using 1 Charts
  Using 2 Charts
  Getting Chart For denmark
name = denmark
  Using 1 Charts
  Getting Chart For southafrica
name = southafrica
  Using 1 Charts
  Using 2 Charts
  Getting Chart For italy
name = italy
  Using 1 Charts
  Getting Chart For isreal
name = isreal
  Using 1 Charts
  Getting Chart For phillipines
name = phillipines
  Using 1 Charts
  Using 3 Charts
  Getting Chart For india
name = india
  Using 1 Charts
  Getting Chart For russia
name = russia
  Using 1 Charts
  Getting Chart For egypt
name = egypt
  Using 1 Charts
  Using 3 Charts
  Getting Chart For japan
name = japan
  Using 1 Charts
  Getting Chart For hongkong
name = hongkong
  Using 1 Charts
  Getting Chart For vietnam
name = vietnam
  Using 1 Ch

# Common Code

In [None]:
updateMultiDB(2)

# Load Chart Analysis

In [122]:
# When `recreate` is True, a creating/debug mainDB is built first and
# setDBArtists(recreate=False) is called on it before the normal load below.
recreate = True
if recreate:
    maindb = mainDB(mdb=None, create=True, debug=True)
    maindb.setDBArtists(recreate=False)

# Both paths end with a non-creating, non-debug handle and its DB data map.
maindb = mainDB(mdb=None, create=False, debug=False)
maindb.loadDBDataMap()
_, _ = clock("Last Run")

Setting Basic Database Objects
Database Records:
  Creating Database Records for Discogs
  Creating Database Records for AllMusic
  Creating Database Records for MusicBrainz
  Creating Database Records for LastFM
  Creating Database Records for RateYourMusic
  Creating Database Records for Deezer
Available DBs: Discogs, AllMusic, MusicBrainz, LastFM, RateYourMusic, Deezer
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7222 artist keys.
  There are currently 8420 renamed artist keys.
Adding 1 known multi delim artists.
Current Time is Thu Feb 04, 2021 14:32:29 for 
Loading ArtistID Data
Creating Pandas DataFrame for 773799 Artists
	Shape --> (773799, 1)
	Shape --> (773799, 2)
  Finding Real Artist Name
	Shape --> (773799, 4)
  Removing None Artist
	Shape --> (773799, 4)
  Finding Disc Artist Name
	Shape --> (773799, 5)
DataFrame Shape is (773799, 7)
Current Time is Thu Feb 04, 2021 14:32:50 for Done with 
Process [Done wit

Summary Statistics For DB: MusicBrainz
    Using Known Artists: False
    Found 204225 ID -> Name entries
    Found 172898 Name -> ID entries
    Found 0 Albums
Summary Statistics For DB: LastFM
    Using Known Artists: False
    Found 12433 ID -> Name entries
    Found 12328 Name -> ID entries
    Found 153907 Albums
Summary Statistics For DB: RateYourMusic
    Using Known Artists: False
    Found 657 ID -> Name entries
    Found 655 Name -> ID entries
    Found 0 Albums
Summary Statistics For DB: Deezer
    Using Known Artists: False
    Found 7131 ID -> Name entries
    Found 7096 Name -> ID entries
    Found 0 Albums
Current Time is Thu Feb 04, 2021 14:33:45 for Last Run


# Analyze Data

In [123]:
## Basic stuff
%load_ext autoreload
%autoreload
from chartArtistAlbumData import chartArtistAlbumData


singleArtistAlbumData = {}
manyArtistAlbumData   = {}

chartType = "Top40"
mType     = "Full"
cad = chartArtistAlbumData(chartType, ignoreMultiNames=False)
cad.createIndivArtistAlbumData()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
chartArtistAlbumData(Top40)
Found 62 files.
Adding 539 known multi delim artists.
  Assigning 539 known multi-name artists
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7222 artist keys.
  There are currently 8420 renamed artist keys.
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/multiArtistNameDB.p
  There are currently 136 artist keys.
  There are currently 142 renamed artist keys.
There are 13104 unique artists in artist albums data
There are 13104 newly unique artists in artist albums data
Renamed 2 artists
Renamed 0 artists (multi)
There are 13104  unique artist entries in the artist albums data
There are 9156   single artist entries in the artist albums data
There are 3948   multi artist entries in the artist albums data
Renamed 671 multi artists
There are 9156   solo artist entries in the artist albums 

In [None]:
mdbmap = musicDBMap("Spotify")

In [None]:
# One single-entry dict per artist: name -> numeric ID.
# (The 'Kidz Bop Kids' entry used a comma instead of a colon, which made it a
# two-element set rather than a dict like its neighbours.)
[{'Vince Guaraldi Trio': 7471}, {'Eliot Morris': 1821351}, {'Ross Lynch': 3150221},
 {'The Cheetah Girls': 15341}, {'Pinkfong': 12919403}, {'Hannah Montana': 16805},
 {'Big Time Rush': 877741}, {'Cedarmont Kids': 1399450}, {'Kidz Bop Kids': 320265},
 {'The Countdown Kids': 1685087}]

# Create Master Match Class

In [124]:
def showMatchedStatus(chartType, mdbmaps, mdbmc):
    """Print the size of mdbmaps[chartType] against the number of entries in mdbmc.matchData[chartType]."""
    nMatched = mdbmaps[chartType].getSize()
    nCandidates = len(mdbmc.matchData[chartType])
    summary = "Matched {0} / {1} Artists for {2} ChartType".format(nMatched, nCandidates, chartType)
    print(summary)
    
def showRemainingMatches(chartType, mdbmc):
    """
    Print two banner-separated sections and call mdbmc.getDataToMatch for each.

    The return values of getDataToMatch are discarded, so whatever appears
    under each banner comes from getDataToMatch itself (presumably it prints
    its selection -- confirm in masterDBMatchClass). Reads the module-level
    `ignoresList`.
    """
    fullRule = '=' * 100
    sideRule = '=' * 10

    print(fullRule)
    print(sideRule, "Artists That Were Not Tested (Low Albums)", sideRule)
    mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0,
                         useKnown=True, ignores=ignoresList)
    print(sideRule, "Previously Tested Artists Without A Match", sideRule)
    mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0,
                         useKnown=False, dbMatches=0, ignores=ignoresList)
    print(fullRule)
    
def analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=0.1):
    """
    Compare each matched artist's chart name with the name stored per database.

    For every (db, dbID) attached to an artist in mdbmaps[chartType]:
      * a fixed list of databases and any None id are skipped;
      * if maindb has no name for the id, the chart name is queued in toget[db];
      * otherwise the SequenceMatcher ratio of the two names is taken, and a
        ratio in [maxVal - diffVal, maxVal) records dbName -> chartName in
        dbRenames and prints one row.

    Returns (toget, dbRenames). The "artists to get" total printed at the end
    is len(toget), i.e. the number of databases with queued names.
    """
    skippedDBs = ("DatPiff", "MetalStorm", "LastFM", "CDandLP", "AceBootlegs",
                  "RockCorner", "MusicStack", "RateYourMusic")
    rowTemplate = "[{0: <30} {1: <4} {2: >30}] \t --> ({3}) {4} / {5} ({6})"

    chartMap = mdbmaps[chartType]
    toget = {}
    dbRenames = {}

    for primaryKey, primaryArtistName in chartMap.getArtists().items():
        idsByDB = chartMap.getArtistDataByKey(primaryKey).getDict()
        for db, dbID in idsByDB.items():
            if db in skippedDBs or dbID is None:
                continue

            secondaryArtistName = maindb.getArtistDBNameFromID(db, dbID)
            if secondaryArtistName is None:
                toget.setdefault(db, []).append(primaryArtistName)
                continue

            ratio = SequenceMatcher(None, primaryArtistName, secondaryArtistName).ratio()
            # Keep only near-misses: below maxVal but within diffVal of it.
            if ratio >= maxVal or not ratio >= maxVal - diffVal:
                continue

            recorded = dbRenames.get(secondaryArtistName)
            if recorded is not None and recorded == primaryArtistName:
                continue  # this exact pair was already reported

            dbRenames[secondaryArtistName] = primaryArtistName
            print(rowTemplate.format(primaryArtistName, round(ratio, 2), secondaryArtistName,
                                     chartType, primaryKey, dbID, db))

    print("Found {0} artists to get.".format(len(toget)))
    print("Found {0} artists to rename.".format(len(dbRenames)))
    return toget, dbRenames
    
def removeDBRenames(mdbmaps, chartType, dbRenames):
    """
    Remove both names of every rename pair from mdbmaps[chartType], then save it.

    For each (key, value) of dbRenames the key name is looked up and removed
    first, then the value name; names without a primary key are left alone.
    """
    chartMap = mdbmaps[chartType]
    for pair in dbRenames.items():
        for name in pair:
            primaryKey = chartMap.getPrimaryKey(artistName=name, artistID=None)
            if primaryKey is None:
                continue
            chartMap.removeArtistByKey(primaryKey)
    mdbmaps[chartType].save()
    
    
def _isAscii(ele):
    """True when ele has as many characters as encoded bytes."""
    return len(ele) == len(ele.encode())


def _hasCurlyQuote(name):
    """True when name contains a ’ or “ character."""
    return '’' in name or "“" in name


def _swapToNonAscii(k, v):
    """Swap when the value is ASCII and the key is not (keys with curly quotes are left alone)."""
    return _isAscii(v) and not _isAscii(k) and not _hasCurlyQuote(k)


def _swapToCapitalThe(k, v):
    """Swap when the key equals the value with every " the " written as " The "."""
    return " the " in v and " The " in k and k == v.replace(" the ", " The ")


def _swapToPlainQuotes(k, v):
    """Swap when the value contains a curly quote and the key contains none."""
    return _hasCurlyQuote(v) and not _hasCurlyQuote(k)


def _collectSwaps(dbRenames, shouldSwap, echoPair=False):
    """Return (redos, dels): the reversed pairs to add and the keys to drop; prints each pair found."""
    redos = {}
    dels = []
    for k, v in dbRenames.items():
        if shouldSwap(k, v):
            if echoPair:
                print(k, v)
            redos[v] = k
            dels.append(k)
            print('\t', k, '\t', v)
    return redos, dels


def _applySwaps(dbRenames, redos, dels):
    """Drop the keys in dels from dbRenames, then merge in redos (in place)."""
    for k in dels:
        del dbRenames[k]
    dbRenames.update(redos)


def analyzeRenames(manDB, dbRenames):
    """
    Normalise the direction of rename pairs and write them to relDBRenames2.yaml.

    1. Any pair whose key is already a key of manDB.artistNameDB while its value
       is not gets reversed; dbRenames is modified in place and saved.
    2. The saved file is then re-read, cleaned and re-saved by three passes
       (non-ASCII, capital "The", plain quotes), and that sequence runs twice,
       exactly as the original copy-pasted code did.

    An empty dbRenames prints "Nothing to process." and returns without touching
    the file. Returns None.

    Fix: the plain-quote pass used to test `not "“" in v` inside a branch where
    v may contain “, so such pairs could never be swapped; it now checks the
    key for both quote characters.
    """
    if len(dbRenames) == 0:
        print("Nothing to process.")
        return

    renamesFile = "relDBRenames2.yaml"

    # Pass 1: works on the caller's dict and prints running counts.
    knownNames = manDB.artistNameDB.keys()
    redos, dels = _collectSwaps(
        dbRenames,
        lambda oldername, bestname: oldername in knownNames and bestname not in knownNames,
        echoPair=True,
    )
    print("# Renames: {0}".format(len(dbRenames)))
    print("# Dels: {0}".format(len(dels)))
    for oldername in dels:
        del dbRenames[oldername]
    print("# Redos: {0}".format(len(redos)))
    dbRenames.update(redos)
    print("# Renames: {0}".format(len(dbRenames)))
    saveFile(idata=dbRenames, ifile=renamesFile)

    # Passes 2-7: each one round-trips through the file, like the original.
    # The second round is kept because a swap made by one pass can create a
    # pair that an earlier pass would now accept.
    cleanupPasses = (_swapToNonAscii, _swapToCapitalThe, _swapToPlainQuotes)
    for _round in range(2):
        for shouldSwap in cleanupPasses:
            fileRenames = getFile(renamesFile)
            redos, dels = _collectSwaps(fileRenames, shouldSwap)
            _applySwaps(fileRenames, redos, dels)
            saveFile(idata=fileRenames, ifile=renamesFile)
    
    
        
    
def manualAppends(toMatch, chartType, minAlbums=0, add=True):
    """
    Print copy-paste-ready statements for the artists in toMatch[chartType].

    toMatch[chartType] is a sequence of (primaryKey, data) items where data has
    "ArtistName" and "ArtistAlbums" entries. Artists are listed by descending
    album count.

    add=True  : for each artist with at least minAlbums albums, print
                addAllMusic(...) / addDeezer(...) stubs followed by the album
                names and, per album, the module-level cad.getAlbumsData(album).
    add=False : print one ignores.append("<name>") line per artist.

    A final mdbmaps["<chartType>"].save() line is always printed. Returns None.

    Fix: the add=False branch read `artistName` before it was assigned
    (NameError); the row fields are now read before branching, and the
    unreachable duplicate print after `continue` is gone.
    """
    if len(toMatch[chartType]) == 0:
        print("Nothing to append...")
        return

    toMatchDF = DataFrame({item[0]: {"ArtistName": item[1]["ArtistName"], "ArtistAlbums": item[1]["ArtistAlbums"]} for item in toMatch[chartType]}).T
    toMatchDF["NumAlbums"] = toMatchDF["ArtistAlbums"].apply(len)
    toMatchDF = toMatchDF.sort_values(by="NumAlbums", ascending=False)

    for idx, row in toMatchDF.iterrows():
        artistName = row["ArtistName"]

        if not add:
            print("ignores.append(\"{0}\")".format(artistName))
            continue

        albumNames = row["ArtistAlbums"]
        nAlbums = row["NumAlbums"]
        if nAlbums < minAlbums:
            continue

        print("add{0}(mdbmaps, {1}, {2}, {3})   ### [{4}]".format("AllMusic", "\"{0}\"".format(chartType), "\"{0}\"".format(idx), "\"{0}\"".format(""), artistName))
        print("add{0}(mdbmaps, {1}, {2}, {3})    ### [{4}]".format("Deezer", "\"{0}\"".format(chartType), "\"{0}\"".format(idx), "\"{0}\"".format(""), artistName))
        print("### {0}".format("  ,  ".join(albumNames)))
        for albumName in albumNames:
            print("### {0}  ---> [{1}]".format(albumName, cad.getAlbumsData(albumName)))
        print("\n")

    print("mdbmaps[{0}].save()".format("\"{0}\"".format(chartType)))
    





        
def copyMapData(mdbmap):
    """Currently a no-op: the backup call (mdbmap.saveCopy()) is disabled, so nothing is copied."""
    # mdbmap.saveCopy()   # disabled backup step
    return None

def saveMapData(mdbmap, result_list):
    """
    Store match results in mdbmap and save it.

    Each result is indexable as [primaryKey, artistName, artistID, matches],
    where matches maps a database name to an object exposing matchID and
    matchScore. The artist is always registered; a per-database id is stored
    only when matchID is not None.
    """
    for record in result_list:
        primaryKey, artistName, artistID, matches = record[0], record[1], record[2], record[3]
        mdbmap.addArtistByKey(primaryKey, artistName=artistName, artistID=artistID)

        for db in matches:
            match = matches[db]
            # The score is read but not persisted.
            matchID, matchScore = match.matchID, match.matchScore
            if matchID is None:
                continue
            mdbmap.addArtistDataByKey(primaryKey, db, matchID)

    mdbmap.save()
    

def matchItAll(mdbmaps, thresholds, useAlbums=True, mdbmc=None, db=None, toMatch=None):
    """
    Match the artists of the first entry in toMatch with 3 worker processes and save the results.

    When toMatch is None it is fetched from mdbmc.getDataToMatch(db, ...).
    Only the first key of toMatch is processed. The per-artist worker is
    matchDBArtistWithAlbums when useAlbums is True, otherwise
    matchDBArtistWithoutAlbums, with `thresholds` bound as keyword arguments.
    Returns None; returns early when there is nothing to match.

    NOTE(review): `partial` is not imported in the visible cells -- presumably
    it arrives through `from chartUtils import *`; confirm.
    """
    if toMatch is None:
        toMatch = mdbmc.getDataToMatch(db, maxValues=10000, maxAlbums=50000)
    if len(toMatch) == 0:
        return

    worker = matchDBArtistWithAlbums if useAlbums is True else matchDBArtistWithoutAlbums
    boundWorker = partial(worker, **thresholds)

    dbName = list(toMatch.keys())[0]
    copyMapData(mdbmaps[dbName])

    pending = toMatch[dbName]
    if len(pending) == 0:
        return

    print("Running imap multiprocessing for {0} artists ...".format(len(pending)))
    results = multiProc(func=boundWorker, argument_list=pending, num_processes=3)

    start, cmt = clock("Saving...")
    saveMapData(mdbmaps[dbName], results)
    elapsed(start, cmt)
    # Only the message is printed; no sleep actually takes place here.
    print("\nSleeping for 10 seconds...\n")

    
def matchDBArtistWithoutAlbums(item, *args, **kwargs):
    """
    Name-only match for one artist against the module-level `maindb`.

    item is indexable as (primaryKey, data) with data["ArtistName"]; the
    primary key doubles as the artist id and no albums are supplied. kwargs
    must contain numArtistName, artistNameCutoff, artistAlbumCutoff,
    numArtistAlbums and score. Returns [primaryKey, artistName, artistID, matches].
    """
    primaryKey, artistData = item[0], item[1]
    artistName = artistData["ArtistName"]
    artistID = primaryKey

    matcher = matchDBArtist(maindb)
    matcher.setArtistInfo(artistName, artistID, None)
    matcher.setThresholds(
        matchNumArtistName=kwargs['numArtistName'],
        matchArtistNameCutoff=kwargs['artistNameCutoff'],
        matchArtistAlbumCutoff=kwargs['artistAlbumCutoff'],
        matchNumArtistAlbums=kwargs['numArtistAlbums'],
        matchScore=kwargs['score'],
    )
    matches = matcher.findPotentialArtistNameMatchesWithoutAlbums()
    return [primaryKey, artistName, artistID, matches]


def matchDBArtistWithAlbums(item, *args, **kwargs):
    """
    Name-and-album match for one artist against the module-level `maindb`.

    item is indexable as (primaryKey, data) with data["ArtistName"] and
    data["ArtistAlbums"]; the primary key doubles as the artist id. kwargs must
    contain numArtistName, artistNameCutoff, artistAlbumCutoff, numArtistAlbums
    and score. Returns [primaryKey, artistName, artistID, matches].
    """
    primaryKey, artistData = item[0], item[1]
    artistName = artistData["ArtistName"]
    artistID = primaryKey
    artistAlbums = artistData["ArtistAlbums"]

    matcher = matchDBArtist(maindb)
    matcher.setArtistInfo(artistName, artistID, artistAlbums)
    matcher.setThresholds(
        matchNumArtistName=kwargs['numArtistName'],
        matchArtistNameCutoff=kwargs['artistNameCutoff'],
        matchArtistAlbumCutoff=kwargs['artistAlbumCutoff'],
        matchNumArtistAlbums=kwargs['numArtistAlbums'],
        matchScore=kwargs['score'],
    )
    matches = matcher.findPotentialArtistAlbumMatches()
    return [primaryKey, artistName, artistID, matches]


def multiProc(func, argument_list, num_processes):
    """
    Apply func to every element of argument_list in a process pool.

    Results are returned as a list in input order (Pool.imap preserves order),
    with a tqdm progress bar sized from len(argument_list).

    The pool is now used as a context manager so its worker processes are
    released on return or on error; previously each call left a live pool
    behind. All results are collected before the pool is torn down.
    """
    with Pool(processes=num_processes) as pool:
        return list(tqdm(pool.imap(func=func, iterable=argument_list), total=len(argument_list)))

In [125]:
mdbmaps = {}
print("ChartType = {0}".format(chartType))
print("MType     = {0}".format(mType))
mdbmaps[chartType] = musicDBMap(chartType, init=True, copy=False)
mdbmc = masterDBMatchClass(maindb, mdbmaps)

manDB      = masterArtistNameDB("main", init=False)
multimanDB = masterArtistNameDB("multi", init=False)


def reMatch(mdbmaps=mdbmaps, mdbmc=mdbmc, mtype="Single"):
    """
    Rebuild the match data for the current chartType and hand it to mdbmc.

    mtype selects which artist/album table of the global `cad` object is used:
    "Single", "Many" or "Full". Any other value leaves everything untouched.
    Each artist is keyed by the primary key obtained from mdbmaps[chartType];
    the value records the artist name and its albums.

    Note that the defaults for mdbmaps and mdbmc are bound when this function
    is defined, and that chartType and cad are read from the notebook globals.
    """
    if mtype == "Single":
        getterName = "getSingleArtistAlbumData"
    elif mtype == "Many":
        getterName = "getManyArtistAlbumData"
    elif mtype == "Full":
        getterName = "getFullArtistAlbumData"
    else:
        # Unknown match type: nothing to do.
        return

    artistAlbumData = getattr(cad, getterName)()

    matchData = {}
    for name, albums in artistAlbumData.items():
        primaryKey = mdbmaps[chartType].getPrimaryKey(artistName=name, artistID=None)
        matchData[primaryKey] = {"ArtistName": name, "ArtistAlbums": albums}
    mdbmc.setDBMatchData(chartType, matchData)
        
# Load the match data for the configured mType into mdbmc.
reMatch(mtype=mType)

ChartType = Top40
MType     = Full
  Loaded 0 previously matched entries
Loading Artist Names
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7222 artist keys.
  There are currently 8420 renamed artist keys.
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/multiArtistNameDB.p
  There are currently 136 artist keys.
  There are currently 142 renamed artist keys.
  Setting matchData for Top40


## Match Using Names And Albums

In [142]:
# Each list of artist names below is written to its own YAML file. The final
# `ignoresList` holds those file names and is what the matching cells pass as
# `ignores=` to mdbmc.getDataToMatch().
#
# The artist names to ignore use their own variable so that `ignoresList` only
# ever means "list of ignore files" (it used to be assigned three throw-away
# values and then reused for the artist names).

ignoreArtistsList = [
    "Artists Stand Up To Cancer",
    "Cast Of Rent",
    "2006 Broadway Cast Recording",
    "Original Broadway Cast Of Dear Evan Hansen",
    "Various",
    "Various Artists",
    "Read-Along",
    "Varios Artistas",
    "Artists Against Bullying",
    "Varios",
    "Sleep Music Lullabies",
    "Yoga Meditation Tribe",
    "The Cast Of Stomp",
    "Quincy Jones Feat. The Cast Of Stomp/The Yes/No Productions",
    "Walt Disney Read-Along",
    "Classical Lullabies For Babies Academy",
    "Diverse",
]
saveFile(idata=ignoreArtistsList, ifile="chartIgnoreArtists.yaml")

unknownList = [
    "Amil-lion",
    "Yung Lito",
    "Idols",
    "X Factor Finalists",
    "R",
    "Malena",
    "Neales",
]
saveFile(idata=unknownList, ifile="chartUnknownArtists.yaml")

internationalList = [
    "3robiu",
    "Multi-interpretes",
    "Mormon Tabernacle Choir/Orch. At Temple Square/Yoncheva/Mumford/Villazon/Terfel (Wilberg)",
    "Villazon",
    "Mumford",
    "Vienna Philharmonic (Dudamel)",
    "Louisville Orchestra (Abrams)",
]
internationalList += ['Eight Ranger', 'Yi Yang Qian Xi', "ONE N' ONLY", "MoLa", "Sus Invitados"]
internationalList += ['Juanma Y Su Tuna Para Todo El Ano', 'NYC', 'Nogizaka 48', 'Sho Kiryuin from Golden Bomber']
internationalList += ['The MONSTERS', 'ZERO-G', 'Huang Lige', 'Renzo', 'Bodan Shonen Dan', 'ST RISH', 'King Cream Soda']
internationalList += ['Good Morning America', 'Hotta Ke BAND', 'Feng Jianyu', 'Stanley Most']
internationalList += ['Cardiff City Fc', 'Leeds United Team', 'Monkey Hangerz']
internationalList += ['Good Child Foundation', 'Neon Brotherhood', 'Rogue Souljahz']
internationalList += ['GMC', 'Krossfade', 'Dave Howley', 'Toko', 'Y-not', 'Afro Music']
internationalList += ['Husvenne', 'Maca Boy', 'Skyndeep', 'TwitterXmassingle', 'Julieanne Dineen', 'Julie-Anne Dineen', 'Anybodies']
internationalList += ["Miro"] # Bulgaria
internationalList += ["Lama", 'Vintaj', 'D-flow', 'U. Dumanska', 'Leonard']

saveFile(idata=internationalList, ifile="chartInternationalArtists.yaml")

tonewUnknownList  = ['Lil Adrei', 'The 7 Dimensions', 'HIXTAPE', 'Porky Slim']
tonewUnknownList += ['SL', 'Sidemen', 'Mastermind', 'HOSH', 'Frosty']
saveFile(idata=tonewUnknownList, ifile="chartToNewArtists.yaml")

# Scratch notes: artist name -> numeric ID (presumably IDs in one of the music
# databases -- TODO confirm which). They were bare dict literals, i.e.
# expression statements with no effect, so they are kept as comments.
#   {'Gabo Parisi': 9846490}
#   {'Rory & The Island': 1219224, 'Sean Og': 537158, "Hoo Leeger": 10908080}
#   {'The Suspects and Guests': 5061058, "Dennis Reed & Gap": 15080705}
#   {"Eri Esittajia": 243610}

soundtrackList = ['Jul På Vesterbro']
saveFile(idata=soundtrackList, ifile="chartSoundtrackArtists.yaml")

applemusicList  = ['Dutch','Deno','DHT','Sadbh']
applemusicList += ['DDG', 'Lean Trap', 'MOLO', 'Amina']
applemusicList += ['Rocket Girls 101', "David Miles", "Cyril"]
applemusicList += ["Camur"]
saveFile(idata=applemusicList, ifile="chartAppleMusicArtists.yaml")

# Files handed to getDataToMatch(..., ignores=ignoresList) by the cells below.
ignoresList = ["chartIgnoreArtists.yaml", "chartUnknownArtists.yaml", "chartInternationalArtists.yaml",
               "chartToNewArtists.yaml", "chartAppleMusicArtists.yaml", "chartSoundtrackArtists.yaml"]


In [127]:
# THOMAS
# Match artists by name and albums, one getIterItems() entry at a time.
# Each key is used as the minAlbums setting; its value supplies the matching
# "Max" (maxAlbums) and "Vals" (maxValues) settings.
for minAlbums,minAlbumsData in getIterItems().items():
    print("="*10,minAlbums)
    maxAlbums = minAlbumsData["Max"]
    maxValues = minAlbumsData["Vals"]
    # At most 10 batches per setting (the loop index itself is unused).
    # Judging by the logged counts, maxValues caps the size of one batch.
    for i in range(10):
        toMatch   = mdbmc.getDataToMatch(chartType, maxValues=maxValues, maxAlbums=maxAlbums, minAlbums=minAlbums, ignores=ignoresList)
        # Nothing left for this setting: move on to the next one.
        if len(toMatch[chartType]) == 0:
            break
        matchItAll(mdbmaps, getThresholds(minAlbums), toMatch=toMatch)
# Reload the match data once every setting has been processed.
reMatch(mtype=mType)
print("Done.\n\n")

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11842
After MaxAlbums      -> 11842
After MinAlbums      -> 19
After MaxValues      -> 19
Running imap multiprocessing for 19 artists ...


100%|██████████| 19/19 [00:06<00:00,  2.88it/s]


Current Time is Thu Feb 04, 2021 14:47:01 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 4.4kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 4.4kB.
Current Time is Thu Feb 04, 2021 14:47:01 for Done with Saving...
Process [Done with Saving...] took 15.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11823
After MaxAlbums      -> 11823
After MinAlbums      -> 199
After MaxValues      -> 100
Running imap multiprocessing for 100 artists ...


100%|██████████| 100/100 [00:18<00:00,  5.39it/s]


Current Time is Thu Feb 04, 2021 14:47:21 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 26.7kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 26.7kB.
Current Time is Thu Feb 04, 2021 14:47:21 for Done with Saving...
Process [Done with Saving...] took 68.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11723
After MaxAlbums      -> 11723
After MinAlbums      -> 99
After MaxValues      -> 99
Running imap multiprocessing for 99 artists ...


100%|██████████| 99/99 [00:09<00:00, 10.67it/s]


Current Time is Thu Feb 04, 2021 14:47:32 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 47.3kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 47.3kB.
Current Time is Thu Feb 04, 2021 14:47:32 for Done with Saving...
Process [Done with Saving...] took 114.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11624
After MaxAlbums      -> 11624
After MinAlbums      -> 540
After MaxValues      -> 200
Running imap multiprocessing for 200 artists ...


100%|██████████| 200/200 [00:18<00:00, 10.89it/s]


Current Time is Thu Feb 04, 2021 14:47:52 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 89.7kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 89.7kB.
Current Time is Thu Feb 04, 2021 14:47:52 for Done with Saving...
Process [Done with Saving...] took 216.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11424
After MaxAlbums      -> 11424
After MinAlbums      -> 340
After MaxValues      -> 200
Running imap multiprocessing for 200 artists ...


100%|██████████| 200/200 [00:17<00:00, 11.53it/s]


Current Time is Thu Feb 04, 2021 14:48:10 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 130.8kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 130.8kB.
Current Time is Thu Feb 04, 2021 14:48:11 for Done with Saving...
Process [Done with Saving...] took 328.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11224
After MaxAlbums      -> 11224
After MinAlbums      -> 140
After MaxValues      -> 140
Running imap multiprocessing for 140 artists ...


100%|██████████| 140/140 [00:20<00:00,  6.88it/s]


Current Time is Thu Feb 04, 2021 14:48:32 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 159.2kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 159.2kB.
Current Time is Thu Feb 04, 2021 14:48:32 for Done with Saving...
Process [Done with Saving...] took 396.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 11084
After MaxAlbums      -> 11084
After MinAlbums      -> 1179
After MaxValues      -> 250
Running imap multiprocessing for 250 artists ...


100%|██████████| 250/250 [00:25<00:00,  9.86it/s]


Current Time is Thu Feb 04, 2021 14:49:00 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 210.2kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 210.2kB.
Current Time is Thu Feb 04, 2021 14:49:00 for Done with Saving...
Process [Done with Saving...] took 516.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 10834
After MaxAlbums      -> 10834
After MinAlbums      -> 929
After MaxValues      -> 250
Running imap multiprocessing for 250 artists ...


100%|██████████| 250/250 [00:20<00:00, 12.41it/s]


Current Time is Thu Feb 04, 2021 14:49:22 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 261.0kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 261.0kB.
Current Time is Thu Feb 04, 2021 14:49:22 for Done with Saving...
Process [Done with Saving...] took 646.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 10584
After MaxAlbums      -> 10584
After MinAlbums      -> 679
After MaxValues      -> 250
Running imap multiprocessing for 250 artists ...


100%|██████████| 250/250 [00:22<00:00, 10.93it/s]


Current Time is Thu Feb 04, 2021 14:49:46 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 310.8kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 310.8kB.
Current Time is Thu Feb 04, 2021 14:49:47 for Done with Saving...
Process [Done with Saving...] took 781.0 millseconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 10334
After MaxAlbums      -> 10334
After MinAlbums      -> 429
After MaxValues      -> 250
Running imap multiprocessing for 250 artists ...


100%|██████████| 250/250 [00:30<00:00,  8.07it/s]


Current Time is Thu Feb 04, 2021 14:50:19 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 359.6kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 359.6kB.
Current Time is Thu Feb 04, 2021 14:50:24 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 10084
After MaxAlbums      -> 10084
After MinAlbums      -> 179
After MaxValues      -> 179
Running imap multiprocessing for 179 artists ...


100%|██████████| 179/179 [00:17<00:00, 10.20it/s]


Current Time is Thu Feb 04, 2021 14:50:43 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 394.2kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 394.2kB.
Current Time is Thu Feb 04, 2021 14:50:44 for Done with Saving...
Process [Done with Saving...] took 1 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 9905
After MaxAlbums      -> 9905
After MinAlbums      -> 1601
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [00:47<00:00, 10.50it/s]


Current Time is Thu Feb 04, 2021 14:51:34 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 490.2kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 490.2kB.
Current Time is Thu Feb 04, 2021 14:51:35 for Done with Saving...
Process [Done with Saving...] took 1 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 9405
After MaxAlbums      -> 9405
After MinAlbums      -> 1101
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [00:54<00:00,  9.19it/s]


Current Time is Thu Feb 04, 2021 14:52:30 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 582.7kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 582.7kB.
Current Time is Thu Feb 04, 2021 14:52:32 for Done with Saving...
Process [Done with Saving...] took 1 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 8905
After MaxAlbums      -> 8905
After MinAlbums      -> 601
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [00:59<00:00,  8.39it/s]


Current Time is Thu Feb 04, 2021 14:53:33 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 674.4kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 674.4kB.
Current Time is Thu Feb 04, 2021 14:53:39 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 8405
After MaxAlbums      -> 8405
After MinAlbums      -> 101
After MaxValues      -> 101
Running imap multiprocessing for 101 artists ...


100%|██████████| 101/101 [00:13<00:00,  7.59it/s]


Current Time is Thu Feb 04, 2021 14:53:53 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 693.7kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 693.7kB.
Current Time is Thu Feb 04, 2021 14:53:55 for Done with Saving...
Process [Done with Saving...] took 1 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 8304
After MaxAlbums      -> 8304
After MinAlbums      -> 1909
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:09<00:00,  7.21it/s]


Current Time is Thu Feb 04, 2021 14:55:06 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 778.2kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 778.2kB.
Current Time is Thu Feb 04, 2021 14:55:08 for Done with Saving...
Process [Done with Saving...] took 2 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 7804
After MaxAlbums      -> 7804
After MinAlbums      -> 1409
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:28<00:00,  5.62it/s]


Current Time is Thu Feb 04, 2021 14:56:38 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 861.3kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 861.3kB.
Current Time is Thu Feb 04, 2021 14:56:42 for Done with Saving...
Process [Done with Saving...] took 3 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 7304
After MaxAlbums      -> 7304
After MinAlbums      -> 909
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [00:56<00:00,  8.90it/s]


Current Time is Thu Feb 04, 2021 14:57:39 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 949.4kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 949.4kB.
Current Time is Thu Feb 04, 2021 14:57:42 for Done with Saving...
Process [Done with Saving...] took 2 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 6804
After MaxAlbums      -> 6804
After MinAlbums      -> 409
After MaxValues      -> 409
Running imap multiprocessing for 409 artists ...


100%|██████████| 409/409 [00:52<00:00,  7.82it/s]


Current Time is Thu Feb 04, 2021 14:58:35 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.0MB.
Current Time is Thu Feb 04, 2021 14:58:38 for Done with Saving...
Process [Done with Saving...] took 3 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 6395
After MaxAlbums      -> 6395
After MinAlbums      -> 6395
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:14<00:00,  6.69it/s]


Current Time is Thu Feb 04, 2021 14:59:56 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.1MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.1MB.
Current Time is Thu Feb 04, 2021 15:00:02 for Done with Saving...
Process [Done with Saving...] took 6 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 5895
After MaxAlbums      -> 5895
After MinAlbums      -> 5895
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:18<00:00,  6.40it/s]


Current Time is Thu Feb 04, 2021 15:01:22 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.2MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.2MB.
Current Time is Thu Feb 04, 2021 15:01:25 for Done with Saving...
Process [Done with Saving...] took 3 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 5395
After MaxAlbums      -> 5395
After MinAlbums      -> 5395
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:26<00:00,  5.77it/s]


Current Time is Thu Feb 04, 2021 15:02:53 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.3MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.3MB.
Current Time is Thu Feb 04, 2021 15:02:59 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 4895
After MaxAlbums      -> 4895
After MinAlbums      -> 4895
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:36<00:00,  5.20it/s]


Current Time is Thu Feb 04, 2021 15:04:36 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.4MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.4MB.
Current Time is Thu Feb 04, 2021 15:04:40 for Done with Saving...
Process [Done with Saving...] took 3 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 4395
After MaxAlbums      -> 4395
After MinAlbums      -> 4395
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:16<00:00,  6.50it/s]


Current Time is Thu Feb 04, 2021 15:05:58 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.5MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.5MB.
Current Time is Thu Feb 04, 2021 15:06:04 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 3895
After MaxAlbums      -> 3895
After MinAlbums      -> 3895
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:30<00:00,  5.53it/s]


Current Time is Thu Feb 04, 2021 15:07:35 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.6MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.6MB.
Current Time is Thu Feb 04, 2021 15:07:40 for Done with Saving...
Process [Done with Saving...] took 4 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 3395
After MaxAlbums      -> 3395
After MinAlbums      -> 3395
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [02:26<00:00,  3.42it/s]


Current Time is Thu Feb 04, 2021 15:10:07 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.6MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.6MB.
Current Time is Thu Feb 04, 2021 15:10:15 for Done with Saving...
Process [Done with Saving...] took 7 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 2895
After MaxAlbums      -> 2895
After MinAlbums      -> 2895
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [02:17<00:00,  3.64it/s]


Current Time is Thu Feb 04, 2021 15:12:34 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.7MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.7MB.
Current Time is Thu Feb 04, 2021 15:12:40 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 2395
After MaxAlbums      -> 2395
After MinAlbums      -> 2395
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [01:46<00:00,  4.68it/s]


Current Time is Thu Feb 04, 2021 15:14:28 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.8MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.8MB.
Current Time is Thu Feb 04, 2021 15:14:34 for Done with Saving...
Process [Done with Saving...] took 6 seconds.

Sleeping for 10 seconds...

Total                -> 11880
After Ignores        -> 11842
After Known          -> 1895
After MaxAlbums      -> 1895
After MinAlbums      -> 1895
After MaxValues      -> 500
Running imap multiprocessing for 500 artists ...


100%|██████████| 500/500 [02:07<00:00,  3.92it/s]


Current Time is Thu Feb 04, 2021 15:16:43 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.9MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 1.9MB.
Current Time is Thu Feb 04, 2021 15:16:49 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

  Setting matchData for Top40
Done.




In [128]:
# Report how many artists are matched so far (see the "Matched N / M" output).
showMatchedStatus(chartType, mdbmaps, mdbmc)

Matched 10447 / 11880 Artists for Top40 ChartType


## Match Only Using Names

#### Show The Remaining Artists

In [129]:
# Print the filter-by-filter counts of artists that still need a match.
showRemainingMatches(chartType, mdbmc)

Total                -> 11880
After Ignores        -> 11842
After Known          -> 1395
After MaxAlbums      -> 1395
After MinAlbums      -> 1395
After MaxValues      -> 1395
Total                -> 11880
After Ignores        -> 11842
After DB Matches     -> 4165
After MaxAlbums      -> 4165
After MinAlbums      -> 4165
After MaxValues      -> 4165


#### Try To Match Artists That Were Previously Tested Without A Match

In [130]:
# Select artists with useKnown=False and dbMatches=0 (per the section title:
# previously tested without a match) and match them on names only
# (useAlbums=False). range(1) runs the body once; the loop is only there so
# that `break` can skip the matching step when nothing is selected.
for i in range(1):
    toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=False, dbMatches=0, ignores=ignoresList)
    if len(toMatch[chartType]) == 0:
        break
    matchItAll(mdbmaps, getThresholdsWithoutAlbums(1.0), toMatch=toMatch, useAlbums=False)
# Runs whether or not anything was matched above.
reMatch(mtype=mType)

Total                -> 11880
After Ignores        -> 11842
After DB Matches     -> 4165
After MaxAlbums      -> 4165
After MinAlbums      -> 4165
After MaxValues      -> 4165
Running imap multiprocessing for 4165 artists ...


100%|██████████| 4165/4165 [10:09<00:00,  6.83it/s]


Current Time is Thu Feb 04, 2021 15:27:24 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.0MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.0MB.
Current Time is Thu Feb 04, 2021 15:27:30 for Done with Saving...
Process [Done with Saving...] took 5 seconds.

Sleeping for 10 seconds...

  Setting matchData for Top40


In [131]:
# Matched count followed by the remaining-artist counts after the retry pass.
showMatchedStatus(chartType, mdbmaps, mdbmc)
showRemainingMatches(chartType, mdbmc)

Matched 10447 / 11880 Artists for Top40 ChartType
Total                -> 11880
After Ignores        -> 11842
After Known          -> 1395
After MaxAlbums      -> 1395
After MinAlbums      -> 1395
After MaxValues      -> 1395
Total                -> 11880
After Ignores        -> 11842
After DB Matches     -> 1166
After MaxAlbums      -> 1166
After MinAlbums      -> 1166
After MaxValues      -> 1166


#### Try To Match Artists That Were Not Previously Tested (Low Albums)

In [132]:
# Select with useKnown=True and no album restriction (minAlbums=0); the result
# is consumed by the next cell.
toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=True, ignores=ignoresList)

Total                -> 11880
After Ignores        -> 11842
After Known          -> 1395
After MaxAlbums      -> 1395
After MinAlbums      -> 1395
After MaxValues      -> 1395


In [133]:
# Name-only matching (useAlbums=False) of the selection made in the previous
# cell, then reload the match data.
matchItAll(mdbmaps, getThresholdsWithoutAlbums(1.0), toMatch=toMatch, useAlbums=False)
reMatch(mtype=mType)

Running imap multiprocessing for 1395 artists ...


100%|██████████| 1395/1395 [02:41<00:00,  8.65it/s]


Current Time is Thu Feb 04, 2021 15:30:26 for Saving...
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.3MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.3MB.
Current Time is Thu Feb 04, 2021 15:30:33 for Done with Saving...
Process [Done with Saving...] took 6 seconds.

Sleeping for 10 seconds...

  Setting matchData for Top40


In [134]:
# Remaining-artist counts after the name-only pass.
showRemainingMatches(chartType, mdbmc)

Total                -> 11880
After Ignores        -> 11842
After DB Matches     -> 1425
After MaxAlbums      -> 1425
After MinAlbums      -> 1425
After MaxValues      -> 1425


#### Extra Artists Info

In [135]:
# NOTE(review): extraKnownArtists is already imported in the notebook's first
# cell; this repeated import is redundant unless it is relied on to pick up
# edits to extraArtists -- confirm and drop it if not needed.
from extraArtists import extraKnownArtists
# Apply the extra known artists to the maps for this chart type, then report
# the matched and remaining counts again.
extraKnownArtists(mdbmaps, chartType)
showMatchedStatus(chartType, mdbmaps, mdbmc)
showRemainingMatches(chartType, mdbmc)

Could not add artist data from PrimaryKey: 5edcd28a4505dc29e4edebd67052e614
Could not add artist data from PrimaryKey: 927d313b814225911b0cc45f6d101384
Could not add artist data from PrimaryKey: 8869a02f414bf8aa44ffa2491e057c18
Could not add artist data from PrimaryKey: ee06f338805166cc652a5ffa29bfcaa4
Could not add artist data from PrimaryKey: 8f7bba2ad693b7bd7a54e9f5561df5fd
Could not add artist data from PrimaryKey: 4101fb8a2e7d70d355cb78bb670cafb2
Could not add artist data from PrimaryKey: 0edfaa56ae9a7023ef89e6dc038b27e7
Could not add artist data from PrimaryKey: 50eab1669ad5105c900ba050d23f7b37
Could not add artist data from PrimaryKey: 0f4330d2f262dae9ff367c6c07b07ebd
Could not add artist data from PrimaryKey: 08de644904d4cf3503bdb90e1095eb0a
Could not add artist data from PrimaryKey: 2bb54463a4b2619f4cddb21626fd43f3
Could not add artist data from PrimaryKey: 5a56911227b3087c65fa0c64cfc22858
Could not add artist data from PrimaryKey: 94a91dcb424b476f725b107e3d561dd2
Could not ad

#### Unmatched Artists

In [136]:
# Collect what is still selected with useKnown=False / dbMatches=0; the cell
# displays how many entries that is.
toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=False, dbMatches=0, ignores=ignoresList)
len(toMatch[chartType])

Total                -> 11880
After Ignores        -> 11842
After DB Matches     -> 1394
After MaxAlbums      -> 1394
After MinAlbums      -> 1394
After MaxValues      -> 1394


1394

In [99]:
#[item[1]["ArtistName"] for item in toMatch[chartType]]

In [100]:
#toget

In [141]:
# Refresh the main artist-name DB; the log below shows a forced reload followed
# by a save. NOTE(review): the meaning of the argument 2 is not visible in
# this notebook section -- TODO document it.
updateManDB(2)

  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  Initializing a fresh DB for main
  There are currently 0 artist keys.
  There are currently 0 renamed artist keys.
  Forced reload of 7224 artist names
Saving 7224 artist keys to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  --> This file is 190.7kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  --> This file is 190.7kB.
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7224 artist keys.
  There are currently 8423 renamed artist keys.
Current Time is Thu Feb 04, 2021 16:11:27 for Last Ran


In [137]:
#https://www.hungama.com/albums/lang/russian/popular/
manualAppends(toMatch, chartType, minAlbums=0, add=True)

addAllMusic(mdbmaps, "Top40", "629d2284888d95d5ae195ae03a61176c", "")   ### [Miro]
addDeezer(mdbmaps, "Top40", "629d2284888d95d5ae195ae03a61176c", "")    ### [Miro]
### Guba Kontrol Kogato  ,  Avgust E Septemvri  ,  Ubivame S Lyubov  ,  Angel Si Ti  ,  Power  ,  Vurxa Na Planinata  ,  G L Bo  ,  Svetqt E Moj  ,  Slagam Kraj  ,  I Ti Ne Mozhesh Da Me Spresh  ,  Skochi Nad Men  ,  Plamnali Dushi
### Guba Kontrol Kogato  ---> [['Miro']]
### Avgust E Septemvri  ---> [['Miro']]
### Ubivame S Lyubov  ---> [['Miro']]
### Angel Si Ti  ---> [['Miro']]
### Power  ---> [['Kanye West', 'Little Mix & Stormzy', 'UFO 361', 'Miro']]
### Vurxa Na Planinata  ---> [['Miro']]
### G L Bo  ---> [['Miro']]
### Svetqt E Moj  ---> [['Magi & Miro']]
### Slagam Kraj  ---> [['Miro, Krisko & Nevena']]
### I Ti Ne Mozhesh Da Me Spresh  ---> [['Divna, Miro & Krisko']]
### Skochi Nad Men  ---> [['Miro & Dim4ou']]
### Plamnali Dushi  ---> [['Tedi Katsarova & Miro']]


addAllMusic(mdbmaps, "Top40", "0abaf01f580932db364


addAllMusic(mdbmaps, "Top40", "6269dc570069e12a96dddc0a674e4ff2", "")   ### [Charlotte]
addDeezer(mdbmaps, "Top40", "6269dc570069e12a96dddc0a674e4ff2", "")    ### [Charlotte]
### Broken Heart  ,  Million Miles Away
### Broken Heart  ---> [['Charlotte']]
### Million Miles Away  ---> [['Charlotte']]


addAllMusic(mdbmaps, "Top40", "226f112b3c8db5a0b07e976b2a58cf91", "")   ### [Liliana]
addDeezer(mdbmaps, "Top40", "226f112b3c8db5a0b07e976b2a58cf91", "")    ### [Liliana]
### Ibiza For Dreams  ,  Girlfriend
### Ibiza For Dreams  ---> [['Diego Miranda & Liliana']]
### Girlfriend  ---> [['*NSYNC', 'Alicia Keys', 'B2K', 'Avril Lavigne', 'Bow Wow & Omarion', 'Icona Pop', 'The Darkness', 'Diego Miranda & Ana Free', 'Abraham Mateo']]


addAllMusic(mdbmaps, "Top40", "b0e8915fa1946cafcbb743ec3f4a6ea9", "")   ### [Charizma]
addDeezer(mdbmaps, "Top40", "b0e8915fa1946cafcbb743ec3f4a6ea9", "")    ### [Charizma]
### Waiting (here For You)  ,  Where Do You Go?
### Waiting (here For You)  ---> [['Charizm

addAllMusic(mdbmaps, "Top40", "b6788f28e079b590d50ce93e598d04a2", "")   ### [Claudia]
addDeezer(mdbmaps, "Top40", "b6788f28e079b590d50ce93e598d04a2", "")    ### [Claudia]
### Zalele
### Zalele  ---> [['Claudia, Asu & Ticy']]


addAllMusic(mdbmaps, "Top40", "2e3f02afa7a68bce9bbce351dda49015", "")   ### [Absolutni Jivotni]
addDeezer(mdbmaps, "Top40", "2e3f02afa7a68bce9bbce351dda49015", "")    ### [Absolutni Jivotni]
### Dai Znak Pak
### Dai Znak Pak  ---> [['Absolutni Jivotni & Mihaela Fileva']]


addAllMusic(mdbmaps, "Top40", "6271f8440a1dfb5527b2a40c78fa7a3f", "")   ### [A.Lorak]
addDeezer(mdbmaps, "Top40", "6271f8440a1dfb5527b2a40c78fa7a3f", "")    ### [A.Lorak]
### Zerkala
### Zerkala  ---> [['A.Lorak & G.Leps']]


addAllMusic(mdbmaps, "Top40", "62a0ff850af820c33d01694f615c4acc", "")   ### [Comite Caviar]
addDeezer(mdbmaps, "Top40", "62a0ff850af820c33d01694f615c4acc", "")    ### [Comite Caviar]
### Para Os Bracos Da Minha Mae
### Para Os Bracos Da Minha Mae  ---> [['Pedro Abrunhosa, 


addAllMusic(mdbmaps, "Top40", "7776ae1acd2b2a73f7a16bce8d0f6e44", "")   ### [Freddy Genius]
addDeezer(mdbmaps, "Top40", "7776ae1acd2b2a73f7a16bce8d0f6e44", "")    ### [Freddy Genius]
### Cannabus 2011
### Cannabus 2011  ---> [['Simena & Freddy Genius']]


addAllMusic(mdbmaps, "Top40", "4590938b7ddf7fa60bdefec905a27557", "")   ### [Simena]
addDeezer(mdbmaps, "Top40", "4590938b7ddf7fa60bdefec905a27557", "")    ### [Simena]
### Cannabus 2011
### Cannabus 2011  ---> [['Simena & Freddy Genius']]


addAllMusic(mdbmaps, "Top40", "fa44a92838d241225dba615e3802bafa", "")   ### [O-Bee]
addDeezer(mdbmaps, "Top40", "fa44a92838d241225dba615e3802bafa", "")    ### [O-Bee]
### All Around The World
### All Around The World  ---> [['Justin Bieber & Ludacris', 'Lionel Richie', 'O-Bee & Genevieve Jackson', 'Mindless Behavior']]


addAllMusic(mdbmaps, "Top40", "d51ae82a562a3fc1364d343c544479a6", "")   ### [Sophie]
addDeezer(mdbmaps, "Top40", "d51ae82a562a3fc1364d343c544479a6", "")    ### [Sophie]
### Romeo



addAllMusic(mdbmaps, "Top40", "50397a3700444ddb1a3e64e992f4342a", "")   ### [Cartel]
addDeezer(mdbmaps, "Top40", "50397a3700444ddb1a3e64e992f4342a", "")    ### [Cartel]
### Cartel
### Cartel  ---> [['Cartel']]


addAllMusic(mdbmaps, "Top40", "261705c47441e9190410314a058e8ee7", "")   ### [Stermann]
addDeezer(mdbmaps, "Top40", "261705c47441e9190410314a058e8ee7", "")    ### [Stermann]
### Das Schonste Ding Der Welt
### Das Schonste Ding Der Welt  ---> [['Stermann & Grissemann']]


addAllMusic(mdbmaps, "Top40", "d2900981348639b36dc2da79b31b9e75", "")   ### [Blake]
addDeezer(mdbmaps, "Top40", "d2900981348639b36dc2da79b31b9e75", "")    ### [Blake]
### Blake
### Blake  ---> [['Blake']]


addAllMusic(mdbmaps, "Top40", "8d695e1bf7ebe9de817bb8e26f735aea", "")   ### [Ray Quinn]
addDeezer(mdbmaps, "Top40", "8d695e1bf7ebe9de817bb8e26f735aea", "")    ### [Ray Quinn]
### Doing It My Way
### Doing It My Way  ---> [['Ray Quinn']]


addAllMusic(mdbmaps, "Top40", "4d76e136c5003c329a8ffefd8d967112", "")

In [144]:


mdbmaps["Top40"].save()

Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.3MB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicdb/dbTop40Map.p
  --> This file is 2.3MB.


# Renames

In [103]:
toget, dbRenames = analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=1.0)
analyzeRenames(manDB, dbRenames)

Artist ID [1210215] is not a member of artistIDToName.
Artist ID [0003712369] is not a member of artistIDToName.
Artist ID [0003886957] is not a member of artistIDToName.
Artist ID [0003350306] is not a member of artistIDToName.
Artist ID [6339] is not a member of artistIDToName.
[RH                             0.5                              Rh] 	 --> (Spotify) 9e511416cd94110a0daca4e09c3c717b / 869166 (Deezer)
Artist ID [351626] is not a member of artistIDToName.
Artist ID [0003895005] is not a member of artistIDToName.
[Simone & Simaria               0.94               Simone e Simaria] 	 --> (Spotify) 5142c5298a98f5e02d15bd1b37668219 / 0003427498 (AllMusic)
Artist ID [0003831796] is not a member of artistIDToName.
Artist ID [1204851] is not a member of artistIDToName.
Artist ID [0003572540] is not a member of artistIDToName.
[FOOL                           0.25                           Fool] 	 --> (Spotify) d8aa0eef0707f16dde1ed5d8dc4e88fc / 270477 (Deezer)
Artist ID [0003736648]

In [118]:
#toget

In [116]:
updateManDB(4)

  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7198 artist keys.
  There are currently 8394 renamed artist keys.
Trying to add 26 renamed artist keys
There are currently 7198 artist keys.
There are currently 8394 renamed artist keys.
There are currently 7222 artist keys.
There are currently 8420 renamed artist keys.
Saving 7222 artist keys to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
Saving data to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  --> This file is 190.6kB.
Saved data to /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  --> This file is 190.6kB.
  Loading data from /Users/tgadfort/opt/anaconda3/envs/py37/musicnames/mainArtistNameDB.p
  There are currently 7222 artist keys.
  There are currently 8420 renamed artist keys.
Current Time is Thu Feb 04, 2021 14:30:26 for Last Ran


In [None]:
removeDBRenames(mdbmaps, chartType, dbRenames)

In [None]:
updateManDB(4)

In [None]:
cad.getAlbumsData("Rise")

In [None]:


mdbmaps["BillboardYE"].save()

In [None]:
cad.getAlbumsData("George Frideric Handel: Messiah")

In [None]:
updateManDB(2)

In [None]:
updateManDB(2)

In [None]:


mdbmaps["BillboardYE"].save()

In [None]:
updateManDB(2)

In [None]:
cad.getAlbumsData("Common Ground")

In [None]:
updateManDB(2)

# Analyze Matches

In [None]:
toget

In [None]:
updateManDB(2)

In [None]:
updateManDB(4)

In [None]:
print([item[1]["ArtistName"] for item in toMatch["Top40"]])

In [None]:
# Build one [fullName, {"Guess": ..., "Truth": ''}] record per entry of
# cad.manyArtists and dump the list to tmp.yaml. "Guess" is each member name
# passed through manDB.renamed(), bracketed and joined with " & "; "Truth" is
# left empty (the later cells read it back from the edited file).
saveData = []
for fullName in sorted(cad.manyArtists.keys()):
    memberNames = sorted(cad.manyArtists[fullName])
    appendName = " & ".join("[{0}]".format(manDB.renamed(indivName)) for indivName in memberNames)
    saveData.append([fullName, {"Guess": appendName, "Truth": ''}])
saveFile(idata=saveData, ifile="tmp.yaml")

In [None]:
fixedData = getFile("tmp.yaml")

In [None]:
# Split the filled-in "Truth" entries of fixedData into two groups:
#   * a single bracketed name   -> appended to `multis` without its brackets
#   * several "[A] & [B]" names -> stored in `multirenames` as "A ::: B",
#     keyed by the record's first element
# Records whose "Truth" is empty are skipped.
multis = []
multirenames = {}
for item in fixedData:
    name = item[1]["Truth"]
    if len(name) == 0:
        continue
    if "] & [" in name:
        multirenames[item[0]] = " ::: ".join(name[1:-1].split("] & ["))
    else:
        multis.append(name[1:-1])

print("Found {0} renames".format(len(multirenames)))
print("Found {0} multi-artists".format(len(multis)))

#### Save MultiArtists (if found)

In [None]:
knownFilename = "../multiartist/knownMultiArtists.yaml"
knownMultis = getFile(knownFilename)
print(len(multis))
print(len(knownMultis))
knownMultis += multis
print(len(knownMultis))
knownMultis = sorted(list(set(knownMultis)))
print(len(knownMultis))
saveFile(idata=knownMultis, ifile=knownFilename)

#### Save multi renames (if needed)

In [None]:
multiManDB = masterArtistNameDB("multi", init=False)
#multiManDB.forceReload(getFile("main.yaml"))
multiManDB.addRenames(multirenames)

In [None]:
multiManDB = masterArtistNameDB("multi", init=False)
multiManDB.addRenames(multirenames)
multiManDB.save()
saveFile(idata=multiManDB.getRenames(), ifile="multi.yaml")
multiManDB = masterArtistNameDB("multi", init=True)
multiManDB.forceReload(getFile("multi.yaml"))
multiManDB.checkForRecursives()
multiManDB = masterArtistNameDB("multi", init=False)

In [None]:
#### Edit main.yaml (if needed)

In [None]:
mdbmaps["Billboard"].getDF()

In [None]:
mdf = mdbmaps["BillboardYE"].getDF()
mdf.shape
#mdf[mdf["DBMatches"] == 0]

# Multi Match

In [None]:
cad.getTypeArtistAlbumData("Joe Rene")

In [None]:
toMatch["Billboard"]

In [None]:
manualAppends(cad, mType, toMatch, chartType, minAlbums=0, add=True)

In [None]:
mdbmc.getMasterDF("Billboard")

In [None]:
matchItAll(mdbmaps, getThresholdsWithoutAlbums(0.85), toMatch=toMatch, useAlbums=False)
reMatch()

In [None]:
#singleArtistAlbumData["Billboard"]['Ernest']
saveFile(idata=[x[1]["ArtistName"] for x in toMatch["Top40"]], ifile="toget.p")

# Find Near Artist Name Matches

In [None]:
toget, dbRenames = analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=1.0)

In [None]:
# Report the pending renames, then reload the rename list from
# relDBRenames2.yaml and check every "older -> best" pair against manDB:
#   * older name already a key of manDB.artistNameDB -> recorded in `redos`
#     (best -> older) and `dels`
#   * otherwise the pair is printed
if len(dbRenames) > 0:
    analyzeRenames(manDB, dbRenames)
    # Test for overlap with manDB

    dbRenames = getFile(ifile="relDBRenames2.yaml")
    redos = {}
    dels  = []
    for oldername,bestname in dbRenames.items():
        if oldername in manDB.artistNameDB.keys(): # and bestname not in manDB.artistNameDB.keys():
            #print("Older=[{0}] \t Best=[{1}]".format(oldername,bestname))

            redos[bestname] = oldername
            dels.append(oldername)
            #print('PROBLEM ({0} in master DB): \t{1}  -->  {2}'.format(oldername, oldername, bestname))

            # Best-effort lookups: a failed lookup falls back to an empty dict.
            # `except Exception` (not a bare except) so that interrupting the
            # kernel still stops the loop.
            try:
                matchOlderData = mdbmaps[chartType].getArtistDataByName(oldername).getDict()
            except Exception:
                matchOlderData = {}

            try:
                matchBestData  = mdbmaps[chartType].getArtistDataByName(bestname).getDict()
            except Exception:
                matchBestData = {}

            #print("Older: {0}".format(matchOlderData))
            #print("Best:  {0}".format(matchBestData))
            #print("\n")
        else:
            print("{0}: {1}".format(oldername, bestname))

    print("# Renames: {0}".format(len(dbRenames)))
    print("# Dels: {0}".format(len(dels)))

In [None]:
manDB = masterArtistNameDB("main", init=True)
manDB.forceReload(getFile("main.yaml"))
manDB.addRenames(getFile("relDBRenames2.yaml"))
manDB.checkForRecursives()

In [None]:
manDB = masterArtistNameDB("main", init=True)
manDB.forceReload(getFile("main.yaml"))
manDB.addRenames(getFile("relDBRenames2.yaml"))
manDB.checkForRecursives()
manDB.save()
manDB = masterArtistNameDB("main", init=False)
saveFile(idata=manDB.dbRenames, ifile="main.yaml")
removeDBRenames(mdbmaps, chartType, dbRenames)

In [None]:
removeDBRenames(mdbmaps, chartType, dbRenames)

In [None]:
vals="""
[Sander Van Doom                0.9                Sander Van Doorn] 	 --> (Top40) 27813b44b4282de50171e13adb4ebcf9 / 183267 (Discogs)
"""

# Turn report lines pasted into `vals` into printable addArtistDataByID(...)
# statements. Each non-empty line is expected to look like
#   [<name>    <score>    <other name>] --> (<chartType>) <primaryKey> / <id> (<dbName>)
# NOTE: the "(<chartType>) " marker in the pasted text must equal the current
# chartType, otherwise the [1] index below raises IndexError.
lines=vals.split("\n")
# Text after "(<chartType>) ":  "<primaryKey> / <id> (<dbName>)"
lines2=[x.split("({0}) ".format(chartType))[1] for x in lines if len(x) > 0]
# Text before "(<chartType>) ": "[<name>    <score>    <other name>] --> "
names2=[x.split("({0}) ".format(chartType))[0] for x in lines if len(x) > 0]
# primaryKey -> text between the first "(" and the final character (the dbName)
lines3={x.split(" / ")[0]: x[x.find("(")+1:-1] for x in lines2}
# First name on the line: everything before the first run of four spaces, minus the leading "["
names3=[x.split("    ")[0].strip()[1:] for x in names2]
# lines3 is a dict, so a repeated primaryKey collapses entries and breaks the
# positional pairing with names3 used below -- fail loudly in that case.
if len(names3) != len(lines3):
    raise ValueError("Err")

for i,(k,v) in enumerate(lines3.items()):
    artistName = names3[i]
    #print("mdbmaps[chartType].addArtistDataByID(\'{0}\', \'{1}\', \'{2}\')".format(k,v,))
    print("### ----> {0}".format(artistName))
    print("mdbmaps[chartType].addArtistDataByID(\'{0}\', \'{1}\', None)".format(k,v))    
    # singleArtistAlbumData is a notebook global defined outside this cell
    albums = singleArtistAlbumData[chartType][artistName]
    print("### {0}".format("  ,  ".join(albums)))
    print("### ignores.append(\"{0}\")".format(artistName))
    print("\n")
    
    
#mdbmaps[chartType].addArtistDataByID("e309c2e3fc905eae304b71e10e82eb99", "AllMusic", '0000803469')
# Trailer: the statements that persist the map and the ignore list
print("")
print("mdbmaps[chartType].save()")
print("saveFile(idata=list(set(ignores)), ifile=\"chartIgnores.yaml\")")

In [None]:
### ----> Sander Van Doom
mdbmaps[chartType].addArtistDataByID('27813b44b4282de50171e13adb4ebcf9', 'Discogs', None)
### Grasshopper
### ignores.append("Sander Van Doom")



mdbmaps[chartType].save()
saveFile(idata=list(set(ignores)), ifile="chartIgnores.yaml")

In [None]:
toget

****
****
****
****

# Multis

In [None]:
ignores.append('Daryl Hall & John Oates')

In [None]:
from multiArtist import multiartist
# Extend the known multi-artist list with every chart artist whose name splits
# into more than one sub-name (per mularts.getArtistNames) where none of those
# sub-names is known to the chart map. Sizes are printed before and after the
# additions and again after de-duplication.
mularts = multiartist(cutoff=0.9, discdata=None, exact=False)
knownMultiArtists = getFile("../multiartist/knownMultiArtists.yaml")
print(len(knownMultiArtists))
print(len(mdbmaps[chartType].getArtists()))
for idx, artistName in mdbmaps[chartType].getArtists().items():
    subNames = mularts.getArtistNames(artistName)
    if len(subNames) <= 1:
        continue
    # Evaluate every sub-name (no short-circuit) before deciding
    knownFlags = [mdbmaps[chartType].isKnownByName(subName) for subName in subNames]
    if not any(knownFlags):
        knownMultiArtists.append(artistName)

print(len(knownMultiArtists))
knownMultiArtists = list(set(knownMultiArtists))
print(len(knownMultiArtists))
        
#saveFile(idata = sorted(knownMultiArtists), ifile="../multiartist/knownMultiArtists.yaml")

In [None]:
# Drop every chart-map artist whose name appears in chartIgnores.yaml, then
# persist the map. The [id, name] pairs are collected first so the map is not
# modified while it is being iterated.
ignores = getFile("chartIgnores.yaml")
dels = [
    [idx, artistName]
    for idx, artistName in mdbmaps[chartType].getArtists().items()
    if artistName in ignores
]

print(len(dels))
for idx, artistName in dels:
    mdbmaps[chartType].removeArtistByID(idx)

mdbmaps[chartType].save()

In [None]:
len(dels)

# Functions

In [None]:
class matchthresholds:
    """Lookup tables of matching thresholds keyed by minimum album count.

    Attributes
    ----------
    thresholds : dict
        minAlbums -> keyword settings (numArtistName, artistNameCutoff,
        artistAlbumCutoff, numArtistAlbums, score) for album-based matching.
    iterItems : dict
        minAlbums -> {"Max": ..., "Vals": ...}; iteration order is
        20, 10, 50, 5, 3, 2, 1.
    thresholdsNoAlbums : dict
        Settings for name-only matching; its cutoff is overwritten by
        getThresholdsWithoutAlbums().
    """

    def __init__(self):
        # Columns: minAlbums, numArtistName, artistNameCutoff, numArtistAlbums, score.
        # artistAlbumCutoff is 0.9 for every row.
        rows = (
            (1000, 1, 0.95, int(200/8), 10.0),
            (500,  1, 0.95, int(200/8), 5.0),
            (200,  1, 0.95, int(200/8), 2.5),
            (100,  1, 0.95, int(100/8), 1.5),
            (50,   2, 0.95, int(50/8),  1.5),
            (20,   2, 0.95, 3,          1.5),
            (10,   5, 0.90, 2,          1.5),
            (5,    5, 0.90, 2,          1.5),
            (3,    5, 0.90, 2,          1.5),
            (2,    5, 0.90, 1,          2.0),
            (1,    5, 0.90, 1,          0.9),
        )
        self.thresholds = {
            minAlbums: {'numArtistName': numNames, 'artistNameCutoff': nameCutoff, 'artistAlbumCutoff': 0.9,
                        'numArtistAlbums': numAlbums, 'score': score}
            for minAlbums, numNames, nameCutoff, numAlbums, score in rows
        }

        # Insertion order is deliberate: callers iterate this dict as-is.
        self.iterItems = {
            20: {"Max": 50, "Vals": 100},
            10: {"Max": 20, "Vals": 200},
            50: {"Max": 10000, "Vals": 50},
            5:  {"Max": 10, "Vals": 250},
            3:  {"Max": 5, "Vals": 500},
            2:  {"Max": 3, "Vals": 500},
            1:  {"Max": 2, "Vals": 500},
        }

        self.thresholdsNoAlbums = {'numArtistName': 5, 'artistNameCutoff': 0.9, 'artistAlbumCutoff': None,
                                   'numArtistAlbums': None, 'score': None}

    def getThresholdsWithAlbums(self, minAlbums):
        """Return the settings dict for `minAlbums`, or None if there is no such entry."""
        return self.thresholds.get(minAlbums)

    def getThresholdsWithoutAlbums(self, cutoff):
        """Store `cutoff` as the name cutoff and return the shared name-only settings dict."""
        noAlbums = self.thresholdsNoAlbums
        noAlbums["artistNameCutoff"] = cutoff
        return noAlbums

    def getIterItems(self):
        """Return the minAlbums -> {"Max", "Vals"} table."""
        return self.iterItems
        

In [None]:
from functools import partial
from matchDBArtist import matchDBArtist

class multimatch:
    """Select unmatched chart artists and match them against the main DB in parallel.

    Typical use: setMDBMap() -> setData() -> matchItAll().

    Parameters
    ----------
    maindb : object handed to every matchDBArtist instance.
    mdbmc  : object providing getDataToMatch(dbName, maxValues=..., maxAlbums=...,
             minAlbums=..., ignores=...), returning a dict keyed by dbName.
    debug  : when True, matchItAll() also prints the full argument list.
    """

    def __init__(self, maindb, mdbmc, debug=False):
        self.debug  = debug
        self.maindb = maindb
        self.mdbmc  = mdbmc

        self.mt = matchthresholds()

        # Populated by setMDBMap / setData / setDataToMatch / setMaxMatches
        self.mdbmap     = None
        self.dbName     = None
        self.toMatch    = None
        self.maxMatches = None
        self.useAlbums  = None
        self.thresholds = None

        self.matchFunc = {True: self.matchDBArtistWithAlbums, False: self.matchDBArtistWithoutAlbums}


    def setMDBMap(self, dbName, mdbmap):
        """Remember which chart DB (name and map object) is being matched."""
        self.mdbmap = mdbmap
        self.dbName = dbName


    def setDataToMatch(self, toMatch):
        """Override the data to match; a None argument leaves the current data untouched."""
        if toMatch is not None:
            self.toMatch = toMatch


    def setMaxMatches(self, maxMatches):
        """Store the maximum number of matches (not read elsewhere in this class)."""
        self.maxMatches = maxMatches


    ############################################################################################################
    ## Set Data To Be Matched
    ############################################################################################################
    def setData(self, useAlbums=True, minAlbums=None, maxValues=None, cutoff=None):
        """Choose thresholds and fetch the artists to match from self.mdbmc.

        useAlbums : True -> album-based matching (requires minAlbums);
                    anything else -> name-only matching using `cutoff`.
        minAlbums : key into the matchthresholds tables.
        maxValues : cap on the number of artists; defaults to the table's
                    "Vals" entry (with albums) or 100000 (without).
        cutoff    : artist-name cutoff for name-only matching.

        Raises ValueError if useAlbums is True and minAlbums is None, and
        KeyError if minAlbums has no entry in the iteration table.
        """
        self.useAlbums = useAlbums
        if useAlbums is True:
            if minAlbums is None:
                raise ValueError("Must supply a minAlbums value")

            self.thresholds = self.mt.getThresholdsWithAlbums(minAlbums)
            iterItems = self.mt.getIterItems()[minAlbums]
            maxAlbums = iterItems["Max"]
            if maxValues is None:
                # The default cap lives in the iteration table, not in the thresholds
                maxValues = iterItems["Vals"]

            self.toMatch = self.mdbmc.getDataToMatch(self.dbName, maxValues=maxValues, maxAlbums=maxAlbums,
                                                     minAlbums=minAlbums, ignores=[])
            print("Found {0} Artists To Match With Albums and Using Thresholds: {1}".format(len(self.toMatch[self.dbName]), self.thresholds))
        else:
            self.thresholds = self.mt.getThresholdsWithoutAlbums(cutoff)
            if maxValues is None:
                maxValues = 100000
            self.toMatch = self.mdbmc.getDataToMatch(self.dbName, maxValues=maxValues, maxAlbums=100000,
                                                     minAlbums=0, ignores=[])
            print("Found {0} Artists To Match Without Albums and Without Using Thresholds".format(len(self.toMatch[self.dbName])))


    def matchItAll(self):
        """Run the selected match function over every artist in self.toMatch[self.dbName].

        Returns the list of per-artist results, or None when there is nothing
        to match. Raises ValueError if setData() has not been called first.
        """
        if self.toMatch is None or self.thresholds is None or self.useAlbums is None:
            raise ValueError("Call setData() before matchItAll()")

        num_processes = 3
        func       = self.matchFunc[self.useAlbums]
        thresholds = self.thresholds
        pfunc = partial(func, **thresholds) # Giving some arguments for kwargs
        argument_list = self.toMatch[self.dbName]
        if self.debug:
            print("Args: ",argument_list)

        if len(argument_list) == 0:
            return
        print("Using {0} Matching Function".format(pfunc))
        print("Running imap multiprocessing for {0} artists ...".format(len(argument_list)))
        result_list = self.multiProc(func=pfunc, argument_list=argument_list,
                                               num_processes=num_processes)
        return result_list


    def matchDBArtistWithoutAlbums(self, item, *args, **kwargs):
        """Match one artist by name only.

        item   : (primaryKey, artistData) where artistData["ArtistName"] is the name.
        kwargs : the threshold settings (numArtistName, artistNameCutoff,
                 artistAlbumCutoff, numArtistAlbums, score).

        Returns [primaryKey, artistName, artistID, matches]; artistID is the primaryKey.
        """
        primaryKey   = item[0]
        artistData   = item[1]
        artistName   = artistData["ArtistName"]
        artistID     = primaryKey
        artistAlbums = None
        mdbMatcher = matchDBArtist(self.maindb)
        mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)
        mdbMatcher.setThresholds(matchNumArtistName=kwargs['numArtistName'], matchArtistNameCutoff=kwargs['artistNameCutoff'],
                                 matchArtistAlbumCutoff=kwargs['artistAlbumCutoff'], matchNumArtistAlbums=kwargs['numArtistAlbums'],
                                 matchScore=kwargs['score'])
        mcs    = mdbMatcher.findPotentialArtistNameMatchesWithoutAlbums()
        retval = [primaryKey,artistName,artistID,mcs]
        return retval


    def matchDBArtistWithAlbums(self, item, *args, **kwargs):
        """Match one artist using its name and its albums.

        item   : (primaryKey, artistData) where artistData carries "ArtistName"
                 and "ArtistAlbums".
        kwargs : the threshold settings (numArtistName, artistNameCutoff,
                 artistAlbumCutoff, numArtistAlbums, score).

        Returns [primaryKey, artistName, artistID, matches]; artistID is the primaryKey.
        """
        primaryKey   = item[0]
        artistData   = item[1]
        artistName   = artistData["ArtistName"]
        artistID     = primaryKey
        artistAlbums = artistData["ArtistAlbums"]

        mdbMatcher = matchDBArtist(self.maindb)
        mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)
        mdbMatcher.setThresholds(matchNumArtistName=kwargs['numArtistName'], matchArtistNameCutoff=kwargs['artistNameCutoff'],
                                 matchArtistAlbumCutoff=kwargs['artistAlbumCutoff'], matchNumArtistAlbums=kwargs['numArtistAlbums'],
                                 matchScore=kwargs['score'])
        mcs    = mdbMatcher.findPotentialArtistAlbumMatches()
        retval = [primaryKey,artistName,artistID,mcs]
        return retval


    def multiProc(self, func, argument_list, num_processes):
        """Map `func` over `argument_list` in a process pool, showing a tqdm progress bar.

        Results come back in input order. The pool is released when the
        mapping has finished, including when a worker raises.
        NOTE(review): `func` is a partial over a bound method, so the instance
        (including maindb) has to be picklable for the workers -- confirm.
        """
        result_list_tqdm = []
        with Pool(processes=num_processes) as pool:
            for result in tqdm(pool.imap(func=func, iterable=argument_list), total=len(argument_list)):
                result_list_tqdm.append(result)
        return result_list_tqdm


In [None]:
mm = multimatch(maindb, mdbmc)
mm.setMDBMap("Billboard", mdbmaps["Billboard"])

In [None]:
mm.setData(useAlbums=True, minAlbums=10, maxValues=1)

In [None]:
mm.matchItAll()

In [None]:
# Iterative matching pass: for every album-count bucket returned by
# getIterItems() (a notebook global, not defined in this cell), repeatedly fetch
# a batch of still-unmatched artists and match it, for at most 10 rounds per
# bucket or until the bucket is empty.
ignoresList = ["chartIgnores.yaml"]
for minAlbums,minAlbumsData in getIterItems().items():
    print("="*10,minAlbums)
    maxAlbums = minAlbumsData["Max"]    # upper album-count bound passed to getDataToMatch
    maxValues = minAlbumsData["Vals"]   # batch size passed to getDataToMatch
    for i in range(10):
        toMatch   = mdbmc.getDataToMatch(chartType, maxValues=maxValues, maxAlbums=maxAlbums, minAlbums=minAlbums, ignores=ignoresList)
        # Nothing left in this bucket -> move on to the next one
        if len(toMatch[chartType]) == 0:
            break
        matchItAll(mdbmaps, getThresholds(minAlbums), toMatch=toMatch)
        
# Runs once, after all buckets have been processed
reMatch()
print("Done.\n\n")

In [None]:
from billboardCharts import billboardCharts
from fileUtils import getBasename, getDirname, getBaseFilename
from timeUtils import clock, elapsed
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from listUtils import getFlatList
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
from os.path import join
from searchUtils import findExt
import urllib
from time import sleep
from collections import Counter
from artistIgnores import getArtistIgnores

from billboardCharts import billboardCharts
from top40Charts import top40Charts

class billboardFiles:
    """Locate the pickled Billboard chart result files under a base directory."""

    def __init__(self, basedir):
        self.basedir = basedir
        self.source  = "Billboard"

    def findFiles(self):
        """Find every '.p' file in <basedir>/data/billboard/results.

        Stores and returns a dict mapping each file's base filename (with any
        "/" replaced by a space) to the path returned by findExt.
        """
        savedir = join(self.basedir, "data", "billboard", "results")
        self.files   = findExt(savedir, ext='.p')
        print("Found {0} {1} files.".format(len(self.files), self.source))
        self.files = {getBaseFilename(x).replace("/", " "): x for x in self.files}
        return self.files
    
    
class billboardFullChart:
    """Holds the Billboard chart files found under `basedir` and the charts of interest.

    The constructor scans the disk immediately through billboardFiles.findFiles().
    """

    def __init__(self, basedir, charts):
        self.basedir = basedir
        self.source  = "Billboard"
        self.charts  = charts
        self.tfiles  = billboardFiles(self.basedir)
        self.files   = self.tfiles.findFiles()

        self.fullChartData = {}

    def filterFiles(self):
        """Return the subset of self.files whose chart name is in self.charts.

        Prints how many charts are kept. self.files itself is not modified.
        """
        files = {chartName: ifile for chartName, ifile in self.files.items() if chartName in self.charts}
        print("Keeping {0}/{1} Charts".format(len(files), len(self.files)))
        return files


class top40files:
    """Locate the pickled Top40 chart files under a base directory."""

    def __init__(self, basedir):
        self.source  = "Top40"
        self.basedir = basedir

    def findFiles(self):
        """Find every '.p' file in <basedir>/data/top40.

        Stores and returns a dict mapping each file's base filename (with any
        "/" replaced by a space) to the path returned by findExt.
        """
        found = findExt(join(self.basedir, "data", "top40"), ext='.p')
        self.files = found
        print("Found {0} {1} files.".format(len(found), self.source))
        byChartName = {}
        for path in found:
            byChartName[getBaseFilename(path).replace("/", " ")] = path
        self.files = byChartName
        return self.files
    
    
# NOTE: a second `class top40FullChart` further down in this cell redefines
# (and therefore shadows) this one.
class top40FullChart:
    """Holds the Top40 chart files found under `basedir` and the charts of interest.

    The constructor scans the disk immediately through top40files.findFiles().
    """

    def __init__(self, basedir, charts):
        self.basedir = basedir
        self.source  = "Top40"
        self.charts  = charts
        self.tfiles  = top40files(self.basedir)
        self.files   = self.tfiles.findFiles()

        self.fullChartData = {}

    def filterFiles(self):
        """Return the subset of self.files whose chart name is in self.charts.

        Prints how many charts are kept. self.files itself is not modified.
        """
        files = {chartName: ifile for chartName, ifile in self.files.items() if chartName in self.charts}
        print("Keeping {0}/{1} Charts".format(len(files), len(self.files)))
        return files


class top40files:
    """Locate the pickled Top40 chart files under a base directory."""

    def __init__(self, basedir):
        self.source  = "Top40"
        self.basedir = basedir

    def findFiles(self):
        """Find every '.p' file in <basedir>/data/top40.

        Stores and returns a dict mapping each file's base filename (with any
        "/" replaced by a space) to the path returned by findExt.
        """
        found = findExt(join(self.basedir, "data", "top40"), ext='.p')
        self.files = found
        print("Found {0} {1} files.".format(len(found), self.source))
        byChartName = {}
        for path in found:
            byChartName[getBaseFilename(path).replace("/", " ")] = path
        self.files = byChartName
        return self.files
    
    
class top40FullChart:
    """Collects per-artist chart history from the pickled Top40 chart files.

    Parameters
    ----------
    basedir : base directory handed to top40files.
    charts  : collection of chart names to keep.
    minYear, maxYear : optional inclusive year bounds applied to chart dates
        in setFullChartData(); None disables the bound.

    Attributes set here and read by setFullChartData():
    artistRenames (substring -> replacement) and dbRenames (exact name ->
    replacement), both empty by default.
    """

    def __init__(self, basedir, charts, minYear=None, maxYear=None):
        self.basedir = basedir
        self.source  = "Top40"
        self.charts  = charts
        self.tfiles  = top40files(self.basedir)
        self.files   = self.tfiles.findFiles()
        self.fullChartData = {}

        self.minYear = minYear
        self.maxYear = maxYear

        self.artistRenames = {}
        self.dbRenames     = {}

    def filterFiles(self):
        """Return the subset of self.files whose chart name is in self.charts.

        Prints how many charts are kept. self.files itself is not modified.
        """
        files = {chartName: ifile for chartName, ifile in self.files.items() if chartName in self.charts}
        print("Keeping {0}/{1} Charts".format(len(files), len(self.files)))
        return files

    def setFullChartData(self):
        """Read every wanted chart file and build self.fullChartData.

        Resulting structure:
            artist -> {"Songs" | "Albums"} -> title -> chartName -> date -> position index
        where the "Albums" bucket is used for charts whose name ends in
        "Albums" and the position index is the entry's 0-based place in that
        date's list.

        Raises ValueError when no chart files are found.
        """
        fullChartData = {}
        renameStats   = Counter()

        # Refresh the file list; findFiles() already returns {chartName: path}.
        self.files = self.tfiles.findFiles()
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")

        for chartName, ifile in self.files.items():
            if chartName not in self.charts:
                continue
            print("==> {0: <40}".format(chartName), end="\t")
            chartResults = getFile(ifile)

            for date, values in chartResults.items():
                if self.minYear is not None:
                    if getDateTime(date).year < int(self.minYear):
                        continue
                if self.maxYear is not None:
                    if getDateTime(date).year > int(self.maxYear):
                        continue

                for i,item in enumerate(values):
                    artist = item["Artist"]
                    renamedArtist = artist
                    # Substring renames; each hit is applied to the already-renamed name
                    for testArtist in self.artistRenames.keys():
                        if artist.find(testArtist) != -1:
                            renamedArtist = renamedArtist.replace(testArtist, self.artistRenames.get(testArtist))
                            renameStats[renamedArtist] += 1
                            artist = renamedArtist

                    # Whole-name rename
                    if self.dbRenames.get(artist) is not None:
                        renamedArtist = self.dbRenames[artist]
                        renameStats[renamedArtist] += 1
                        artist = renamedArtist

                    artist = artist.replace("\r", "")

                    # NOTE(review): entries are skipped when getArtistIgnores returns
                    # False -- confirm that False means "ignore this artist".
                    ignoreStatus = getArtistIgnores(artist)
                    if ignoreStatus is False:
                        continue

                    album  = item["Album"]
                    if album in ["Soundtrack"]:
                        continue

                    if chartName.endswith("Albums"):
                        key = "Albums"
                    else:
                        key = "Songs"

                    if fullChartData.get(artist) is None:
                        fullChartData[artist] = {"Songs": {}, "Albums": {}}
                    if fullChartData[artist][key].get(album) is None:
                        fullChartData[artist][key][album] = {}
                    if fullChartData[artist][key][album].get(chartName) is None:
                        fullChartData[artist][key][album][chartName] = {}
                    fullChartData[artist][key][album][chartName][date] = i
            print(len(fullChartData))
        self.fullChartData = fullChartData

        if self.artistRenames is not None:
            print("Renamed {0} artists".format(len(renameStats)))
            print("Most Common Artists:")
            for item in renameStats.most_common(5):
                print(item)


class chartData:
    def __init__(self, source, minYear=None, maxYear=None, country=None, debug=False):
        debug=False    
        self.source = source
        
        self.basedir  = "/Volumes/Piggy/Charts/"
        self.basename = source
        

        try:
            self.cts    = {"Billboard": billboardCharts(), "Top40": top40Charts()}[source]
            self.cFiles = {"Billboard": billboard40files(self.basedir), "Top40": top40files(self.basedir)}[source]
            self.files  = self.cFiles.findFiles()
        except:
            raise ValueError("Could not create charts data for {0}".format(source))

                        
        self.charts = []
            
        self.minYear   = minYear
        self.maxYear   = maxYear
        
        self.artistRenames   = {}
        self.dbRenames       = {}

        self.chartData       = {}
        self.fullChartData   = {}
        self.artistAlbumData = {}
        
        
    #####################################################################################################################################
    ## Artist Data
    #####################################################################################################################################
    def getArtists(self):
        return list(self.artistAlbumData.keys())
        
        
    
    #####################################################################################################################################
    ## Full Chart Data
    #####################################################################################################################################
    def getFullChartDataFilename(self):
        ifile="current{0}FullChartArtistAlbumData.p".format(self.basename)
        return ifile

    def getFullChartData(self):
        """Return whatever ``getFile`` loads from the full-chart-data file."""
        fullChartDataFile = self.getFullChartDataFilename()
        return getFile(fullChartDataFile)
        
    def saveFullChartData(self):
        """Print the number of artists held, then write ``self.fullChartData`` with ``saveFile``."""
        numArtists = len(self.fullChartData)
        print("Saving {0} Full Artist Data".format(numArtists))
        outputFile = self.getFullChartDataFilename()
        saveFile(idata=self.fullChartData, ifile=outputFile, debug=True)
        
        
    
    #####################################################################################################################################
    ## Artist Album Data
    #####################################################################################################################################
    def getArtistAlbumDataFilename(self):
        ifile="current{0}ArtistAlbumData.p".format(self.basename)
        return ifile
    
    def setArtistAlbumData(self):
        self.artistAlbumData = {artist: list(artistData["Songs"].keys()) + list(artistData["Albums"].keys()) for artist,artistData in self.fullChartData.items()}
        
    
    def getArtistAlbumData(self):
        """Return whatever ``getFile`` loads from the artist/album data file."""
        artistAlbumDataFile = self.getArtistAlbumDataFilename()
        return getFile(artistAlbumDataFile)
        
    def saveArtistAlbumData(self):
        """Print the entry count and target file, then write ``self.artistAlbumData`` with ``saveFile``."""
        numArtists = len(self.artistAlbumData)
        print("Saving {0} Artist Album Data to {1}".format(numArtists, self.getArtistAlbumDataFilename()))
        outputFile = self.getArtistAlbumDataFilename()
        saveFile(idata=self.artistAlbumData, ifile=outputFile, debug=True)
        
        
    
    #####################################################################################################################################
    ## Rename Data
    #####################################################################################################################################        
    def setRenames(self, artistRenames):
        self.artistRenames = artistRenames
        
    def setDBRenames(self, dbRenames):
        self.dbRenames = dbRenames
        
        
    
    #####################################################################################################################################
    ## Chart Selection
    #####################################################################################################################################
    def setChartUsage(self, name=None, rank=None):
        if rank is not None:
            if isinstance(rank, list):
                for item in rank:
                    self.charts += self.cts.getChartsByRank(item)
            elif isinstance(rank, int):
                self.charts += self.cts.getChartsByRank(rank)
        elif name is not None:
            self.charts += self.cts.getCharts(name)
        else:
            self.charts = self.cts.getCharts(None)
        if name is None:
            name = "None"
        print("  Using Charts ({0}): {1}".format(name, self.charts))
        
        
        
                
    #####################################################################################################################################
    ## Set Artist Album Data
    #####################################################################################################################################
    def setArtistAlbumData(self):
        self.artistAlbumData = {artist: list(artistData["Songs"].keys()) + list(artistData["Albums"].keys()) for artist,artistData in self.fullChartData.items()}
        print("There are {0} unique artist entries".format(len(self.artistAlbumData)))
        

    #####################################################################################################################################
    ## Set Full Chart Data
    #####################################################################################################################################
    def setFullChartData(self):
        {"Billboard": self.setFullChartDataBillboard(), "Top40": self.setFullChartDataTop40()}[self.source]
    
    def setFullChartDataBillboard(self):        
        renameStats  = Counter()
        chartCounter = Counter()
        
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")
        
        for ifile in self.files:
            fdata = getFile(ifile)
            for chartName, cnameResults in fdata.items():
                if chartName not in self.charts:
                    continue
                
                for date, dResults in cnameResults.items():
                    if self.minYear is not None:
                        if getDateTime(date).year < int(self.minYear):
                            continue
                    if self.maxYear is not None:
                        if getDateTime(date).year > int(self.maxYear):
                            continue
                    stryear = getDateTime(date).year

                    artist = dResults["Artist"]

                    renamedArtist = artist
                    for testArtist in self.artistRenames.keys():
                        if artist.find(testArtist) != -1:
                            tmp = renamedArtist
                            renamedArtist = renamedArtist.replace(testArtist, self.artistRenames.get(testArtist))
                            #print("{0}  <---- From ---- {1}".format(renamedArtist, tmp))
                            renameStats[renamedArtist] += 1
                            artist = renamedArtist 
                    
                    if self.dbRenames.get(artist) is not None:
                        renamedArtist = self.dbRenames[artist]
                        renameStats[renamedArtist] += 1
                        artist = renamedArtist  

                    ignoreStatus = getArtistIgnores(artist)
                    if ignoreStatus is False:
                        continue


                    chartCounter[chartName] += 1

                    album  = dResults["Name"]

                    if self.chartData.get(artist) is None:
                        self.chartData[artist] = Counter()
                    self.chartData[artist][album] += 1
                    
                    if self.fullChartData.get(artist) is None:
                        self.fullChartData[artist] = {"Songs": {}, "Albums": {}}
                    if chartName.endswith("Albums"):
                        key = "Albums"
                    else:
                        key = "Songs"
                    if self.fullChartData[artist][key].get(album) is None:
                        self.fullChartData[artist][key][album] = {}
                    if self.fullChartData[artist][key][album].get(chartName) is None:
                        self.fullChartData[artist][key][album][chartName] = {}
                    self.fullChartData[artist][key][album][chartName][date] = 0
                #print("{0: <40}{1}".format("{0}-{1}".format(chartName,stryear),len(self.fullChartData)))
                
        
    def setFullChartDataTop40(self):
        fullChartData = {}
        renameStats   = Counter()
        
        #self.findFiles()
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")
        self.files = {getBaseFilename(x).replace("/", " "): x for x in self.files}
        
        for chartName, ifile in self.files.items():
            if chartName not in self.charts:
                continue
            print("==> {0: <40}".format(chartName), end="\t")
            #t40chart = top40chart(chartID, chartName, chartURL)
            chartResults = getFile(ifile)

            for date, values in chartResults.items():
                if self.minYear is not None:
                    if getDateTime(date).year < int(self.minYear):
                        continue
                if self.maxYear is not None:
                    if getDateTime(date).year > int(self.maxYear):
                        continue

                        
                for i,item in enumerate(values):
                    artist = item["Artist"]
                    renamedArtist = artist
                    for testArtist in self.artistRenames.keys():
                        if artist.find(testArtist) != -1:
                            tmp = renamedArtist
                            renamedArtist = renamedArtist.replace(testArtist, self.artistRenames.get(testArtist))
                            #print("{0}  <---- From ---- {1}".format(renamedArtist, tmp))
                            renameStats[renamedArtist] += 1
                            artist = renamedArtist
                    
                    if self.dbRenames.get(artist) is not None:
                        renamedArtist = self.dbRenames[artist]
                        renameStats[renamedArtist] += 1
                        artist = renamedArtist
                    

                    artist = artist.replace("\r", "")                    
                    
                    ignoreStatus = getArtistIgnores(artist)
                    if ignoreStatus is False:
                        continue
                    
                    album  = item["Album"]
                    if album in ["Soundtrack"]:
                        continue

                    if fullChartData.get(artist) is None:
                        fullChartData[artist] = {"Songs": {}, "Albums": {}}
                    if chartName.endswith("Albums"):
                        key = "Albums"
                    else:
                        key = "Songs"
                    if fullChartData[artist][key].get(album) is None:
                        fullChartData[artist][key][album] = {}
                    if fullChartData[artist][key][album].get(chartName) is None:
                        fullChartData[artist][key][album][chartName] = {}
                    fullChartData[artist][key][album][chartName][date] = i
            print(len(fullChartData))
        self.fullChartData = fullChartData
        
        if self.artistRenames is not None:
            print("Renamed {0} artists".format(len(renameStats)))
            print("Most Common Artists:")
            for item in renameStats.most_common(5):
                print(item)