# Chart Analysis Functions

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))
#IPython.Cell.options_default.cm_config.lineNumbers = true;

################################################################################
## Python Version
################################################################################
import sys


################################################################################
## General Stuff
################################################################################
from multiprocessing import Pool
from tqdm import tqdm


################################################################################
## Util Stuff
################################################################################
from timeUtils import clock, elapsed
from ioUtils import saveFile, getFile


################################################################################
## Music DB
################################################################################
from mainDB import mainDB
from musicDBMap import musicDBMap
from masterDBMatchClass import masterDBMatchClass
from matchDBArtist import matchDBArtist


################################################################################
## Music Names
################################################################################
from masterArtistNameDB import masterArtistNameDB


################################################################################
## Chart Stuff
################################################################################
from artistIgnores import getArtistIgnores
from billboardData import billboardData
from top40Data import top40Data
from chartArtistAlbumData import chartArtistAlbumData
from chartUtils import *


################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

print("Python: {0}".format(sys.version))
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))



Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2021-01-15 00:29:13.845413


# Rename Input Data

In [None]:
# Module-level handles to the "main" and "multi" artist-name DBs.
# `manDB` is handed to top40Data.setDBRenames() in the Top40 cell below.
# NOTE(review): `multimanDB` is not referenced again in the visible notebook; the
# disabled cell further down uses the differently-cased name `multiManDB`.
manDB      = masterArtistNameDB("main", init=False)
multimanDB = masterArtistNameDB("multi", init=False)

def updateManDB(step=1):
    """Run one maintenance step on the "main" artist-name DB.

    Steps (any other value only prints the timestamp):
      1 -- re-create the DB, force-reload it from main.yaml, call checkForRecursives()
      2 -- re-create the DB, force-reload it from main.yaml, save it, re-export main.yaml
      3 -- open the existing DB, add relDBRenames2.yaml, call checkForRecursives()
      4 -- open the existing DB, add relDBRenames2.yaml, save it, re-export main.yaml

    All DB objects created here are local to the function; the notebook-level
    `manDB` variable is not rebound.
    """
    # Stage 1: obtain a working DB, either rebuilt from YAML or extended with extra renames.
    if step in (1, 2):
        tmpmanDB = masterArtistNameDB("main", init=True)
        tmpmanDB.forceReload(getFile("main.yaml"))
    elif step in (3, 4):
        tmpmanDB = masterArtistNameDB("main", init=False)
        tmpmanDB.addRenames(getFile("relDBRenames2.yaml"))

    # Stage 2: odd steps are dry runs (check only); even steps persist the result.
    if step in (1, 3):
        tmpmanDB.checkForRecursives()
    elif step in (2, 4):
        tmpmanDB.save()
        # Re-open the saved DB and write its renames back out to main.yaml.
        savedDB = masterArtistNameDB("main", init=False)
        saveFile(idata=savedDB.getRenames(), ifile="main.yaml")

    _, _ = clock("Last Ran")

In [None]:
#updateManDB(3)

# Top40 Data

In [None]:
%load_ext autoreload
%autoreload
from top40Data import top40Data
td = top40Data(minYear=1, maxYear=2021)
td.setChartUsage(rank=[0,1,2])
td.setDBRenames(manDB)
td.setFullChartData()
td.setArtistAlbumData()
td.saveArtistAlbumData()
td.saveFullChartData()

_, _ = clock("Last Run")

# Billboard Data

In [None]:
%load_ext autoreload
%autoreload
from billboardData import billboardData
bd = billboardData(minYear=1, maxYear=2021)
bd.setChartUsage(rank=[0,1,2,3])
bd.setFullChartData()
bd.setArtistAlbumData()

bd.saveArtistAlbumData()
bd.saveFullChartData()

_, _ = clock("Last Run")

# BillboardYE Data

In [3]:
%load_ext autoreload
%autoreload
from billboardYE import billboardYE
bYE = billboardYE(minYear=1, maxYear=2021)
bYE.setChartUsage(rank=[0,1,2])
bYE.setFullChartData()
bYE.setArtistAlbumData()

bYE.saveFullChartData()
bYE.saveArtistAlbumData()

_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  Getting Chart For hot
  Using 5 Charts
  Using 5 Charts
  Getting Chart For adult
  Using 4 Charts
  Using 4 Charts
  Getting Chart For alternative
  Using 7 Charts
  Getting Chart For countryMusic
  Using 15 Charts
  Getting Chart For rock
  Using 17 Charts
  Getting Chart For rnb
  Using 37 Charts
  Using 76 Charts
  Using Charts (None): ['hot-100-songs', 'hot-100-artists-male', 'hot-100-artists', 'hot-100-artists-female', 'hot-100-artists-duo-group', 'adult-contemporary-artists', 'adult-pop-songs-artists', 'adult-pop-songs', 'adult-contemporary-songs', 'adult-alternative-songs', 'alternative-digital-song-sales-yearend', 'alternative-songs', 'alternative-songs-artists', 'adult-alternative-songs-artists', 'top-alternative-album-artists', 'top-alternative-albums', 'country-digital-songs', 'country-artists-duo-group', 'hot-country-songs-artists', 'top-country-albums', 'country-streaming-songs-artis

# Common Code

In [None]:
def copyMapData(mdbmap, enabled=False):
    """Optionally ask `mdbmap` to save a copy of itself.

    The copy used to be switched off by a bare `return` placed in front of the
    `saveCopy()` call, which left that call unreachable. The switch is now an
    explicit parameter; its default keeps the previous no-op behaviour.

    Args:
        mdbmap: object exposing `saveCopy()`; only touched when `enabled` is true.
        enabled: when True, call `mdbmap.saveCopy()`.

    Returns:
        None.
    """
    if enabled:
        mdbmap.saveCopy()

def saveMapData(mdbmap, result_list):
    """Record matcher results in `mdbmap` and save it once at the end.

    Args:
        mdbmap: map object exposing `addArtistByKey`, `addArtistDataByKey` and `save`.
        result_list: iterable of `[primaryKey, artistName, artistID, mcs]` entries,
            where `mcs` maps a DB name to an object with a `matchID` attribute.

    Every artist is registered; a per-DB entry is added only when its `matchID`
    is not None. `save()` is called even when `result_list` is empty.
    """
    for item in result_list:
        primaryKey, artistName, artistID, mcs = item[0], item[1], item[2], item[3]
        mdbmap.addArtistByKey(primaryKey, artistName=artistName, artistID=artistID)
        for db, mc in mcs.items():
            matchID = mc.matchID
            # Unmatched DBs carry matchID None and are skipped.
            if matchID is not None:
                mdbmap.addArtistDataByKey(primaryKey, db, matchID)

    mdbmap.save()
    
    

def extraKnownArtists(mdbmaps, chartType):
    """Dispatch to the chart-specific routine that adds hand-curated artist IDs.

    Returns whatever the selected routine returns, or None when `chartType`
    is not one of "Billboard", "BillboardYE" or "Top40".
    """
    # Each route is wrapped in a lambda so only the selected helper is looked up and called.
    routes = (
        ("Billboard",   lambda: extraAddsForBillboard(mdbmaps, chartType)),
        ("BillboardYE", lambda: extraAddsForBillboardYE(mdbmaps, chartType)),
        ("Top40",       lambda: extraAddsForTop40(mdbmaps, chartType)),
    )
    for routeName, runRoute in routes:
        if chartType == routeName:
            return runRoute()
    return None
    

def extraAddsForBillboard(mdbmaps, chartType):
    """Add hand-curated AllMusic / Discogs IDs for Billboard artists and save the map.

    Each entry passes the chart name, the artist's primary key in the chart map and
    the external DB ID to `addAllMusic` / `addDiscogs`. The trailing `### [name]`
    comment names the artist and the `###` line below it lists the charted titles.
    Nothing happens unless `chartType` is "Billboard".

    Fix: the [Norma Jean] `addAllMusic` call had been pasted onto the end of the
    preceding comment line, so it was never executed; it is a statement again.
    """
    if chartType == "Billboard":
        addAllMusic(mdbmaps, "Billboard", "fa00044058206494d4335094632fe4eb", "mn0000771069")   ### Boss
        ### Deeper  ,  Recipe Of A Hoe

        addAllMusic(mdbmaps, "Billboard", "a6fbf98a79a048834d50ce68d67ad442", "mn0000621112")   ### Mya
        addDiscogs(mdbmaps, "Billboard", "a6fbf98a79a048834d50ce68d67ad442", "28738")    ### Mya
        ### Case Of The Ex (Whatcha Gonna Do)

        addAllMusic(mdbmaps, "Billboard", "d6cac33cc3555e4e1f5ce0d2b3cae450", "mn0001481616")   ### [Connor Christian & Southern Gothic]
        addDiscogs(mdbmaps, "Billboard", "d6cac33cc3555e4e1f5ce0d2b3cae450", "4558282")    ### [Connor Christian & Southern Gothic]
        ### New Hometown

        addDiscogs(mdbmaps, "Billboard", "808c8b3a9909c7d7a3e5753792d1b995", "150433")    ### [LV]
        ### Gangsta's Paradise (From "Dangerous Minds")

        addAllMusic(mdbmaps, "Billboard", "89eab0113dd350e54cc04867d4cbcc04", "mn0000184098")   ### [John McDermott]
        addDiscogs(mdbmaps, "Billboard", "89eab0113dd350e54cc04867d4cbcc04", "1436273")    ### [John McDermott]
        ### The Irish Tenors: Home For Christmas  ,  The Irish Tenors: Live In Belfast  ,  The Irish Tenors: Ellis Island  ,  The Irish Tenors: Live In Belfast  ,  The Irish Tenors: Ellis Island

        addDiscogs(mdbmaps, "Billboard", "265d96f09b9e09bd2ea57aa249d40bd6", "4995184")    ### [New Life]
        ### Blessed By Association

        addDiscogs(mdbmaps, "Billboard", "b66b1bfa1a8b0725ba1f139b87ad3a4c", "92281")    ### [Loon]
        ### I Need A Girl (Part Two)  ,  I Don't Wanna Know

        addAllMusic(mdbmaps, "Billboard", "832b5faffd094a5ab3b22f67d40db4b7", "mn0001049094")   ### [RKM]
        ### Masterpiece: Nuestra Obra Maestra

        addDiscogs(mdbmaps, "Billboard", "8d31f7e1853f2b2a38cb37cdc92ac03d", "1813760")    ### [Lil Peanut]
        ### Lean Wit It, Rock Wit It

        addDiscogs(mdbmaps, "Billboard", "5903a04a9d6f7e1f2434dd13def9c352", "1702929")    ### [Nayer]
        ### Give Me Everything  ,  Hey Mama

        addAllMusic(mdbmaps, "Billboard", "ce6a59b7abc5d76a683bf2aa188adf74", "mn0000178007")   ### [Russell Moore]
        ### Timeless Hits From The Past: Bluegrassed

        addAllMusic(mdbmaps, "Billboard", "0b1e4a71bbd133e8050ba86a4492b04b", "mn0003744509")   ### [Ranna Royce]
        ### The Whoodlum Ball

        addAllMusic(mdbmaps, "Billboard", "8a34e0d5cd9db90fc3f598b5f615854d", "mn0003712915")   ### [Smith And Hay]
        ### The Whoodlum Ball

        addAllMusic(mdbmaps, "Billboard", "4ce403673af609f4b3edb411124ac331", "mn0003259234")   ### [OCD: Moosh & Twist]

        addAllMusic(mdbmaps, "Billboard", "5dab93d7de7fdc41568fc96e979119e3", "mn0003740957")   ### [Dave Rowland]
        ### The Door Is Always Open  ,  Tear Time  ,  Golden Tears

        addAllMusic(mdbmaps, "Billboard", "ba6cb719ca53bf4f57ecdcd6bd342bf5", "mn0000233423")   ### [Magoo]
        addDiscogs(mdbmaps, "Billboard", "ba6cb719ca53bf4f57ecdcd6bd342bf5", "177491")    ### [Magoo]
        ### Up Jumps Da Boogie  ,  Promiscuous  ,  Give It To Me  ,  The Way I Are  ,  Apologize  ,  Say Something

        addAllMusic(mdbmaps, "Billboard", "d7069e334141b398d6204597eb9c1401", "mn0002908858")   ### [Myles Kennedy And The Conspirators]
        ### Apocalyptic Love  ,  You're A Lie  ,  World On Fire  ,  Living The Dream

        addAllMusic(mdbmaps, "Billboard", "d80e9d8084288fd5741a6aa3d66f4791", "mn0000469634")   ### [Rev. F.C. Barnes]
        ### Rough Side Of The Mountain

        addDiscogs(mdbmaps, "Billboard", "716be2f511125c5a2f9b1f551625796a", "391684")    ### [Rev. J.Cleveland]
        ### Having Church

        addAllMusic(mdbmaps, "Billboard", "e491dd7ccd6078de34241c26285b3a67", "mn0001214814")   ### [V.I.P. Music]
        ### Stand!

        addAllMusic(mdbmaps, "Billboard", "efad7602aac6e53f2a5ff7245e82b2e3", "mn0000326045")   ### [Radical For Christ]
        addDiscogs(mdbmaps, "Billboard", "efad7602aac6e53f2a5ff7245e82b2e3", "598023")    ### [Radical For Christ]
        ### (Pages Of Life) Chapters I & II  ,  Purpose By Design

        # Restored: this call was previously swallowed by the comment line above.
        addAllMusic(mdbmaps, "Billboard", "2560abe831fb977fd5f9fb8cf376f754", "mn0000387219")   ### [Norma Jean]

        addAllMusic(mdbmaps, "Billboard", "26f06a68470482961ce0948b25a130df", "mn0000774908")   ### [J. Moss]
        ### We Must Praise  ,  V2...

        addAllMusic(mdbmaps, "Billboard", "25a9d8b57fd0df9d2e6d6702976b5cdd", "mn0000593009")   ### [Youth For Christ]
        ### The Struggle Is Over

        addAllMusic(mdbmaps, "Billboard", "9d63b29d7ff8a3801b434b371008fef2", "mn0000454193")   ### [pureNRG]
        ### The Real Thing

        addAllMusic(mdbmaps, "Billboard", "0facb34dfb2037ab9c12a1edfa4d5207", "mn0000131293")   ### [Passion Worship Band]
        ### Passion: Awakening

        addAllMusic(mdbmaps, "Billboard", "26c7048587d4a152411db397edf8813b", "mn0003547921")   ### [Bishop T.D. Jakes]
        addDiscogs(mdbmaps, "Billboard", "26c7048587d4a152411db397edf8813b", "448274")    ### [Bishop T.D. Jakes]

        addAllMusic(mdbmaps, "Billboard", "f48a161c0063d474c2eab5dc2e5195cd", "mn0003442077")   ### [Thomas Miles aka Nephew Tommy]
        ### Presents: Prank Phone Calls Vol 5: Church Folks Gotta Laugh Too  ,  Presents: Prank Phone Calls: Church Folks Gotta Laugh Too Vol 2

        addAllMusic(mdbmaps, "Billboard", "efee2dea144675a9daea006446b60011", "mn0002817738")   ### [Brian Brushwood Justin Robert Young]
        ### Night Attack 2: Enjoy The Garden  ,  Night Attack (Live)

        addAllMusic(mdbmaps, "Billboard", "8a8f1ba4fa40b97993e05516b6cbd260", "mn0003232100")   ### [Deitrick Haddon's LXW (League Of Xtraordinary Worshippers)]
        ### Deitrick Haddon's LXW

        addAllMusic(mdbmaps, "Billboard", "7ca86f084b469e552a767a3b582bed15", "mn0001081792")   ### [Swoope]
        ### Sinema

        addAllMusic(mdbmaps, "Billboard", "84c774e5a41773f191b62bbc09774dab", "mn0003465054")   ### [Braiden Sunshine]
        ### Amazing Grace

        addAllMusic(mdbmaps, "Billboard", "62fa94e3fb88cdeb88e50b1f2ae64cfe", "mn0000563196")   ### [Myron Butler & Levi]
        ### On Purpose

        addAllMusic(mdbmaps, "Billboard", "514877cce2b057728c36e0c14a26f0bc", "mn0003586280")   ### [My Dad Wrote A Porno]
        ### My Dad Wrote A Christmas Porno

        addAllMusic(mdbmaps, "Billboard", "3a799a523bc6e3e6e953894a176b7d97", "mn0003874962")   ### [Fresh Start Worship]
        ### Mention

        addAllMusic(mdbmaps, "Billboard", "7f2a3d27565fbcf78578a36856d29954", "mn0000403784")   ### [LeCrae]
        ### Live & Let Live

        addAllMusic(mdbmaps, "Billboard", "d8f8476e6164eabe4fb4ab3ba3f3b7d9", "mn0002558731")   ### [Suzy Rock]
        ### Fuego

        addAllMusic(mdbmaps, "Billboard", "60021b6a1be2c390a233cce11db4b5b9", "mn0002867667")   ### [Them Idiots]
        ### Whirled Tour

        addAllMusic(mdbmaps, "Billboard", "99c44622cf1137d8ffff21a66c3d3028", "mn0000175286")   ### [D. Parton]
        ### Higher Medley

        addAllMusic(mdbmaps, "Billboard", "66694ccf7b6dfaed0f263e8f9569ef71", "mn0003288960")   ### [Dimitri McDowell]
        addDiscogs(mdbmaps, "Billboard", "66694ccf7b6dfaed0f263e8f9569ef71", "4123531")    ### [Dimitri McDowell]
        ### Sweet Victory

        addAllMusic(mdbmaps, "Billboard", "30f2dacc35fad2a476eb8db246f243f5", "mn0002447683")   ### [Mr. Talkbox]
        ### Feel It

        addAllMusic(mdbmaps, "Billboard", "7008c483e43dff7585e75c0588dce00f", "mn0000494388")   ### [GabeReal]
        ### Eye Of The Storm

        addAllMusic(mdbmaps, "Billboard", "4831b39973a020abf296356f3abfe702", "mn0001467056")   ### [Ryan Stevenson]
        ### Eye Of The Storm

        addAllMusic(mdbmaps, "Billboard", "fc61047e5a0c49d67bc6c28515f740f6", "mn0003520491")   ### [Lindy Conant]
        ### Every Nation

        addAllMusic(mdbmaps, "Billboard", "5cf648bdbb963bf9021e2ccfa0b5e811", "mn0003528162")   ### [The Circuit Riders]
        ### Every Nation

        addAllMusic(mdbmaps, "Billboard", "31c5c26ecfd994d64599a8c74fcb0e0c", "mn0003595666")   ### [GEI]
        ### Hang On

        addAllMusic(mdbmaps, "Billboard", "e7fdd33db429c91d51fbb11f05b6b708", "mn0002743423")   ### [Le'Andria]
        ### Grace

        addAllMusic(mdbmaps, "Billboard", "3f4e690b965dfa3c6690758607cabd39", "mn0003662860")   ### [Vincent Bohanan]
        ### We Win: The Kingdom Declaration

        addAllMusic(mdbmaps, "Billboard", "17ade4a4fbc717fa2101363ee7fd37c3", "mn0000178135")   ### [The Edwin Hawkins' Singers]
        ### Oh Happy Day

        # Persist all additions in one go.
        mdbmaps[chartType].save()
        
        
def extraAddsForTop40(mdbmaps, chartType):
    """Add hand-curated AllMusic IDs for Top40 artists and save the map.

    Nothing happens unless `chartType` is "Top40".
    """
    if chartType != "Top40":
        return

    # (primary key in the Top40 map, AllMusic ID)   ### [artist] -- charted titles
    allMusicAdds = (
        ("00d5d59798eeffef591f272260b0bbcc", "mn0000124572"),   ### [Chuck-N-Blood] -- My Dogs
        ("21da687c5162d957e0e8aaa39eff1e80", "mn0003750925"),   ### [The Carters] -- Apes**t  ,  Everything Is Love
        ("c00c9a5d2e3053716de9b79898cb5691", "mn0001225951"),   ### [Mustard] -- Perfect Ten  ,  Ballin'
        ("78f1feb9204bce3173d42798bc9bcb91", "mn0001808325"),   ### [Kobe Bryant] -- Hold Me
        ("5edcd28a4505dc29e4edebd67052e614", "mn0000795894"),   ### [Dolla] -- Cry For Me
        ("5a888aa5d5b0835533b2a73dd895b461", "mn0002528230"),   ### [DCUP] -- We No Speak Americano
        ("7bcb752a835787affb80072d61387b22", "mn0001411030"),   ### [Hollis] -- White Walls
        ("c0051ae12f6048a807213c44b434aa1d", "mn0003348925"),   ### [Jennifer Lawrence] -- The Hanging Tree
        ("1f9b34897f6440e2484f1ac5e00953f7", "mn0003462822"),   ### [Gnash] -- I Hate U, I Love U  ,  Lights Down Low
        ("9d337e01e83fc2b9d9fcbd6930bfb107", "mn0003715465"),   ### [Casper Magico] -- Te Bote  ,  Otro Trago
    )
    for primaryKey, allMusicID in allMusicAdds:
        addAllMusic(mdbmaps, "Top40", primaryKey, allMusicID)

    mdbmaps["Top40"].save()

        
def extraAddsForBillboardYE(mdbmaps, chartType):
    """Add hand-curated AllMusic / Discogs IDs for BillboardYE artists and save the map.

    Nothing happens unless `chartType` is "BillboardYE".
    """
    if chartType != "BillboardYE":
        return

    # (target DB, primary key in the BillboardYE map, ID in the target DB)   ### artist
    manualAdds = (
        ("AllMusic", "4e47b741a3360c9b898fc24d54d61d63", "mn0003075926"),   ### MØ
        ("Discogs",  "4e47b741a3360c9b898fc24d54d61d63", "1883733"),        ### MØ -- Lean On  ,  Turn Down For What
        ("Discogs",  "98f86798fd598bdad1422b426c0dc38e", "164904"),         ### Ali -- Grillz
        ("AllMusic", "6f7b92661d32c01bf942e69bef32a55f", "mn0003750925"),   ### The Carters
        ("Discogs",  "6f7b92661d32c01bf942e69bef32a55f", "6543907"),        ### The Carters
        ("AllMusic", "de6c70bbf926eb8c9dc3b59c65d7086d", "mn0003848150"),   ### Surfaces
        ("Discogs",  "de6c70bbf926eb8c9dc3b59c65d7086d", "6890536"),        ### Surfaces
    )
    for targetDB, primaryKey, externalID in manualAdds:
        if targetDB == "AllMusic":
            addAllMusic(mdbmaps, "BillboardYE", primaryKey, externalID)
        else:
            addDiscogs(mdbmaps, "BillboardYE", primaryKey, externalID)

    mdbmaps["BillboardYE"].save()
        
        
        

def matchItAll(mdbmaps, thresholds, useAlbums=True, mdbmc=None, db=None, toMatch=None):
    """Match a batch of artists in worker processes and store the results in the map.

    Args:
        mdbmaps: dict of map objects keyed by DB/chart name.
        thresholds: keyword arguments bound onto the per-artist matcher function.
        useAlbums: True selects `matchDBArtistWithAlbums`; anything else selects
            `matchDBArtistWithoutAlbums`.
        mdbmc: used only when `toMatch` is None, to fetch the data to match for `db`.
        db: name passed to `mdbmc.getDataToMatch` when `toMatch` is None.
        toMatch: dict mapping a DB/chart name to the list of items to match. Only
            the first key is processed.

    Returns:
        None. Returns early when there is nothing to match.

    Raises:
        ValueError: when neither `toMatch` nor `mdbmc` is supplied.
    """
    # Imported here so the function does not rely on `partial` reaching the
    # notebook namespace through a star-import.
    from functools import partial

    if toMatch is None:
        if mdbmc is None:
            raise ValueError("matchItAll needs either `toMatch` or `mdbmc` to determine what to match")
        toMatch = mdbmc.getDataToMatch(db, maxValues=10000, maxAlbums=50000)

    if len(toMatch) == 0:
        return

    num_processes = 3
    func = matchDBArtistWithAlbums if useAlbums is True else matchDBArtistWithoutAlbums
    pfunc = partial(func, **thresholds)   # bind the thresholds as keyword arguments

    # Only the first DB/chart in `toMatch` is handled per call.
    dbName = next(iter(toMatch))
    copyMapData(mdbmaps[dbName])

    argument_list = toMatch[dbName]
    if len(argument_list) == 0:
        return
    print("Running imap multiprocessing for {0} artists ...".format(len(argument_list)))
    result_list = multiProc(func=pfunc, argument_list=argument_list,
                            num_processes=num_processes)

    start, cmt = clock("Saving...")
    saveMapData(mdbmaps[dbName], result_list)
    elapsed(start, cmt)
    # The message is kept as-is; the sleep it announces is currently disabled.
    print("\nSleeping for 10 seconds...\n")
    #sleep(10)

    #mdbmc.matchMutualMaps()

    
def matchDBArtistWithoutAlbums(item, *args, **kwargs):
    """Find potential DB matches for one artist using the artist name only.

    Args:
        item: pair `(primaryKey, artistData)`; `artistData["ArtistName"]` is read.
        *args: ignored.
        **kwargs: must provide `numArtistName`, `artistNameCutoff`,
            `artistAlbumCutoff`, `numArtistAlbums` and `score`, which are passed
            to the matcher's `setThresholds`.

    Returns:
        `[primaryKey, artistName, artistID, mcs]`, where `artistID` is the primary
        key and `mcs` is the matcher's result.

    Uses the notebook-level `maindb` to build the matcher.
    """
    primaryKey = item[0]
    artistData = item[1]
    artistName   = artistData["ArtistName"]
    artistID     = primaryKey
    artistAlbums = None   # name-only matching: no album list is supplied
    mdbMatcher = matchDBArtist(maindb)
    mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)
    # A stray trailing comma after this call used to turn the statement into a
    # throw-away one-element tuple; removed to match matchDBArtistWithAlbums.
    mdbMatcher.setThresholds(matchNumArtistName=kwargs['numArtistName'], matchArtistNameCutoff=kwargs['artistNameCutoff'],
                             matchArtistAlbumCutoff=kwargs['artistAlbumCutoff'], matchNumArtistAlbums=kwargs['numArtistAlbums'],
                             matchScore=kwargs['score'])
    mcs    = mdbMatcher.findPotentialArtistNameMatchesWithoutAlbums()
    retval = [primaryKey,artistName,artistID,mcs]
    return retval


def matchDBArtistWithAlbums(item, *args, **kwargs):
    """Find potential DB matches for one artist using its name and album list.

    `item` is a pair `(primaryKey, artistData)`; `artistData` supplies
    "ArtistName" and "ArtistAlbums". The thresholds are read from `kwargs`
    (`numArtistName`, `artistNameCutoff`, `artistAlbumCutoff`,
    `numArtistAlbums`, `score`). Returns `[primaryKey, artistName, artistID, mcs]`.
    The matcher is built on the notebook-level `maindb`.
    """
    #time.sleep(0.0025)
    primaryKey, artistData = item[0], item[1]
    artistName = artistData["ArtistName"]
    artistID = primaryKey   # the primary key doubles as the artist ID
    artistAlbums = artistData["ArtistAlbums"]

    mdbMatcher = matchDBArtist(maindb)
    mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)

    thresholdArgs = {
        "matchNumArtistName":     kwargs['numArtistName'],
        "matchArtistNameCutoff":  kwargs['artistNameCutoff'],
        "matchArtistAlbumCutoff": kwargs['artistAlbumCutoff'],
        "matchNumArtistAlbums":   kwargs['numArtistAlbums'],
        "matchScore":             kwargs['score'],
    }
    mdbMatcher.setThresholds(**thresholdArgs)

    return [primaryKey, artistName, artistID, mdbMatcher.findPotentialArtistAlbumMatches()]


def multiProc(func, argument_list, num_processes):
    """Apply `func` to every item of `argument_list` in a process pool.

    Args:
        func: callable applied to each item; it must be picklable for real
            process pools.
        argument_list: sized sequence of inputs (its length drives the progress bar).
        num_processes: number of worker processes.

    Returns:
        List of results in input order (`imap` preserves ordering).

    The pool is now used as a context manager so its workers are shut down when
    the call finishes; previously the pool was never closed and every call left
    its worker processes behind.
    """
    with Pool(processes=num_processes) as pool:
        results = pool.imap(func=func, iterable=argument_list)
        # Consume every result inside the `with` block, before the pool is torn down.
        return list(tqdm(results, total=len(argument_list)))

# Load Chart Analysis

In [None]:
#mcm    = matchChartMusic(mdb)
# Set `recreate` to True to rebuild the DB artists before loading.
recreate = False
if recreate:
    maindb = mainDB(mdb=None, create=True, debug=True)
    maindb.setDBArtists(recreate=False)
# Both paths end with a fresh non-creating instance that loads the DB data map.
maindb = mainDB(mdb=None, create=False, debug=False)
maindb.loadDBDataMap()
_, _ = clock("Last Run")

# Analyze Data

In [None]:
## Basic stuff
%load_ext autoreload
%autoreload
from chartArtistAlbumData import chartArtistAlbumData


singleArtistAlbumData = {}
manyArtistAlbumData   = {}

chartType = "BillboardYE"
mType     = "Full"
cad = chartArtistAlbumData(chartType, ignoreMultiNames=False)
cad.createIndivArtistAlbumData()

In [None]:
# Disabled maintenance cell (guarded by `if False:`): when enabled it would re-create the
# "multi" artist-name DB, force-reload it from multi.yaml, add `multirenames` and save it.
# NOTE(review): `multirenames` is not defined anywhere in the visible notebook -- confirm
# where it comes from before enabling this cell.
if False:
    multiManDB = masterArtistNameDB("multi", init=True)
    multiManDB.forceReload(getFile("multi.yaml"))
    multiManDB.addRenames(multirenames)
    multiManDB.save()
    #multiManDB.checkForRecursives()

# Create Master Match Class

In [None]:
# Build the chart map for the selected chart type and the master matcher that the
# cells below operate on. Relies on `chartType` and `maindb` from earlier cells.
mdbmaps = {}
print("ChartType = {0}".format(chartType))
mdbmaps[chartType] = musicDBMap(chartType, init=False, copy=False)
mdbmc = masterDBMatchClass(maindb, mdbmaps)


def reMatch(mdbmaps=mdbmaps, mdbmc=mdbmc, mtype="Single"):
    """Rebuild the match data for the current chart type and hand it to `mdbmc`.

    `mtype` selects the artist/album set read from the notebook-level `cad`:
    "Single", "Many" or "Full". Any other value does nothing. The defaults for
    `mdbmaps` and `mdbmc` are bound when this cell is run; `chartType` and `cad`
    are read from the notebook namespace at call time.
    """
    getterNames = (
        ("Single", "getSingleArtistAlbumData"),
        ("Many",   "getManyArtistAlbumData"),
        ("Full",   "getFullArtistAlbumData"),
    )
    for mtypeName, getterName in getterNames:
        if mtype != mtypeName:
            continue
        matchData = {}
        for artistName, artistAlbums in getattr(cad, getterName)().items():
            # The chart map assigns the primary key for each artist name.
            primaryKey = mdbmaps[chartType].getPrimaryKey(artistName=artistName, artistID=None)
            matchData[primaryKey] = {"ArtistName": artistName, "ArtistAlbums": artistAlbums}
        mdbmc.setDBMatchData(chartType, matchData)
        return
        
def showMatchedStatus(chartType, mdbmaps, mdbmc):
    """Print how many artists the chart map holds versus how many are in the match data."""
    numMatched = mdbmaps[chartType].getSize()
    numToMatch = len(mdbmc.matchData[chartType])
    summary = "Matched {0} / {1} Artists for {2} ChartType".format(numMatched, numToMatch, chartType)
    print(summary)
    
# Load the match data for the artist/album set selected by `mType` (set in the "Analyze Data" cell).
reMatch(mtype=mType)
#reMatch()
#mbDF = mdbmc.getMasterDF(chartType)

## Match Using Names And Albums

In [None]:
def manualAppends(toMatch, chartType, minAlbums=0, add=True):
    """Print copy-paste-ready code stubs for the artists in `toMatch[chartType]`.

    Args:
        toMatch: dict mapping a chart type to a list of `(primaryKey, artistData)`
            items; `artistData` provides "ArtistName" and, when `add` is true,
            "ArtistAlbums".
        chartType: key into `toMatch`, also embedded in the printed stubs.
        minAlbums: with `add=True`, artists with fewer albums are skipped.
        add: True prints `addAllMusic(...)` / `addDiscogs(...)` stubs with an empty
            ID plus a comment line listing the albums; False prints
            `ignores.append("<name>")` lines instead.

    A final `mdbmaps["<chartType>"].save()` line is always printed.

    The `add=False` branch used to end in an `if " & " in name: continue` followed
    by an unreachable duplicate print; that dead code has been removed.
    """
    def quote(value):
        # Wrap a value in double quotes, as it should appear in the generated code.
        return "\"{0}\"".format(value)

    for item in toMatch[chartType]:
        if add:
            albums = item[1]["ArtistAlbums"]
            if len(albums) < minAlbums:
                continue
            stubArgs = (quote(chartType), quote(item[0]), quote(""), item[1]["ArtistName"])
            print("addAllMusic(mdbmaps, {0}, {1}, {2})   ### [{3}]".format(*stubArgs))
            print("addDiscogs(mdbmaps, {0}, {1}, {2})    ### [{3}]".format(*stubArgs))
            print("### {0}\n".format("  ,  ".join(albums)))
        else:
            print("ignores.append(\"{0}\")".format(item[1]["ArtistName"]))

    print("mdbmaps[{0}].save()".format(quote(chartType)))

In [None]:
# For every entry of getIterItems() (key used as minAlbums, value supplying "Max" and "Vals"),
# repeatedly fetch the data still to match and run the album-based matcher: at most 10
# passes per entry, stopping early once nothing is left. The match data is reloaded at the end.
# NOTE(review): `ignoresList` is only assigned in a later cell of the visible notebook, so on a
# fresh kernel this cell raises NameError unless that cell is run first (or the name is
# provided elsewhere, e.g. by the star-import) -- confirm.
for minAlbums,minAlbumsData in getIterItems().items():
    print("="*10,minAlbums)
    maxAlbums = minAlbumsData["Max"]
    maxValues = minAlbumsData["Vals"]
    for i in range(10):
        toMatch   = mdbmc.getDataToMatch(chartType, maxValues=maxValues, maxAlbums=maxAlbums, minAlbums=minAlbums, ignores=ignoresList)
        if len(toMatch[chartType]) == 0:
            break
        matchItAll(mdbmaps, getThresholds(minAlbums), toMatch=toMatch)
reMatch(mtype=mType)
print("Done.\n\n")

In [None]:
# Report the chart map size versus the number of artists in the match data.
showMatchedStatus(chartType, mdbmaps, mdbmc)

## Match Only Using Names

#### Show The Remaining Artists

In [None]:
def showRemainingMatches(chartType, mdbmc):
    """Print two framed sections by querying `mdbmc.getDataToMatch` twice.

    The first query uses `useKnown=True`, the second `useKnown=False, dbMatches=0`.
    The returned data is discarded; only what the calls print is of interest here.
    Reads the notebook-level `ignoresList`.
    """
    ruler  = '='*100
    marker = "="*10

    print(ruler)

    print(marker, "Artists That Were Not Tested (Low Albums)", marker)
    mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0,
                         useKnown=True, ignores=ignoresList)

    print(marker, "Previously Tested Artists Without A Match", marker)
    mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0,
                         useKnown=False, dbMatches=0, ignores=ignoresList)

    print(ruler)
# Run the report for the current chart type.
showRemainingMatches(chartType, mdbmc)

#### Try To Match Artists That Were Previously Tested Without A Match

In [None]:
# Single pass (range(1)) of name-only matching (useAlbums=False) over the data returned
# for useKnown=False / dbMatches=0; skipped when nothing is returned. The match data is
# reloaded afterwards.
for i in range(1):
    toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=False, dbMatches=0, ignores=ignoresList)
    if len(toMatch[chartType]) == 0:
        break
    matchItAll(mdbmaps, getThresholdsWithoutAlbums(1.0), toMatch=toMatch, useAlbums=False)
reMatch(mtype=mType)

In [None]:
# Report the chart map size versus the number of artists in the match data.
showMatchedStatus(chartType, mdbmaps, mdbmc)

#### Try To Match Artists That Were Not Previously Tested (Low Albums)

In [None]:
# Select the data to match with useKnown=True; consumed by the name-only pass in the next cell.
toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=True, ignores=ignoresList)

In [None]:
# Name-only matching (useAlbums=False) over `toMatch` from the previous cell, then reload the match data.
matchItAll(mdbmaps, getThresholdsWithoutAlbums(1.0), toMatch=toMatch, useAlbums=False)
reMatch(mtype=mType)

In [None]:
# Re-run the remaining-artists report after the name-only pass.
showRemainingMatches(chartType, mdbmc)

#### Extra Artists Info

In [None]:
# Apply the hand-curated ID assignments for this chart type, then report status and what remains.
extraKnownArtists(mdbmaps, chartType)
showMatchedStatus(chartType, mdbmaps, mdbmc)
showRemainingMatches(chartType, mdbmc)

#### Unmatched Artists

In [None]:
# Fetch the data returned for useKnown=False / dbMatches=0; the cell output is its item count.
toMatch   = mdbmc.getDataToMatch(chartType, maxValues=10000, maxAlbums=100000, minAlbums=0, useKnown=False, dbMatches=0, ignores=ignoresList)
len(toMatch[chartType])

In [None]:
# Print addAllMusic/addDiscogs stub lines (with empty IDs) for every item in `toMatch`.
manualAppends(toMatch, chartType, minAlbums=0, add=True)

In [None]:

# Artist names written to chartIgnoreArtists.yaml.
# (Three earlier assignments to `ignoresList` were dead stores, overwritten before
# any use, and have been dropped.)
ignoredArtists = [
    "Artists Stand Up To Cancer",
    "Cast Of Rent",
]
saveFile(idata=ignoredArtists, ifile="chartIgnoreArtists.yaml")

# Artist names written to chartUnknownArtists.yaml.
unknownList = [
    "Amil-lion",
    "Yung Lito",
    "Dennis Reed & Gap",
    "Idols",
    "X Factor Finalists",
]
saveFile(idata=unknownList, ifile="chartUnknownArtists.yaml")

# `ignoresList` holds the two file names written above; it is what the
# `ignores=ignoresList` arguments elsewhere in this notebook receive.
# NOTE(review): cells earlier in the notebook already use `ignoresList`, so this cell
# has to be run before them on a fresh kernel.
ignoresList = ["chartIgnoreArtists.yaml", "chartUnknownArtists.yaml"]


In [None]:
# Ad-hoc lookup: display what cad.getAlbumsData returns for "Stand By Me".
cad.getAlbumsData("Stand By Me")

In [None]:
# Step 2: re-create the "main" artist-name DB from main.yaml, save it and re-export main.yaml.
updateManDB(2)

In [None]:
# This cell used to repeat, line for line, the ten addAllMusic(...) calls and the final
# mdbmaps["Top40"].save() from extraAddsForTop40 (defined in the "Common Code" cell).
# Calling that function performs the same additions and the same save.
extraAddsForTop40(mdbmaps, "Top40")

In [None]:
# Manual AllMusic / Discogs ID assignments for Billboard artists, followed by a save of the
# Billboard map. Each `### [name]` comment names the artist; the `###` line below lists titles.
# NOTE(review): the same entries also appear inside extraAddsForBillboard in the
# "Common Code" cell; this cell requires mdbmaps["Billboard"] to exist.
addAllMusic(mdbmaps, "Billboard", "f48a161c0063d474c2eab5dc2e5195cd", "mn0003442077")   ### [Thomas Miles aka Nephew Tommy]
### Presents: Prank Phone Calls Vol 5: Church Folks Gotta Laugh Too  ,  Presents: Prank Phone Calls: Church Folks Gotta Laugh Too Vol 2

addAllMusic(mdbmaps, "Billboard", "efee2dea144675a9daea006446b60011", "mn0002817738")   ### [Brian Brushwood Justin Robert Young]
### Night Attack 2: Enjoy The Garden  ,  Night Attack (Live)

addAllMusic(mdbmaps, "Billboard", "8a8f1ba4fa40b97993e05516b6cbd260", "mn0003232100")   ### [Deitrick Haddon's LXW (League Of Xtraordinary Worshippers)]
### Deitrick Haddon's LXW

addAllMusic(mdbmaps, "Billboard", "7ca86f084b469e552a767a3b582bed15", "mn0001081792")   ### [Swoope]
### Sinema

addAllMusic(mdbmaps, "Billboard", "84c774e5a41773f191b62bbc09774dab", "mn0003465054")   ### [Braiden Sunshine]
### Amazing Grace

addAllMusic(mdbmaps, "Billboard", "62fa94e3fb88cdeb88e50b1f2ae64cfe", "mn0000563196")   ### [Myron Butler & Levi]
### On Purpose

addAllMusic(mdbmaps, "Billboard", "514877cce2b057728c36e0c14a26f0bc", "mn0003586280")   ### [My Dad Wrote A Porno]
### My Dad Wrote A Christmas Porno

addAllMusic(mdbmaps, "Billboard", "3a799a523bc6e3e6e953894a176b7d97", "mn0003874962")   ### [Fresh Start Worship]
### Mention

addAllMusic(mdbmaps, "Billboard", "7f2a3d27565fbcf78578a36856d29954", "mn0000403784")   ### [LeCrae]
### Live & Let Live

addAllMusic(mdbmaps, "Billboard", "d8f8476e6164eabe4fb4ab3ba3f3b7d9", "mn0002558731")   ### [Suzy Rock]
### Fuego

addAllMusic(mdbmaps, "Billboard", "60021b6a1be2c390a233cce11db4b5b9", "mn0002867667")   ### [Them Idiots]
### Whirled Tour

addAllMusic(mdbmaps, "Billboard", "99c44622cf1137d8ffff21a66c3d3028", "mn0000175286")   ### [D. Parton]
### Higher Medley

addAllMusic(mdbmaps, "Billboard", "66694ccf7b6dfaed0f263e8f9569ef71", "mn0003288960")   ### [Dimitri McDowell]
addDiscogs(mdbmaps, "Billboard", "66694ccf7b6dfaed0f263e8f9569ef71", "4123531")    ### [Dimitri McDowell]
### Sweet Victory

addAllMusic(mdbmaps, "Billboard", "30f2dacc35fad2a476eb8db246f243f5", "mn0002447683")   ### [Mr. Talkbox]
### Feel It

addAllMusic(mdbmaps, "Billboard", "7008c483e43dff7585e75c0588dce00f", "mn0000494388")   ### [GabeReal]
### Eye Of The Storm

addAllMusic(mdbmaps, "Billboard", "4831b39973a020abf296356f3abfe702", "mn0001467056")   ### [Ryan Stevenson]
### Eye Of The Storm

addAllMusic(mdbmaps, "Billboard", "fc61047e5a0c49d67bc6c28515f740f6", "mn0003520491")   ### [Lindy Conant]
### Every Nation

addAllMusic(mdbmaps, "Billboard", "5cf648bdbb963bf9021e2ccfa0b5e811", "mn0003528162")   ### [The Circuit Riders]
### Every Nation

addAllMusic(mdbmaps, "Billboard", "31c5c26ecfd994d64599a8c74fcb0e0c", "mn0003595666")   ### [GEI]
### Hang On

addAllMusic(mdbmaps, "Billboard", "e7fdd33db429c91d51fbb11f05b6b708", "mn0002743423")   ### [Le'Andria]
### Grace

addAllMusic(mdbmaps, "Billboard", "3f4e690b965dfa3c6690758607cabd39", "mn0003662860")   ### [Vincent Bohanan]
### We Win: The Kingdom Declaration

addAllMusic(mdbmaps, "Billboard", "17ade4a4fbc717fa2101363ee7fd37c3", "mn0000178135")   ### [The Edwin Hawkins' Singers]
### Oh Happy Day

mdbmaps["Billboard"].save()

In [None]:
# Manual AllMusic / Discogs ID assignments for Billboard artists, followed by a save of the
# Billboard map. Each `### [name]` comment names the artist; the `###` line below lists titles.
# NOTE(review): the same entries also appear inside extraAddsForBillboard in the
# "Common Code" cell; this cell requires mdbmaps["Billboard"] to exist.
addAllMusic(mdbmaps, "Billboard", "d6cac33cc3555e4e1f5ce0d2b3cae450", "mn0001481616")   ### [Connor Christian & Southern Gothic]
addDiscogs(mdbmaps, "Billboard", "d6cac33cc3555e4e1f5ce0d2b3cae450", "4558282")    ### [Connor Christian & Southern Gothic]
### New Hometown

addDiscogs(mdbmaps, "Billboard", "808c8b3a9909c7d7a3e5753792d1b995", "150433")    ### [LV]
### Gangsta's Paradise (From "Dangerous Minds")

addAllMusic(mdbmaps, "Billboard", "89eab0113dd350e54cc04867d4cbcc04", "mn0000184098")   ### [John McDermott]
addDiscogs(mdbmaps, "Billboard", "89eab0113dd350e54cc04867d4cbcc04", "1436273")    ### [John McDermott]
### The Irish Tenors: Home For Christmas  ,  The Irish Tenors: Live In Belfast  ,  The Irish Tenors: Ellis Island  ,  The Irish Tenors: Live In Belfast  ,  The Irish Tenors: Ellis Island

addDiscogs(mdbmaps, "Billboard", "265d96f09b9e09bd2ea57aa249d40bd6", "4995184")    ### [New Life]
### Blessed By Association

addDiscogs(mdbmaps, "Billboard", "b66b1bfa1a8b0725ba1f139b87ad3a4c", "92281")    ### [Loon]
### I Need A Girl (Part Two)  ,  I Don't Wanna Know

addAllMusic(mdbmaps, "Billboard", "832b5faffd094a5ab3b22f67d40db4b7", "mn0001049094")   ### [RKM]
### Masterpiece: Nuestra Obra Maestra

addDiscogs(mdbmaps, "Billboard", "8d31f7e1853f2b2a38cb37cdc92ac03d", "1813760")    ### [Lil Peanut]
### Lean Wit It, Rock Wit It

addDiscogs(mdbmaps, "Billboard", "5903a04a9d6f7e1f2434dd13def9c352", "1702929")    ### [Nayer]
### Give Me Everything  ,  Hey Mama

addAllMusic(mdbmaps, "Billboard", "ce6a59b7abc5d76a683bf2aa188adf74", "mn0000178007")   ### [Russell Moore]
### Timeless Hits From The Past: Bluegrassed

addAllMusic(mdbmaps, "Billboard", "0b1e4a71bbd133e8050ba86a4492b04b", "mn0003744509")   ### [Ranna Royce]
### The Whoodlum Ball

addAllMusic(mdbmaps, "Billboard", "8a34e0d5cd9db90fc3f598b5f615854d", "mn0003712915")   ### [Smith And Hay]
### The Whoodlum Ball

mdbmaps["Billboard"].save()

In [None]:
addAllMusic(mdbmaps, "Top40", "5a888aa5d5b0835533b2a73dd895b461", "mn0002528230")   ### [DCUP]
### We No Speak Americano

addAllMusic(mdbmaps, "Top40", "9d337e01e83fc2b9d9fcbd6930bfb107", "mn0003715465")   ### [Casper Magico]
### Te Bote  ,  Otro Trago

mdbmaps["Top40"].save()

In [None]:
1/0
addAllMusic(mdbmaps, "Top40", "b67c4a39d846b0e14fcda8299193701a", "mn0000417717")   ### [Raybion Bros.]
addDiscogs(mdbmaps, "Top40", "b67c4a39d846b0e14fcda8299193701a", "2188194")    ### [Raybion Bros.]
### Butterfly Kisses

addAllMusic(mdbmaps, "Top40", "00d5d59798eeffef591f272260b0bbcc", "mn0000124572")   ### [Chuck-N-Blood]
### My Dogs

addAllMusic(mdbmaps, "Top40", "866a64f5a5e684f61aa573af7f63bc57", "mn0000233423")   ### [Magoo]
### Up Jumps Da Boogie

addAllMusic(mdbmaps, "Top40", "78f1feb9204bce3173d42798bc9bcb91", "mn0001808325")   ### [Kobe Bryant]
### Hold Me

addAllMusic(mdbmaps, "Top40", "d92e471232aa6a0bc4a03a49f16b0997", "mn0000186401")   ### [Karen O.]
addDiscogs(mdbmaps, "Top40", "d92e471232aa6a0bc4a03a49f16b0997", "245778")    ### [Karen O.]
### Hello Tomorrow

addAllMusic(mdbmaps, "Top40", "5edcd28a4505dc29e4edebd67052e614", "mn0000795894")   ### [Dolla]

addAllMusic(mdbmaps, "Top40", "7bcb752a835787affb80072d61387b22", "mn0001411030")   ### [Hollis]
### White Walls

addAllMusic(mdbmaps, "Top40", "c0051ae12f6048a807213c44b434aa1d", "mn0003348925")   ### [Jennifer Lawrence]
addDiscogs(mdbmaps, "Top40", "c0051ae12f6048a807213c44b434aa1d", "4210193")    ### [Jennifer Lawrence]
### The Hanging Tree

addAllMusic(mdbmaps, "Top40", "083b67cc423fe5482eaf1412aebb65b3", "mn0003323773")   ### [Lookas]
addDiscogs(mdbmaps, "Top40", "083b67cc423fe5482eaf1412aebb65b3", "3619379")    ### [Lookas]
### GDFR

addAllMusic(mdbmaps, "Top40", "f37bd04269e23d0c4c10c435a4f1dd7f", "mn0002943184")   ### [Mnek]
### Never Forget You  ,  So Good

addAllMusic(mdbmaps, "Top40", "1f9b34897f6440e2484f1ac5e00953f7", "mn0003462822")   ### [Gnash]
### I Hate U, I Love U  ,  Lights Down Low

addAllMusic(mdbmaps, "Top40", "f731f61bae9f9216f33f0cf76d403fee", "mn0000351749")   ### [Royce Da 5'9]
addDiscogs(mdbmaps, "Top40", "f731f61bae9f9216f33f0cf76d403fee", "40792")    ### [Royce Da 5'9]
### Not Alike

addAllMusic(mdbmaps, "Top40", "296504ec4590a1522dcf6afb838fd6f4", "mn0003332851")   ### [London On Da Track]
### Numbers

mdbmaps["Top40"].save()

In [None]:
cad.getAlbumsData("Common Ground")

# Analyze Matches

In [None]:
def analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=0.1):
    """
    Compare each chart artist name with the name stored under its matched DB IDs.

    For every artist in ``mdbmaps[chartType]`` and every (db, dbID) pair in that
    artist's match data, the name returned by
    ``maindb.getArtistDBNameFromID(db, dbID)`` is compared with the chart name
    using ``SequenceMatcher(...).ratio()``.

    Args:
        maindb: object providing ``getArtistDBNameFromID(db, dbID)``.
        mdbmaps: mapping of chart type -> map object providing ``getArtists()``
            and ``getArtistDataByKey(key)``.
        chartType: key into ``mdbmaps``.
        maxVal: ratios at or above this value are treated as a match and skipped.
        diffVal: width of the window below ``maxVal`` in which a pair is
            reported as a rename candidate.

    Returns:
        (toget, dbRenames) where ``toget`` maps db -> list of chart artist names
        whose dbID has no name in ``maindb``, and ``dbRenames`` maps the DB
        name -> chart name for every near match that was reported.
    """
    # Databases whose IDs are never name-checked here.
    skippedDBs = {"DatPiff", "MetalStorm", "LastFM", "CDandLP", "AceBootlegs", "RockCorner", "MusicStack", "RateYourMusic"}

    dbRenames = {}
    toget = {}
    chartMap = mdbmaps[chartType]
    for primaryKey, primaryArtistName in chartMap.getArtists().items():
        artistData = chartMap.getArtistDataByKey(primaryKey)
        for db, dbID in artistData.getDict().items():
            if db in skippedDBs or dbID is None:
                continue

            secondaryArtistName = maindb.getArtistDBNameFromID(db, dbID)
            if secondaryArtistName is None:
                # The ID is mapped but the DB entry itself is not available.
                toget.setdefault(db, []).append(primaryArtistName)
                continue

            ratio = SequenceMatcher(None, primaryArtistName, secondaryArtistName).ratio()
            # Only ratios inside [maxVal - diffVal, maxVal) are rename candidates.
            if ratio >= maxVal or ratio < maxVal - diffVal:
                continue

            # Report each (DB name -> chart name) pair only once.
            if dbRenames.get(secondaryArtistName) == primaryArtistName:
                continue
            dbRenames[secondaryArtistName] = primaryArtistName
            print("[{0: <30} {1: <4} {2: >30}] \t --> ({3}) {4} / {5} ({6})".format(primaryArtistName,round(ratio,2),secondaryArtistName,chartType,primaryKey,dbID,db))

    # toget is keyed by DB, so its length is the number of DBs; count the names instead.
    numToGet = sum(len(names) for names in toget.values())
    print("Found {0} artists to get.".format(numToGet))
    print("Found {0} artists to rename.".format(len(dbRenames)))
    return toget, dbRenames

In [None]:
toget, dbRenames = analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=1.0)

In [None]:
analyzeRenames(manDB, dbRenames)

In [None]:
updateManDB(2)

In [None]:
toget

In [None]:
removeDBRenames(mdbmaps, "Billboard", dbRenames)

In [None]:
def removeDBRenames(mdbmaps, chartType, dbRenames):
    """
    Remove both names of every rename pair from the chart map, then save the map.

    Args:
        mdbmaps: mapping of chart type -> map object providing
            ``getPrimaryKey``, ``removeArtistByKey`` and ``save``.
        chartType: key into ``mdbmaps``.
        dbRenames: dict whose keys and values are artist names to remove.
    """
    for oldName, newName in dbRenames.items():
        # The key name is handled first, then the value name.
        for artistName in (oldName, newName):
            primaryKey = mdbmaps[chartType].getPrimaryKey(artistName=artistName, artistID=None)
            if primaryKey is None:
                continue
            mdbmaps[chartType].removeArtistByKey(primaryKey)
    mdbmaps[chartType].save()
    
    
def analyzeRenames(manDB, dbRenames):
    """
    Normalise the direction of each rename pair and write the result to
    ``relDBRenames2.yaml``.

    Stage 1 works on the ``dbRenames`` dict passed in (it is modified in
    place): a pair whose key is already in ``manDB.artistNameDB`` while its
    value is not gets reversed. The dict is then saved.

    After that, three swap passes are run over the saved file. Each pass
    reloads the file, reverses the pairs it selects and saves the file again:

      1. value is pure ASCII, key is not, and key has no curly quote;
      2. key equals value with " the " capitalised to " The ";
      3. value contains a curly quote (’ or “) and key contains none.

    The three passes are run twice in a row.

    Args:
        manDB: object exposing an ``artistNameDB`` mapping.
        dbRenames: dict of older name -> best name. Mutated by stage 1.

    Returns:
        None. The outcome is the content of ``relDBRenames2.yaml``.
    """
    renamesFile = "relDBRenames2.yaml"

    def isAscii(ele):
        # Pure ASCII text encodes to exactly one byte per character.
        return len(ele) == len(ele.encode())

    def hasCurlyQuote(name):
        return '’' in name or "“" in name

    def swapWhere(shouldSwap):
        # Reload the saved renames, reverse every pair accepted by shouldSwap, save again.
        renames = getFile(renamesFile)
        redos = {}
        dels = []
        for k, v in renames.items():
            if shouldSwap(k, v):
                redos[v] = k
                dels.append(k)
                print('\t', k, '\t', v)
        for k in dels:
            del renames[k]
        renames.update(redos)
        saveFile(idata=renames, ifile=renamesFile)

    def asciiValueOnly(k, v):
        return isAscii(v) and not isAscii(k) and not hasCurlyQuote(k)

    def capitalisedThe(k, v):
        return " the " in v and " The " in k and k == v.replace(" the ", " The ")

    def curlyQuoteValueOnly(k, v):
        # The key must be free of BOTH curly quotes. The earlier form tested
        # "“" against the value again, so a value containing “ never qualified.
        return hasCurlyQuote(v) and not hasCurlyQuote(k)

    if len(dbRenames) == 0:
        print("Nothing to process.")
        return

    # Stage 1: reverse pairs whose key is a known name and whose value is not.
    redos = {}
    dels = []
    for oldername, bestname in dbRenames.items():
        if oldername in manDB.artistNameDB.keys() and bestname not in manDB.artistNameDB.keys():
            print(oldername,bestname)

            redos[bestname] = oldername
            dels.append(oldername)
            print('\t',oldername,'\t',bestname)

    print("# Renames: {0}".format(len(dbRenames)))
    print("# Dels: {0}".format(len(dels)))
    for oldername in dels:
        del dbRenames[oldername]

    print("# Redos: {0}".format(len(redos)))
    dbRenames.update(redos)
    print("# Renames: {0}".format(len(dbRenames)))
    saveFile(idata=dbRenames, ifile=renamesFile)

    # Stage 2: the three swap passes, applied twice (the second round is kept
    # so that the sequence of file writes is the same as before).
    for _ in range(2):
        for shouldSwap in (asciiValueOnly, capitalisedThe, curlyQuoteValueOnly):
            swapWhere(shouldSwap)
    
    
    

In [None]:
saveData = []
for fullName in sorted(list(cad.manyArtists.keys())):
    appendName = " & ".join(["[{0}]".format(manDB.renamed(indivName)) for indivName in sorted(list(cad.manyArtists[fullName]))])
    saveData.append([fullName, {"Guess": appendName, "Truth": ''}]) # = list(cad.manyArtists[fullName].keys())
saveFile(idata=saveData, ifile="tmp.yaml")

In [None]:
fixedData = getFile("tmp.yaml")

In [None]:
multis = []
multirenames = {}
for item in fixedData:
    if len(item[1]["Truth"]) > 0:
        name = item[1]["Truth"]
        if len(name.split("] & [")) == 1:
            multis.append(item[1]["Truth"][1:-1])
        else:
            rename = name[1:-1].split("] & [")
            multirenames[item[0]] = " ::: ".join(rename)
        
print("Found {0} renames".format(len(multirenames)))
print("Found {0} multi-artists".format(len(multis)))

#### Save MultiArtists (if found)

In [None]:
knownFilename = "../multiartist/knownMultiArtists.yaml"
knownMultis = getFile(knownFilename)
print(len(multis))
print(len(knownMultis))
knownMultis += multis
print(len(knownMultis))
knownMultis = sorted(list(set(knownMultis)))
print(len(knownMultis))
saveFile(idata=knownMultis, ifile=knownFilename)

#### Save multi renames (if needed)

In [None]:
multiManDB = masterArtistNameDB("multi", init=False)
#multiManDB.forceReload(getFile("main.yaml"))
multiManDB.addRenames(multirenames)

In [None]:
multiManDB = masterArtistNameDB("multi", init=False)
multiManDB.addRenames(multirenames)
multiManDB.save()
saveFile(idata=multiManDB.getRenames(), ifile="multi.yaml")
multiManDB = masterArtistNameDB("multi", init=True)
multiManDB.forceReload(getFile("multi.yaml"))
multiManDB.checkForRecursives()
multiManDB = masterArtistNameDB("multi", init=False)

In [None]:
#### Edit main.yaml (if needed)

In [None]:
mdbmaps["Billboard"].getDF()

In [None]:
mdf = mdbmaps["BillboardYE"].getDF()
mdf.shape
#mdf[mdf["DBMatches"] == 0]

# Multi Match

In [None]:
cad.getTypeArtistAlbumData("Joe Rene")

In [None]:
toMatch["Billboard"]

In [None]:
manualAppends(cad, mType, toMatch, chartType, minAlbums=0, add=True)

In [None]:
mdbmc.getMasterDF("Billboard")

In [None]:
matchItAll(mdbmaps, getThresholdsWithoutAlbums(0.85), toMatch=toMatch, useAlbums=False)
reMatch()

In [None]:
#singleArtistAlbumData["Billboard"]['Ernest']
saveFile(idata=[x[1]["ArtistName"] for x in toMatch["Top40"]], ifile="toget.p")

# Find Near Artist Name Matches

In [None]:
toget, dbRenames = analyzeMatches(maindb, mdbmaps, chartType, maxVal=1.0, diffVal=1.0)

In [None]:
if len(dbRenames) > 0:
    analyzeRenames(manDB, dbRenames)
    # Test for overlap with manDB

    dbRenames = getFile(ifile="relDBRenames2.yaml")
    redos = {}
    dels  = []
    for oldername,bestname in dbRenames.items():
        if oldername in manDB.artistNameDB.keys(): # and bestname not in manDB.artistNameDB.keys():
            #print("Older=[{0}] \t Best=[{1}]".format(oldername,bestname))

            redos[bestname] = oldername
            dels.append(oldername)
            #print('PROBLEM ({0} in master DB): \t{1}  -->  {2}'.format(oldername, oldername, bestname))
            try:
                matchOlderData = mdbmaps[chartType].getArtistDataByName(oldername).getDict()
            except:
                matchOlderData = {}

            try:
                matchBestData  = mdbmaps[chartType].getArtistDataByName(bestname).getDict()
            except:
                matchBestData = {}

            #print("Older: {0}".format(matchOlderData))
            #print("Best:  {0}".format(matchBestData))
            #print("\n")
        else:
            print("{0}: {1}".format(oldername, bestname))

    print("# Renames: {0}".format(len(dbRenames)))
    print("# Dels: {0}".format(len(dels)))

In [None]:
manDB = masterArtistNameDB("main", init=True)
manDB.forceReload(getFile("main.yaml"))
manDB.addRenames(getFile("relDBRenames2.yaml"))
manDB.checkForRecursives()

In [None]:
manDB = masterArtistNameDB("main", init=True)
manDB.forceReload(getFile("main.yaml"))
manDB.addRenames(getFile("relDBRenames2.yaml"))
manDB.checkForRecursives()
manDB.save()
manDB = masterArtistNameDB("main", init=False)
saveFile(idata=manDB.dbRenames, ifile="main.yaml")
removeDBRenames(mdbmaps, chartType, dbRenames)

In [None]:
removeDBRenames(mdbmaps, chartType, dbRenames)

In [None]:
vals="""
[Sander Van Doom                0.9                Sander Van Doorn] 	 --> (Top40) 27813b44b4282de50171e13adb4ebcf9 / 183267 (Discogs)
"""

lines=vals.split("\n")
lines2=[x.split("({0}) ".format(chartType))[1] for x in lines if len(x) > 0]
names2=[x.split("({0}) ".format(chartType))[0] for x in lines if len(x) > 0]
lines3={x.split(" / ")[0]: x[x.find("(")+1:-1] for x in lines2}
names3=[x.split("    ")[0].strip()[1:] for x in names2]
if len(names3) != len(lines3):
    raise ValueError("Err")

for i,(k,v) in enumerate(lines3.items()):
    artistName = names3[i]
    #print("mdbmaps[chartType].addArtistDataByID(\'{0}\', \'{1}\', \'{2}\')".format(k,v,))
    print("### ----> {0}".format(artistName))
    print("mdbmaps[chartType].addArtistDataByID(\'{0}\', \'{1}\', None)".format(k,v))    
    albums = singleArtistAlbumData[chartType][artistName]
    print("### {0}".format("  ,  ".join(albums)))
    print("### ignores.append(\"{0}\")".format(artistName))
    print("\n")
    
    
#mdbmaps[chartType].addArtistDataByID("e309c2e3fc905eae304b71e10e82eb99", "AllMusic", '0000803469')
print("")
print("mdbmaps[chartType].save()")
print("saveFile(idata=list(set(ignores)), ifile=\"chartIgnores.yaml\")")

In [None]:
### ----> Sander Van Doom
mdbmaps[chartType].addArtistDataByID('27813b44b4282de50171e13adb4ebcf9', 'Discogs', None)
### Grasshopper
### ignores.append("Sander Van Doom")



mdbmaps[chartType].save()
saveFile(idata=list(set(ignores)), ifile="chartIgnores.yaml")

In [None]:
toget

****
****
****
****

# Multis

In [None]:
ignores.append('Daryl Hall & John Oates')

In [None]:
from multiArtist import multiartist
mularts  = multiartist(cutoff=0.9, discdata=None, exact=False)
knownMultiArtists = getFile("../multiartist/knownMultiArtists.yaml")
print(len(knownMultiArtists))
print(len(mdbmaps[chartType].getArtists()))
for idx,artistName in mdbmaps[chartType].getArtists().items():
    N = mularts.getArtistNames(artistName)
    if len(N) > 1:
        result = {subName: mdbmaps[chartType].isKnownByName(subName) for subName in N}
        if not any(result.values()):
            knownMultiArtists.append(artistName)
            #print(result)
        #print("ignores.append(\'{0}\')".format(artistName))

print(len(knownMultiArtists))
knownMultiArtists = list(set(knownMultiArtists))
print(len(knownMultiArtists))
        
#saveFile(idata = sorted(knownMultiArtists), ifile="../multiartist/knownMultiArtists.yaml")

In [None]:
ignores = getFile("chartIgnores.yaml")
dels = []
for idx,artistName in mdbmaps[chartType].getArtists().items():
    if artistName in ignores:
        dels.append([idx,artistName])
        
print(len(dels))
for idx,artistName in dels:
    mdbmaps[chartType].removeArtistByID(idx)
    
mdbmaps[chartType].save()

In [None]:
len(dels)

# Functions

In [None]:
class matchthresholds:
    """
    Lookup tables for the artist-matching thresholds.

    ``thresholds`` maps a minimum album count to the threshold dict used when
    matching with albums; ``iterItems`` maps a minimum album count to the
    ``Max`` album count and the number of values (``Vals``) to fetch;
    ``thresholdsNoAlbums`` is the threshold dict used for name-only matching.
    """
    def __init__(self):
        # (minAlbums, numArtistName, artistNameCutoff, numArtistAlbums, score)
        # artistAlbumCutoff is 0.9 for every row.
        rows = [
            (1000, 1, 0.95, 200 // 8, 10.0),
            (500,  1, 0.95, 200 // 8, 5.0),
            (200,  1, 0.95, 200 // 8, 2.5),
            (100,  1, 0.95, 100 // 8, 1.5),
            (50,   2, 0.95, 50 // 8,  1.5),
            (20,   2, 0.95, 3,        1.5),
            (10,   5, 0.90, 2,        1.5),
            (5,    5, 0.90, 2,        1.5),
            (3,    5, 0.90, 2,        1.5),
            (2,    5, 0.90, 1,        2.0),
            (1,    5, 0.90, 1,        0.9),
        ]
        self.thresholds = {
            minAlbums: {'numArtistName': numArtistName, 'artistNameCutoff': artistNameCutoff,
                        'artistAlbumCutoff': 0.9, 'numArtistAlbums': numArtistAlbums, 'score': score}
            for minAlbums, numArtistName, artistNameCutoff, numArtistAlbums, score in rows
        }

        # Insertion order is kept as-is because callers iterate over this dict.
        self.iterItems = {
            20: {"Max": 50, "Vals": 100},
            10: {"Max": 20, "Vals": 200},
            50: {"Max": 10000, "Vals": 50},
            5:  {"Max": 10, "Vals": 250},
            3:  {"Max": 5, "Vals": 500},
            2:  {"Max": 3, "Vals": 500},
            1:  {"Max": 2, "Vals": 500},
        }

        self.thresholdsNoAlbums = {'numArtistName': 5, 'artistNameCutoff': 0.9, 'artistAlbumCutoff': None, 'numArtistAlbums': None, 'score': None}

    def getThresholdsWithAlbums(self, minAlbums):
        """Return a copy of the thresholds for ``minAlbums``, or None if there is no entry."""
        found = self.thresholds.get(minAlbums)
        if found is None:
            return None
        return dict(found)

    def getThresholdsWithoutAlbums(self, cutoff):
        """
        Return the name-only thresholds with ``artistNameCutoff`` set to ``cutoff``.

        The stored dict still remembers the last cutoff, but a copy is returned
        so a later call cannot change thresholds a caller already holds.
        """
        self.thresholdsNoAlbums["artistNameCutoff"] = cutoff
        return dict(self.thresholdsNoAlbums)

    def getIterItems(self):
        """Return the minAlbums -> {"Max", "Vals"} table."""
        return self.iterItems
        

In [None]:
from functools import partial
from matchDBArtist import matchDBArtist

class multimatch:
    """
    Runs artist matching for one chart DB across several worker processes.

    Typical use: ``setMDBMap`` -> ``setData`` -> ``matchItAll``.
    """
    def __init__(self, maindb, mdbmc, debug=False):
        self.debug  = debug
        self.maindb = maindb
        self.mdbmc  = mdbmc

        self.mt = matchthresholds()

        self.mdbmap  = None
        self.toMatch = None
        self.dbName  = None
        self.maxMatches = None

        # Both are filled in by setData().
        self.useAlbums  = None
        self.thresholds = None

        self.matchFunc = {True: self.matchDBArtistWithAlbums, False: self.matchDBArtistWithoutAlbums}


    def setMDBMap(self, dbName, mdbmap):
        """Remember which chart DB (name and map object) is being matched."""
        self.mdbmap = mdbmap
        self.dbName = dbName


    def setDataToMatch(self, toMatch):
        """Replace the data to match; a None argument leaves the current data alone."""
        if toMatch is not None:
            self.toMatch = toMatch


    def setMaxMatches(self, maxMatches):
        self.maxMatches = maxMatches


    ############################################################################################################
    ## Set Data To Be Matched
    ############################################################################################################
    def setData(self, useAlbums=True, minAlbums=None, maxValues=None, cutoff=None):
        """
        Choose the thresholds and fetch the artists to match from ``self.mdbmc``.

        Args:
            useAlbums: True to match with albums (requires ``minAlbums``).
            minAlbums: minimum album count; selects the thresholds and limits.
            maxValues: number of artists to fetch; defaults to the table value
                (with albums) or 100000 (without albums).
            cutoff: artist-name cutoff used when ``useAlbums`` is not True.

        Raises:
            ValueError: if ``useAlbums`` is True and ``minAlbums`` is missing or
                has no entry in the threshold tables.
        """
        self.useAlbums = useAlbums
        if useAlbums is True:
            if minAlbums is None:
                raise ValueError("Must supply a minAlbums value")

            thresholds = self.mt.getThresholdsWithAlbums(minAlbums)
            iterItems  = self.mt.getIterItems().get(minAlbums)
            if thresholds is None or iterItems is None:
                raise ValueError("No thresholds are defined for minAlbums={0}".format(minAlbums))
            self.thresholds = thresholds

            maxAlbums = iterItems["Max"]
            if maxValues is None:
                # The default comes from the iterItems table, not from the thresholds.
                maxValues = iterItems["Vals"]

            self.toMatch   = self.mdbmc.getDataToMatch(self.dbName, maxValues=maxValues, maxAlbums=maxAlbums, minAlbums=minAlbums, ignores=[])
            print("Found {0} Artists To Match With Albums and Using Thresholds: {1}".format(len(self.toMatch[self.dbName]), self.thresholds))
        else:
            self.thresholds = self.mt.getThresholdsWithoutAlbums(cutoff)
            if maxValues is None:
                maxValues = 100000
            self.toMatch   = self.mdbmc.getDataToMatch(self.dbName, maxValues=maxValues, maxAlbums=100000, minAlbums=0, ignores=[])
            print("Found {0} Artists To Match Without Albums and Without Using Thresholds".format(len(self.toMatch[self.dbName])))


    def matchItAll(self, num_processes=3):
        """
        Match every artist in ``self.toMatch[self.dbName]``.

        Args:
            num_processes: number of worker processes handed to ``multiProc``.

        Returns:
            List of ``[primaryKey, artistName, artistID, matches]`` entries, or
            None when there is nothing to match.

        Raises:
            ValueError: if ``setData`` has not been called yet.
        """
        if self.useAlbums is None or self.thresholds is None or self.toMatch is None:
            raise ValueError("Call setData() before matchItAll()")

        func  = self.matchFunc[self.useAlbums]
        pfunc = partial(func, **self.thresholds) # Thresholds travel as keyword arguments
        argument_list = self.toMatch[self.dbName]

        if len(argument_list) == 0:
            return
        print("Using {0} Matching Function".format(pfunc))
        print("Running imap multiprocessing for {0} artists ...".format(len(argument_list)))
        result_list = self.multiProc(func=pfunc, argument_list=argument_list,
                                               num_processes=num_processes)
        return result_list


    def _matchItem(self, item, useAlbums, thresholds):
        # Shared body of the two match methods; item is (primaryKey, artistData).
        primaryKey   = item[0]
        artistData   = item[1]
        artistName   = artistData["ArtistName"]
        artistID     = primaryKey
        artistAlbums = artistData["ArtistAlbums"] if useAlbums else None

        mdbMatcher = matchDBArtist(self.maindb)
        mdbMatcher.setArtistInfo(artistName, artistID, artistAlbums)
        mdbMatcher.setThresholds(matchNumArtistName=thresholds['numArtistName'], matchArtistNameCutoff=thresholds['artistNameCutoff'],
                                 matchArtistAlbumCutoff=thresholds['artistAlbumCutoff'], matchNumArtistAlbums=thresholds['numArtistAlbums'],
                                 matchScore=thresholds['score'])
        if useAlbums:
            mcs = mdbMatcher.findPotentialArtistAlbumMatches()
        else:
            mcs = mdbMatcher.findPotentialArtistNameMatchesWithoutAlbums()
        return [primaryKey,artistName,artistID,mcs]


    def matchDBArtistWithoutAlbums(self, item, *args, **kwargs):
        """Match one artist by name only; returns [primaryKey, artistName, artistID, matches]."""
        return self._matchItem(item, useAlbums=False, thresholds=kwargs)


    def matchDBArtistWithAlbums(self, item, *args, **kwargs):
        """Match one artist by name and albums; returns [primaryKey, artistName, artistID, matches]."""
        return self._matchItem(item, useAlbums=True, thresholds=kwargs)


    def multiProc(self, func, argument_list, num_processes):
        """
        Apply ``func`` to every item of ``argument_list`` in a process pool.

        NOTE(review): ``func`` is a partial of a bound method, so this object
        (including ``maindb``) is presumably pickled for the workers - confirm
        that it is picklable and that the cost is acceptable.
        """
        result_list_tqdm = []
        # The with-block shuts the pool down even if a worker raises.
        with Pool(processes=num_processes) as pool:
            for result in tqdm(pool.imap(func=func, iterable=argument_list), total=len(argument_list)):
                result_list_tqdm.append(result)
        return result_list_tqdm


In [None]:
mm = multimatch(maindb, mdbmc)
mm.setMDBMap("Billboard", mdbmaps["Billboard"])

In [None]:
mm.setData(useAlbums=True, minAlbums=10, maxValues=1)

In [None]:
mm.matchItAll()

In [None]:
ignoresList = ["chartIgnores.yaml"]
for minAlbums,minAlbumsData in getIterItems().items():
    print("="*10,minAlbums)
    maxAlbums = minAlbumsData["Max"]
    maxValues = minAlbumsData["Vals"]
    for i in range(10):
        toMatch   = mdbmc.getDataToMatch(chartType, maxValues=maxValues, maxAlbums=maxAlbums, minAlbums=minAlbums, ignores=ignoresList)
        if len(toMatch[chartType]) == 0:
            break
        matchItAll(mdbmaps, getThresholds(minAlbums), toMatch=toMatch)
        
reMatch()
print("Done.\n\n")

In [None]:
from billboardCharts import billboardCharts
from fileUtils import getBasename, getDirname, getBaseFilename
from timeUtils import clock, elapsed
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from listUtils import getFlatList
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
from os.path import join
from searchUtils import findExt
import urllib
from time import sleep
from collections import Counter
from artistIgnores import getArtistIgnores

from billboardCharts import billboardCharts
from top40Charts import top40Charts

class billboardFiles:
    """Locates the pickled Billboard result files below a base directory."""
    def __init__(self, basedir):
        self.basedir = basedir
        self.source  = "Billboard"

    def findFiles(self):
        """
        Find every '.p' file under <basedir>/data/billboard/results.

        Returns:
            dict mapping each file's base filename (with "/" replaced by " ")
            to the path reported by ``findExt``. Also stored as ``self.files``.
        """
        savedir = join(self.basedir, "data", "billboard", "results")
        self.files   = findExt(savedir, ext='.p')
        print("Found {0} {1} files.".format(len(self.files), self.source))
        self.files = {getBaseFilename(x).replace("/", " "): x for x in self.files}
        return self.files
    
    
class billboardFullChart:
    """Holds the Billboard chart files found under ``basedir`` and the charts of interest."""
    def __init__(self, basedir, charts):
        self.basedir = basedir
        self.source  = "Billboard"
        self.charts  = charts
        self.tfiles  = billboardFiles(self.basedir)
        self.files   = self.tfiles.findFiles()

        self.fullChartData = {}

    def filterFiles(self):
        """
        Select the files whose chart name is listed in ``self.charts``.

        Returns:
            dict of chart name -> file for the kept charts (``self.files`` is
            left untouched). The selection used to be built and then dropped.
        """
        files = {chartName: ifile for chartName, ifile in self.files.items() if chartName in self.charts}
        print("Keeping {0}/{1} Charts".format(len(files), len(self.files)))
        return files


class top40files:
    """Locates the pickled Top40 chart files below a base directory."""
    def __init__(self, basedir):
        self.basedir = basedir
        self.source  = "Top40"

    def findFiles(self):
        """
        Find every '.p' file under <basedir>/data/top40.

        Returns:
            dict mapping each file's base filename (with "/" replaced by " ")
            to the path reported by ``findExt``. Also stored as ``self.files``.
        """
        savedir = join(self.basedir, "data", "top40")
        self.files   = findExt(savedir, ext='.p')
        print("Found {0} {1} files.".format(len(self.files), self.source))
        self.files = {getBaseFilename(x).replace("/", " "): x for x in self.files}
        return self.files


class top40FullChart:
    """
    Builds per-artist song/album chart histories from the Top40 chart files.

    ``top40files`` and ``top40FullChart`` were each defined twice in this cell;
    only one definition of each is kept.
    """
    def __init__(self, basedir, charts, minYear=None, maxYear=None, artistRenames=None, dbRenames=None):
        """
        Args:
            basedir: directory handed to ``top40files``.
            charts: names of the charts to keep.
            minYear, maxYear: optional inclusive year limits on chart dates.
            artistRenames: optional dict of substring -> replacement applied to
                artist names.
            dbRenames: optional dict of full artist name -> replacement.
        """
        self.basedir = basedir
        self.source  = "Top40"
        self.charts  = charts
        self.tfiles  = top40files(self.basedir)
        self.files   = self.tfiles.findFiles()
        self.fullChartData = {}

        # Read by setFullChartData(); they had no initial value before.
        self.minYear = minYear
        self.maxYear = maxYear
        self.artistRenames = artistRenames if artistRenames is not None else {}
        self.dbRenames     = dbRenames if dbRenames is not None else {}

    def filterFiles(self):
        """Return the chart name -> file entries whose chart is in ``self.charts``."""
        files = {chartName: ifile for chartName, ifile in self.files.items() if chartName in self.charts}
        print("Keeping {0}/{1} Charts".format(len(files), len(self.files)))
        return files

    def _yearAllowed(self, date):
        # True when the chart date lies inside the optional [minYear, maxYear] window.
        if self.minYear is None and self.maxYear is None:
            return True
        year = getDateTime(date).year
        if self.minYear is not None and year < int(self.minYear):
            return False
        if self.maxYear is not None and year > int(self.maxYear):
            return False
        return True

    def _renameArtist(self, artist, renameStats):
        # Apply the substring renames, then the full-name rename, counting every hit.
        for testArtist, replacement in self.artistRenames.items():
            if testArtist in artist:
                artist = artist.replace(testArtist, replacement)
                renameStats[artist] += 1

        if self.dbRenames.get(artist) is not None:
            artist = self.dbRenames[artist]
            renameStats[artist] += 1
        return artist

    def setFullChartData(self):
        """
        Read every kept chart file and fill ``self.fullChartData``.

        The result has the shape
        ``{artist: {"Songs": {title: {chartName: {date: position}}}, "Albums": {...}}}``
        where ``position`` is the item's index in that date's list. Charts
        whose name ends with "Albums" go under "Albums", all others under "Songs".

        Raises:
            ValueError: if no chart files are found.
        """
        fullChartData = {}
        renameStats   = Counter()

        # findFiles() lives on the file locator and already returns name -> path.
        self.files = self.tfiles.findFiles()
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")

        for chartName, ifile in self.files.items():
            if chartName not in self.charts:
                continue
            print("==> {0: <40}".format(chartName), end="\t")
            chartResults = getFile(ifile)
            key = "Albums" if chartName.endswith("Albums") else "Songs"

            for date, values in chartResults.items():
                if not self._yearAllowed(date):
                    continue

                for i,item in enumerate(values):
                    artist = self._renameArtist(item["Artist"], renameStats)
                    artist = artist.replace("\r", "")

                    # A False status drops the artist.
                    if getArtistIgnores(artist) is False:
                        continue

                    album  = item["Album"]
                    if album in ["Soundtrack"]:
                        continue

                    artistEntry = fullChartData.setdefault(artist, {"Songs": {}, "Albums": {}})
                    artistEntry[key].setdefault(album, {}).setdefault(chartName, {})[date] = i
            print(len(fullChartData))
        self.fullChartData = fullChartData

        print("Renamed {0} artists".format(len(renameStats)))
        print("Most Common Artists:")
        for item in renameStats.most_common(5):
            print(item)


class chartData:
    def __init__(self, source, minYear=None, maxYear=None, country=None, debug=False):
        debug=False    
        self.source = source
        
        self.basedir  = "/Volumes/Piggy/Charts/"
        self.basename = source
        

        try:
            self.cts    = {"Billboard": billboardCharts(), "Top40": top40Charts()}[source]
            self.cFiles = {"Billboard": billboard40files(self.basedir), "Top40": top40files(self.basedir)}[source]
            self.files  = self.cFiles.findFiles()
        except:
            raise ValueError("Could not create charts data for {0}".format(source))

                        
        self.charts = []
            
        self.minYear   = minYear
        self.maxYear   = maxYear
        
        self.artistRenames   = {}
        self.dbRenames       = {}

        self.chartData       = {}
        self.fullChartData   = {}
        self.artistAlbumData = {}
        
        
    #####################################################################################################################################
    ## Artist Data
    #####################################################################################################################################
    def getArtists(self):
        return list(self.artistAlbumData.keys())
        
        
    
    #####################################################################################################################################
    ## Full Chart Data
    #####################################################################################################################################
    def getFullChartDataFilename(self):
        ifile="current{0}FullChartArtistAlbumData.p".format(self.basename)
        return ifile

    def getFullChartData(self):
        return getFile(self.getFullChartDataFilename())
        
    def saveFullChartData(self):
        print("Saving {0} Full Artist Data".format(len(self.fullChartData)))
        saveFile(idata=self.fullChartData, ifile=self.getFullChartDataFilename(), debug=True)        
        
        
    
    #####################################################################################################################################
    ## Artist Album Data
    #####################################################################################################################################
    def getArtistAlbumDataFilename(self):
        ifile="current{0}ArtistAlbumData.p".format(self.basename)
        return ifile
    
    def setArtistAlbumData(self):
        self.artistAlbumData = {artist: list(artistData["Songs"].keys()) + list(artistData["Albums"].keys()) for artist,artistData in self.fullChartData.items()}
        
    
    def getArtistAlbumData(self):
        return getFile(self.getArtistAlbumDataFilename())
        
    def saveArtistAlbumData(self):
        print("Saving {0} Artist Album Data to {1}".format(len(self.artistAlbumData), self.getArtistAlbumDataFilename()))
        saveFile(idata=self.artistAlbumData, ifile=self.getArtistAlbumDataFilename(), debug=True)  
        
        
    
    #####################################################################################################################################
    ## Rename Data
    #####################################################################################################################################        
    def setRenames(self, artistRenames):
        self.artistRenames = artistRenames
        
    def setDBRenames(self, dbRenames):
        self.dbRenames = dbRenames
        
        
    
    #####################################################################################################################################
    ## Chart Usage (select which charts are read)
    #####################################################################################################################################
    def setChartUsage(self, name=None, rank=None):
        if rank is not None:
            if isinstance(rank, list):
                for item in rank:
                    self.charts += self.cts.getChartsByRank(item)
            elif isinstance(rank, int):
                self.charts += self.cts.getChartsByRank(rank)
        elif name is not None:
            self.charts += self.cts.getCharts(name)
        else:
            self.charts = self.cts.getCharts(None)
        if name is None:
            name = "None"
        print("  Using Charts ({0}): {1}".format(name, self.charts))
        
        
        
                
    #####################################################################################################################################
    ## Set Artist Album Data
    #####################################################################################################################################
    def setArtistAlbumData(self):
        self.artistAlbumData = {artist: list(artistData["Songs"].keys()) + list(artistData["Albums"].keys()) for artist,artistData in self.fullChartData.items()}
        print("There are {0} unique artist entries".format(len(self.artistAlbumData)))
        

    #####################################################################################################################################
    ## Set Full Chart Data
    #####################################################################################################################################
    def setFullChartData(self):
        {"Billboard": self.setFullChartDataBillboard(), "Top40": self.setFullChartDataTop40()}[self.source]
    
    def setFullChartDataBillboard(self):        
        renameStats  = Counter()
        chartCounter = Counter()
        
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")
        
        for ifile in self.files:
            fdata = getFile(ifile)
            for chartName, cnameResults in fdata.items():
                if chartName not in self.charts:
                    continue
                
                for date, dResults in cnameResults.items():
                    if self.minYear is not None:
                        if getDateTime(date).year < int(self.minYear):
                            continue
                    if self.maxYear is not None:
                        if getDateTime(date).year > int(self.maxYear):
                            continue
                    stryear = getDateTime(date).year

                    artist = dResults["Artist"]

                    renamedArtist = artist
                    for testArtist in self.artistRenames.keys():
                        if artist.find(testArtist) != -1:
                            tmp = renamedArtist
                            renamedArtist = renamedArtist.replace(testArtist, self.artistRenames.get(testArtist))
                            #print("{0}  <---- From ---- {1}".format(renamedArtist, tmp))
                            renameStats[renamedArtist] += 1
                            artist = renamedArtist 
                    
                    if self.dbRenames.get(artist) is not None:
                        renamedArtist = self.dbRenames[artist]
                        renameStats[renamedArtist] += 1
                        artist = renamedArtist  

                    ignoreStatus = getArtistIgnores(artist)
                    if ignoreStatus is False:
                        continue


                    chartCounter[chartName] += 1

                    album  = dResults["Name"]

                    if self.chartData.get(artist) is None:
                        self.chartData[artist] = Counter()
                    self.chartData[artist][album] += 1
                    
                    if self.fullChartData.get(artist) is None:
                        self.fullChartData[artist] = {"Songs": {}, "Albums": {}}
                    if chartName.endswith("Albums"):
                        key = "Albums"
                    else:
                        key = "Songs"
                    if self.fullChartData[artist][key].get(album) is None:
                        self.fullChartData[artist][key][album] = {}
                    if self.fullChartData[artist][key][album].get(chartName) is None:
                        self.fullChartData[artist][key][album][chartName] = {}
                    self.fullChartData[artist][key][album][chartName][date] = 0
                #print("{0: <40}{1}".format("{0}-{1}".format(chartName,stryear),len(self.fullChartData)))
                
        
    def setFullChartDataTop40(self):
        fullChartData = {}
        renameStats   = Counter()
        
        #self.findFiles()
        if len(self.files) == 0:
            raise ValueError("There are no files. Something is wrong...")
        self.files = {getBaseFilename(x).replace("/", " "): x for x in self.files}
        
        for chartName, ifile in self.files.items():
            if chartName not in self.charts:
                continue
            print("==> {0: <40}".format(chartName), end="\t")
            #t40chart = top40chart(chartID, chartName, chartURL)
            chartResults = getFile(ifile)

            for date, values in chartResults.items():
                if self.minYear is not None:
                    if getDateTime(date).year < int(self.minYear):
                        continue
                if self.maxYear is not None:
                    if getDateTime(date).year > int(self.maxYear):
                        continue

                        
                for i,item in enumerate(values):
                    artist = item["Artist"]
                    renamedArtist = artist
                    for testArtist in self.artistRenames.keys():
                        if artist.find(testArtist) != -1:
                            tmp = renamedArtist
                            renamedArtist = renamedArtist.replace(testArtist, self.artistRenames.get(testArtist))
                            #print("{0}  <---- From ---- {1}".format(renamedArtist, tmp))
                            renameStats[renamedArtist] += 1
                            artist = renamedArtist
                    
                    if self.dbRenames.get(artist) is not None:
                        renamedArtist = self.dbRenames[artist]
                        renameStats[renamedArtist] += 1
                        artist = renamedArtist
                    

                    artist = artist.replace("\r", "")                    
                    
                    ignoreStatus = getArtistIgnores(artist)
                    if ignoreStatus is False:
                        continue
                    
                    album  = item["Album"]
                    if album in ["Soundtrack"]:
                        continue

                    if fullChartData.get(artist) is None:
                        fullChartData[artist] = {"Songs": {}, "Albums": {}}
                    if chartName.endswith("Albums"):
                        key = "Albums"
                    else:
                        key = "Songs"
                    if fullChartData[artist][key].get(album) is None:
                        fullChartData[artist][key][album] = {}
                    if fullChartData[artist][key][album].get(chartName) is None:
                        fullChartData[artist][key][album][chartName] = {}
                    fullChartData[artist][key][album][chartName][date] = i
            print(len(fullChartData))
        self.fullChartData = fullChartData
        
        if self.artistRenames is not None:
            print("Renamed {0} artists".format(len(renameStats)))
            print("Most Common Artists:")
            for item in renameStats.most_common(5):
                print(item)