# Top 40 Charts Functions

In [235]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
from glob import glob
from os import getcwd
from os.path import join
from fileUtils import getBasename, getDirname, getBaseFilename
from fsUtils import isFile, isDir, moveFile, removeFile, mkDir, setFile, moveFile
from searchUtils import findDirs, findExt, findNearest

################################################################################
## General Stuff
################################################################################
from timeUtils import clock, elapsed
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from listUtils import getFlatList
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from time import sleep

################################################################################
## Database Stuff
################################################################################
from dbBase import dbBase
from mainDB import mainDB
from multiArtist import multiartist
from matchAlbums import matchAlbums
from masterdb import masterdb



################################################################################
## Music Stuff
################################################################################
from myMusicDBMap import myMusicDBMap
from musicBase import myMusicBase
from matchMyMusic import matchMyMusic
from matchMusicName import myMusicName
from mergeDB import searchForMutualDBEntries, searchForMutualArtistDBEntries

################################################################################
## Chart Stuff
################################################################################
from top40charts import top40, top40chart, top40starter
from fullCharts import fullCharts
from matchChartMusic import matchChartMusic

################################################################################
## Pandas Stuff
################################################################################
# Raise pandas' display limits (500 rows / 500 columns, width 1000).
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

print("Python: {0}".format(sys.version))

from pandas import date_range, DataFrame
# Working directory of the kernel; only printed here.
basedir = getcwd()
print("Basedir = {0}".format(basedir))

import datetime as dt
# Wall-clock time at which this run started.
# NOTE(review): `start` is reassigned by the later timing cells (`start, cmt = clock(...)`).
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Basedir = /Users/tgadfort/Documents/code/charts
Notebook Last Run Initiated: 2020-08-07 21:20:14.957050


In [236]:
# Chart family to process. Both values are kept as a manual toggle: the last
# assignment wins, so as written this run uses "Billboard".
chartType = "Top40"
chartType = "Billboard"

***
***
***

# Pool Information

In [237]:
from multiprocessing import Pool    
from time import sleep
from copy import deepcopy

# NOTE(review): `results` is initialised empty here; the functions in this cell never fill it.
results = {}

def process(artist):
    """Match one chart artist, using that artist's albums, at a 0.85 ratio cut.

    Reads the module-level ``artistAlbumData`` and ``mcm`` objects and returns
    whatever ``mcm.matchChartArtist`` returns.
    """
    artistAlbums = artistAlbumData[artist]
    mcm.setChartArtistData(artist, artistAlbums)
    return mcm.matchChartArtist(ratioCut=0.85, returnData=True)

def poolProc(nProc, artists, pollSeconds=10):
    """Run ``process`` over ``artists`` on a pool of ``nProc`` worker processes.

    Args:
        nProc: number of worker processes to start.
        artists: iterable of chart artist names; each one is passed to ``process``.
        pollSeconds: seconds to wait between heartbeat lines while work is pending.

    Returns:
        list with one ``process`` result per artist, in input order
        (an empty list when ``artists`` is empty).
    """
    artists = list(artists)
    if len(artists) == 0:
        # Nothing to match; this also avoids Pool(processes=0), which raises ValueError.
        return []

    # The context manager shuts the workers down on exit, even if a worker raised.
    with Pool(processes=nProc) as pool:
        result = pool.map_async(process, artists)
        while not result.ready():
            # wait() returns as soon as the job finishes, so completion is not
            # held up by a full sleep interval.
            result.wait(pollSeconds)
            print("")
        # This return used to sit inside the loop, so a job that was already
        # finished skipped it and the function returned None.
        return result.get()

    

def processByName(artist):
    """Match one chart artist by name only (no albums) at a 0.95 ratio cut.

    Uses the module-level ``mcm`` matcher and returns the value of
    ``mcm.matchChartArtistByName``.
    """
    noAlbums = []
    mcm.setChartArtistData(artist, noAlbums)
    return mcm.matchChartArtistByName(ratioCut=0.95, returnData=True)

def poolProcByName(nProc, artists, pollSeconds=10):
    """Run ``processByName`` over ``artists`` on a pool of ``nProc`` worker processes.

    Args:
        nProc: number of worker processes to start.
        artists: iterable of chart artist names; each one is passed to ``processByName``.
        pollSeconds: seconds to wait between heartbeat lines while work is pending.

    Returns:
        list with one ``processByName`` result per artist, in input order
        (an empty list when ``artists`` is empty).
    """
    artists = list(artists)
    if len(artists) == 0:
        # Nothing to match; this also avoids Pool(processes=0), which raises ValueError.
        return []

    # The context manager shuts the workers down on exit, even if a worker raised.
    with Pool(processes=nProc) as pool:
        result = pool.map_async(processByName, artists)
        while not result.ready():
            # wait() returns as soon as the job finishes.
            result.wait(pollSeconds)
            print("")
        # This return used to sit inside the loop, so a job that was already
        # finished skipped it and the function returned None.
        return result.get()
    
    
def processThe(artist):
    """Match an artist under its toggled "The " spelling at a 0.85 ratio cut.

    A leading "The " is removed when present and added when absent; the albums
    are still looked up in ``artistAlbumData`` under the original name.
    Returns the value of ``mcm.matchChartArtist``.
    """
    artistAlbums = artistAlbumData[artist]
    toggledName = artist[4:] if artist.startswith("The ") else "The {0}".format(artist)
    mcm.setChartArtistData(toggledName, artistAlbums)
    return mcm.matchChartArtist(ratioCut=0.85, returnData=True)

def poolProcThe(nProc, artists, pollSeconds=10):
    """Run ``processThe`` over ``artists`` on a pool of ``nProc`` worker processes.

    Args:
        nProc: number of worker processes to start.
        artists: iterable of chart artist names; each one is passed to ``processThe``.
        pollSeconds: seconds to wait between heartbeat lines while work is pending.

    Returns:
        list with one ``processThe`` result per artist, in input order
        (an empty list when ``artists`` is empty).
    """
    artists = list(artists)
    if len(artists) == 0:
        # Nothing to match; this also avoids Pool(processes=0), which raises ValueError.
        return []

    # The context manager shuts the workers down on exit, even if a worker raised.
    with Pool(processes=nProc) as pool:
        result = pool.map_async(processThe, artists)
        while not result.ready():
            # wait() returns as soon as the job finishes.
            result.wait(pollSeconds)
            print("")
        # This return used to sit inside the loop, so a job that was already
        # finished skipped it and the function returned None.
        return result.get()

# Retrieve Temporarily Saved Results

In [394]:
def getMDB():
    """Create a ``myMusicDBMap`` with debug off, load its full DB data, and return it."""
    musicDBMap = myMusicDBMap(debug=False)
    musicDBMap.getFullDBData()
    return musicDBMap

def getMA():
    """Build the multi-artist splitter, primed with the known multi-delimiter artists.

    Reads one artist name per line from ``../multiartist/multiDelimArtists.dat``
    (relative to the working directory) and registers the names on a new
    ``multiartist`` instance.

    Returns:
        the configured ``multiartist`` object.
    """
    # The context manager closes the file; the bare open().readlines() left it open.
    with open("../multiartist/multiDelimArtists.dat") as datFile:
        multiDelimArtists = [line.replace("\n", "") for line in datFile]

    mularts = multiartist(cutoff=0.9, discdata=None, exact=False)
    mularts.setKnownMultiDelimArtists(multiDelimArtists)
    return mularts

def getMCM(mdb=None):
    """Return a ``matchChartMusic`` built on ``mdb``; a fresh ``getMDB()`` is used when ``mdb`` is None."""
    dbMap = getMDB() if mdb is None else mdb
    return matchChartMusic(dbMap)

In [None]:
# Build the shared objects that later cells read as globals: the DB map (`mdb`),
# the chart matcher (`mcm`, also used by the pool worker functions) and the
# multi-artist splitter (`mularts`).
mdb     = getMDB()
mcm     = getMCM(mdb)
mularts = getMA()
_, _ = clock("Last Run")

Counter({'Discogs': 3996, 'AllMusic': 3939, 'MusicBrainz': 3731, 'LastFM': 2857, 'DatPiff': 535, 'RockCorner': 480, 'AceBootlegs': 175, 'CDandLP': 172, 'RateYourMusic': 56, 'MusicStack': 5, 'MetalStorm': 0})


In [None]:
def updateDataFrame(dictVal):
    """Turn ``{artist: {db: id}}`` into a per-artist table with match and album info.

    The result has one row per artist, one column per database, a ``Matched``
    column (number of non-null database IDs) and the ``Num`` / ``Albums``
    columns taken from the module-level ``artistAlbumData``. Summary counts are
    printed and a deep copy of the table is returned.
    """
    byArtist = DataFrame(dictVal)
    table = byArtist.transpose()
    table["Matched"] = byArtist.count(axis=0)

    # Album count and album list for every artist in the table.
    albumInfo = {}
    for name in list(table.index):
        albumInfo[name] = {"Num": len(artistAlbumData[name]), "Albums": artistAlbumData[name]}
    table = table.join(DataFrame(albumInfo).transpose())

    print("Number of Entries in Stuff:         {0}".format((table.shape)))
    print("Number of Zero Matches in Stuff:    {0}".format((table[table["Matched"] == 0].shape)))
    print("Number of NonZero Matches in Stuff: {0}".format((table[table["Matched"] > 0].shape)))
    print("")
    return table.copy(deep=True)

In [None]:
def updateArtistAlumData():
    """Apply the renames in ``renames2.yaml`` to ``currentArtistAlbumData.p`` and save it.

    For every ``old -> new`` rename:
      * only ``old`` has albums: the albums move to ``new``;
      * both have albums: the lists are concatenated under ``new`` (old first);
      * ``old`` has no albums: nothing changes.
    In the first two cases the ``old`` entry is removed. The updated mapping is
    written back to ``currentArtistAlbumData.p``.

    NOTE(review): this always uses the Top40 file name, unlike
    ``getArtistAlbumData`` which switches on ``chartType`` - confirm that is intended.
    """
    artistRenameData = getFile("renames2.yaml")
    print("There are {0} artist renames".format(len(artistRenameData)))

    artistAlbumData = getFile(ifile="currentArtistAlbumData.p", debug=True)
    print("There are {0} artist album entries".format(len(artistAlbumData)))

    for artist, rename in artistRenameData.items():
        if artist == rename:
            # A self-rename used to double the album list and then delete the
            # entry altogether; there is nothing to move, so skip it.
            continue

        oldAlbums = artistAlbumData.get(artist)
        if oldAlbums is None:
            # Nothing stored under the old name (whether or not the new name exists).
            continue

        keptAlbums = artistAlbumData.get(rename)
        if keptAlbums is None:
            artistAlbumData[rename] = oldAlbums
        else:
            artistAlbumData[rename] = oldAlbums + keptAlbums
            print("Removing [{0}] and keeping [{1}]".format(artist, rename))
        del artistAlbumData[artist]

    print("There are {0} artist album entries".format(len(artistAlbumData)))
    saveFile(idata=artistAlbumData, ifile="currentArtistAlbumData.p", debug=True)

In [None]:
def removeUnmatchedEntries():
    """Drop artists with no database ID from the global ``matchedChartResults`` and save.

    An artist is removed when every value in its ``{db: id}`` mapping is None.
    The global is rebound to the filtered dict, three counts are printed
    (before, removed, after) and the result is written to ``matchedChartArtists.p``.

    NOTE(review): the file name is fixed and does not depend on ``chartType``,
    unlike ``saveMatchedChartArtists`` - confirm that is intended.
    """
    # Without this declaration the assignment below makes the name local and
    # the first read raises UnboundLocalError.
    global matchedChartResults

    dels = {
        latestArtist
        for latestArtist, dbIDs in matchedChartResults.items()
        if all(dbID is None for dbID in dbIDs.values())
    }

    print(len(matchedChartResults))
    matchedChartResults = {k: v for k, v in matchedChartResults.items() if k not in dels}
    print(len(dels))
    print(len(matchedChartResults))
    saveFile(idata=matchedChartResults, ifile="matchedChartArtists.p", debug=True)

In [None]:
def saveMatchedChartArtists(matchedChartResults):
    """Save ``matchedChartResults`` to the file that belongs to the global ``chartType``.

    Raises:
        ValueError: when ``chartType`` is neither "Billboard" nor "Top40".
    """
    savenames = {
        "Billboard": "matchedBillboardChartArtists.p",
        "Top40": "matchedChartArtists.p",
    }
    savename = savenames.get(chartType)
    if savename is None:
        raise ValueError("Could not find data for chart type: {0}".format(chartType))

    print("Saving {0} artists to {1}".format(len(matchedChartResults), savename))
    saveFile(idata=matchedChartResults, ifile=savename, debug=True)


def getMatchedChartArtists(ctype=None):
    """Load the previously matched artists for a chart type.

    Args:
        ctype: "Billboard" or "Top40"; the global ``chartType`` is used when None.

    Returns:
        the object stored in the matching ``matched*ChartArtists.p`` file.

    Raises:
        ValueError: for any other chart type.
    """
    wanted = chartType if ctype is None else ctype
    filenames = {
        "Billboard": "matchedBillboardChartArtists.p",
        "Top40": "matchedChartArtists.p",
    }
    filename = filenames.get(wanted)
    if filename is None:
        raise ValueError("Could not find data for chart type: {0}".format(wanted))

    matched = getFile(ifile=filename, debug=True)
    print("There are {0} previously matched artists".format(len(matched)))
    return matched

In [None]:
def getArtistRenames():
    """Return the contents of ``masterRename.yaml`` as loaded by ``getFile``."""
    return getFile("masterRename.yaml")

In [None]:
def getArtistAlbumData():
    """Load the current artist/album data file for the global ``chartType``.

    Raises:
        ValueError: when ``chartType`` is neither "Billboard" nor "Top40".
    """
    filenames = {
        "Billboard": "currentBillboardArtistAlbumData.p",
        "Top40": "currentArtistAlbumData.p",
    }
    filename = filenames.get(chartType)
    if filename is None:
        raise ValueError("Could not find data for chart type: {0}".format(chartType))
    return getFile(ifile=filename, debug=True)

def getFullChartArtistAlbumData():
    """Load the full-chart artist/album data file for the global ``chartType``.

    Raises:
        ValueError: when ``chartType`` is neither "Billboard" nor "Top40".
    """
    filenames = {
        "Billboard": "currentBillboardFullChartArtistAlbumData.p",
        "Top40": "currentFullChartArtistAlbumData.p",
    }
    filename = filenames.get(chartType)
    if filename is None:
        raise ValueError("Could not find data for chart type: {0}".format(chartType))
    return getFile(ifile=filename, debug=True)

# Load Chart Data

In [None]:
# Load the artist -> albums mapping for the active chart type; the pool worker
# functions and updateDataFrame read `artistAlbumData` as a global.
artistAlbumData = getArtistAlbumData()
allArtists      = list(artistAlbumData.keys())
print("There are {0} artist album entries".format(len(artistAlbumData)))

fullChartData = getFullChartArtistAlbumData()

# Chart entries that the multi-artist splitter resolves to more than one name.
# NOTE(review): getArtistNames() is called up to three times per artist in this cell.
manyArtists   = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) > 1]
manyArtistAlbums = {artist: {"Artists": mularts.getArtistNames(artist)} for artist in manyArtists}
print("There are {0} many artist entries".format(len(manyArtists)))

# Chart entries that resolve to exactly one name.
singleArtists   = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) == 1]
print("There are {0} single artist entries".format(len(singleArtists)))

_, _ = clock("Last Run")

In [None]:
# Split the chart artists into matched / unmatched, first for single-artist
# entries and then for the individual names inside multi-artist entries.
matchedChartResults = getMatchedChartArtists()
print("="*100)
print("There are {0} matched chart artists".format(len(matchedChartResults)))
print("="*50,"Single","="*50)

# A single artist counts as matched when it has an entry in matchedChartResults.
singleArtistStatus     = {singleArtist: matchedChartResults.get(singleArtist) is not None for singleArtist in singleArtists}
singleArtistAlbums     = {}
for singleArtist in singleArtists:
    # Stores the same object that artistAlbumData holds (no copy is made).
    singleArtistAlbums[singleArtist] = artistAlbumData[singleArtist]
matchedSingleArtists   = [singleArtist for singleArtist,isMatched in singleArtistStatus.items() if isMatched is True]
unMatchedSingleArtists = [singleArtist for singleArtist,isMatched in singleArtistStatus.items() if isMatched is False]
print("There are {0} single Artists".format(len(singleArtistStatus)))
print("There are {0} matched Artists".format(len(matchedSingleArtists)))
print("There are {0} unMatched Artists".format(len(unMatchedSingleArtists)))
print("="*50,"Many","="*50)

artistRenames = getArtistRenames()
# multi-artist chart entry -> list of the individual artist names it contains
manySingleArtists      = {manyArtist: list(mularts.getArtistNames(manyArtist).keys()) for manyArtist in manyArtists}
manySingleArtistStatus = {}
manySingleArtistAlbums = {}
for manyArtist, manySingleArtistValues in manySingleArtists.items():
    for manySingleArtist in manySingleArtistValues:
        # Prefer the canonical name from masterRename.yaml when one exists.
        renamedManySingleArtist = artistRenames.get(manySingleArtist)
        if renamedManySingleArtist is not None:
            print("\t{0}  <---- From ---- {1}".format(renamedManySingleArtist, manySingleArtist))
            manySingleArtist = renamedManySingleArtist
            
        if artistAlbumData.get(manySingleArtist) is None:
            artistAlbumData[manySingleArtist] = []
        # NOTE(review): `+=` extends the stored album collection on every run of
        # this cell, so re-running it appends the same albums again. If the values
        # are lists (presumably - confirm), the extension is in place and is also
        # visible through singleArtistAlbums for artists that appear in both groups.
        artistAlbumData[manySingleArtist] += artistAlbumData[manyArtist]
        if manySingleArtistStatus.get(manySingleArtist) is None:
            manySingleArtistStatus[manySingleArtist] = matchedChartResults.get(manySingleArtist) is not None
            manySingleArtistAlbums[manySingleArtist] = {}
        manySingleArtistAlbums[manySingleArtist][manyArtist] = artistAlbumData[manyArtist]

matchedSingleManyArtists   = [singleArtist for singleArtist,isMatched in manySingleArtistStatus.items() if isMatched is True]
unMatchedSingleManyArtists = [singleArtist for singleArtist,isMatched in manySingleArtistStatus.items() if isMatched is False]

print("There are {0} many Artists".format(len(manySingleArtists)))
print("There are {0} many single Artists".format(len(manySingleArtistStatus)))
print("There are {0} matched many single Artists".format(len(matchedSingleManyArtists)))
print("There are {0} unMatched many single Artists".format(len(unMatchedSingleManyArtists)))
print("="*100)
_, _ = clock("Last Run")

In [None]:
# List every unmatched single artist with its index and its chart albums.
for i,artist in enumerate(unMatchedSingleArtists):
    print(i,'\t',artist)
    print("\t---> ",singleArtistAlbums[artist])

In [None]:
# List every unmatched name that came out of a multi-artist entry, with the
# albums grouped by the multi-artist entry they were listed under.
for artist in unMatchedSingleManyArtists:
    print(artist)
    print("\t",manySingleArtistAlbums[artist])

In [333]:
#unMatchedSingleArtists + unMatchedSingleManyArtists

***
***
***
***

# Find Near Matches

In [None]:
# For every unmatched artist, look for up to two near matches (cutoff 0.75)
# among the artists already matched for the Top40 charts.
previouslyKnownArtists = getMatchedChartArtists("Top40").keys()
result = {}
for i,artist in enumerate(unMatchedSingleArtists + unMatchedSingleManyArtists):
    print(i,'\t',artist)
    retvals = findNearest(artist, previouslyKnownArtists, num=2, cutoff=0.75)
    print('\t',retvals)
    # Only artists with exactly two candidates and at least two albums are recorded.
    if len(retvals) == 2:
        nretvals = [x for x in retvals if x != artist]
        num = len(artistAlbumData[artist])
        if num >= 2:
            print("{0: <40}{1: <10}{2}".format(artist,num,nretvals[0]))
            #print("renames[\"{0}\"] = \"\"".format(val))
            result[artist] = {"Num": num, "Possible": nretvals[0]}

In [None]:
# Show the near-match candidates collected in `result` by the previous cell.
# (`results`, with an "s", is the unrelated empty dict from the pool cell.)
result

# Match Anyone Not Previously Matched

In [366]:
# Run the matcher over the currently unmatched artists.
# NOTE(review): `tmp` and `retval` are each assigned twice and only the second
# assignment is used, so the poolProcByName() run is computed and then discarded.
# This looks like a manual toggle - confirm which input / matcher is wanted.
tmp   = unMatchedSingleManyArtists
tmp   = unMatchedSingleArtists
# NOTE(review): nProc is 0 when tmp is empty.
nProc = min([8, len(tmp)])
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProcByName(nProc, tmp)
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

Current Time is Fri Aug 07, 2020 22:06:44 for Trying N=8 and L=23


Current Time is Fri Aug 07, 2020 22:07:12 for Done with Trying N=8 and L=23
Process [Done with Trying N=8 and L=23] took 27 seconds.


In [367]:
# Pair each attempted artist with its pool result; a missing (None) or empty
# result list yields no updates.
hasResults = retval is not None and len(retval) > 0
latestResults = dict(zip(tmp, retval)) if hasResults else {}
# Latest results override earlier matches for the same artist.
newSaveData = {**matchedChartResults, **latestResults}
_, _ = clock("Last Run")

Current Time is Fri Aug 07, 2020 22:07:12 for Last Run


In [368]:
# One row per artist, one column per database.
DataFrame(latestResults).T

Unnamed: 0,Discogs,AllMusic,MusicBrainz,AceBootlegs,RateYourMusic,LastFM,DatPiff,RockCorner,CDandLP,MusicStack,MetalStorm
KC And The Sunshine Band,64664.0,299668.0,52589099262163635067731866339227987510,,,,,,,,
The Captain & Tennille,151466.0,,,,,,,,,,
Sylvia (r&b),,,,,,,,,,,
Waylon & Willie,,,,,,,,,,,
Thirty Eight Special,,,,,,,,,,,
Gladys Knight And The Pips,30135.0,667169.0,252644507021926366955894659356250765049,,,,,,,,
Ashford & Simpson,15875.0,619423.0,,,,,,,,,
"Willie, Waylon, Johnny & Kris",,,,,,,,,,,
Rene & Angela,518518.0,,112767965573293628078328439323590760824,,,,,,,,
Samuele,,,,,,,,,,,


In [369]:
# Merge the latest pool matches into matchedChartResults and save the result.
# NOTE(review): `update` starts as True, so the save at the bottom always runs
# even when nothing changed - confirm whether it should start as False.
update=True
for latestArtist,latestArtistResults in latestResults.items():
    matches = sum([True for x in latestArtistResults.values() if x is not None])
    if matches == 0:
        # No database matched this artist; nothing to merge.
        continue
    if matchedChartResults.get(latestArtist) is None:
        update = True
        print("   Adding artist {0}".format(latestArtist))
        matchedChartResults[latestArtist] = latestArtistResults
    else:
        for db,dbID in latestArtistResults.items():
            if dbID is None:
                continue
            if matchedChartResults[latestArtist].get(db) is None:
                update = True
                print(latestArtist)
                print("      Setting {0} ID to {1}".format(db,dbID))
                matchedChartResults[latestArtist][db] = str(dbID)
            elif str(matchedChartResults[latestArtist][db]) != str(dbID):
                # The old message used "{{2}}", which printed a literal "{2}"
                # and left the conflicting new ID out of the error.
                raise ValueError("Error with db {0} for artist {1}, possibles [{2},{3}]".format(db, latestArtist, dbID, matchedChartResults[latestArtist][db]))

if update:
    saveMatchedChartArtists(matchedChartResults)

   Adding artist KC And The Sunshine Band
   Adding artist The Captain & Tennille
   Adding artist Gladys Knight And The Pips
   Adding artist Rene & Angela
   Adding artist Prince And The New Power Generation
Saving 5363 artists to matchedBillboardChartArtists.p
Saving data to matchedBillboardChartArtists.p
  --> This file is 283.6kB.
Saved data to matchedBillboardChartArtists.p
  --> This file is 283.6kB.


In [None]:
# Hand-entered database IDs for specific artists. The block is disabled by
# `if False:`; change the condition to apply the overrides to latestResults.
if False:
    latestResults["Zayn Malik"]["Discogs"] = '4852274'
    latestResults['Lauren Duski']["AllMusic"] = '0003629407'
    latestResults['Dylan Scott']["Discogs"] = '4298619'
    latestResults['Dylan Scott']["AllMusic"] = '0003281059'
    latestResults['Blueface']["Discogs"] = '7279800'
    latestResults['Morgan Wallen']["Discogs"] = '6628105'
    latestResults['Morgan Wallen']["AllMusic"] = '0003351348'
    latestResults['Trevor Daniel']["Discogs"] = '7662034'
    
# Commented-out scratch: one more override, and a loop that prints override
# statement templates for every artist in latestResults.
#latestResults['Jxl']["Discogs"] = '5911'
#for artist in DataFrame(latestResults).T.index:
#    print("latestResults[\"{0}\"][\"AllMusic\"] = ''".format(artist))

# Rename Matched Artists

In [None]:
# Report every matched database ID whose artist name in that database differs
# from the chart artist name.
for artist,artistData in matchedChartResults.items():
    for db,dbmatch in artistData.items():
        if dbmatch is None:
            continue
        dbArtist = mdb.getArtistFromDBID(db,dbmatch)
        if artist != dbArtist:
            print("{0: <30}{1: <40}{2: <20}{3}".format(artist,dbmatch,db,dbArtist))

In [None]:

#    print(artist)
# NOTE(review): `dbArtistData` is not assigned in the visible part of this
# notebook; on a fresh kernel this cell would presumably raise NameError.
dbArtistData

In [None]:
# Look for close matches (cutoff 0.9) of every unmatched artist in `myArtists`.
# NOTE(review): `myArtists` is not assigned in the visible part of this notebook.
for i,artist in enumerate(unMatchedSingleArtists + unMatchedSingleManyArtists):
    retvals = findNearest(artist, myArtists, num=2, cutoff=0.9)
    print(i,'\t',artist,'\t',retvals)

In [None]:
# Scratch check of how one combined artist name is split.
# Only the last expression (`mularts.delims`) is displayed by the cell.
mularts = getMA()
artist = "Drake And Future"
mularts.getArtistNames(artist)
mularts.delims

In [None]:

# Print the full chart data and the album list for whatever `artist` currently holds.
print(fullChartData[artist])
print(artistAlbumData[artist])

In [None]:
# Scratch lookup; the key is an empty-string placeholder.
matchedChartResults[""]

   Adding artist Like Mike
Saving 5086 artists to matchedBillboardChartArtists.p
Saving data to matchedBillboardChartArtists.p
  --> This file is 259.4kB.
Saved data to matchedBillboardChartArtists.p
  --> This file is 259.4kB.


# Rank Chart Data

In [None]:
# `mmb` is read as a global by getMatchedAlbums() and isKnownArtist().
mmb = myMusicBase(debug=True)
mmb.findArtistAlbums(count=False)

In [None]:
from listUtils import getFlatList
from searchUtils import findNearest
def getMatchedAlbums(artistName):
    """Return the flattened album list of the artist's "Match" entry in ``mmb``.

    Returns None when ``mmb.getArtistAlbumsByArtist`` has no "Match" entry for
    the artist.
    """
    artistAlbums = mmb.getArtistAlbumsByArtist(artistName)
    if artistAlbums.get("Match") is None:
        return None
    return getFlatList(artistAlbums["Match"].getAlbums().values())

def isKnownAlbum(artistName, albumName):
    """Tell whether ``albumName`` is close (cutoff 0.8) to one of the artist's matched albums.

    Returns:
        None when the artist has no matched albums, otherwise True or False.
    """
    knownAlbums = getMatchedAlbums(artistName)
    if knownAlbums is None:
        return None
    nearest = findNearest(albumName, knownAlbums, num=1, cutoff=0.8)
    return True if len(nearest) == 1 else False

def isKnownArtist(artistName):
    """Return the closest artist in ``mmb`` (cutoff 0.80), or None when there is none."""
    candidates = mmb.getArtists()
    nearest = findNearest(artistName, candidates, num=1, cutoff=0.80)
    if len(nearest) != 1:
        return None
    return nearest[0]

In [None]:
from collections import Counter
# Per-key chart counts. `albums` is filled by the loop over fullChartData below.
# NOTE(review): `songs` is not filled anywhere in the visible cells.
albums = Counter()
songs  = Counter()

In [None]:
def albumKey(artist, album):
    """Build the counter key for an album: artist and album joined by "   :::   "."""
    separator = "   :::   "
    return separator.join((artist, album))
    
def songKey(artist, song):
    """Build the counter key for a song: artist and song joined by "   :::   "."""
    separator = "   :::   "
    return separator.join((artist, song))

# For every (artist, album) pair, count the chart entries summed over all
# charts the album appears on.
for artist, artistData in fullChartData.items():
    artistAlbums = artistData["Albums"]
    for album,albumData in artistAlbums.items():
        key = albumKey(artist, album)
        # Reset first, so re-running the cell does not double the counts.
        albums[key] = 0
        for chart,albumChartData in albumData.items():
            albums[key] += len(albumChartData.values())

In [None]:
def findArtistRenames(albums):
    """Print ``chart name: known name`` for chart artists whose nearest known artist is spelled differently.

    Args:
        albums: Counter keyed by ``albumKey`` strings; the artist is the part
            before the "   :::   " separator. Keys are visited in
            ``most_common()`` order.
    """
    for albumKeyText, _count in albums.most_common():
        artist, _album = albumKeyText.split("   :::   ")
        knownArtist = isKnownArtist(artist)
        # Nothing to report without a known artist or when the names already agree.
        if knownArtist is not None and artist != knownArtist:
            print(": ".join([artist, knownArtist]))
# Run the report over the album counter built in the previous cell.
findArtistRenames(albums)

In [None]:
# List chart albums with at least two chart entries that are not in the
# collection yet:
#   "Get"  - the artist has no matched albums at all,
#   "--->" - the artist is matched but this album is not among its albums.
for item in albums.most_common():
    cnt = item[1]
    if cnt < 2:
        continue
    artist,album = item[0].split("   :::   ")

    # The isKnownArtist() lookup that used to run here was never read, so it
    # only cost one fuzzy search per album.
    known = isKnownAlbum(artist,album)
    if known is None:
        known = "Get"
    elif known is True:
        known = ""
    else:
        known = "--->"
    if len(known) > 0:
        print("{0: <5}{1: <8}{2: <35}{3: <50}".format(cnt,known,artist,album))

***
***

In [None]:
# Normalise AllMusic IDs that are not plain integers by dropping their first
# two characters.
for artist in matchedChartResults.keys():
    mb = matchedChartResults[artist]["AllMusic"]
    if mb is not None:
        try:
            int(mb)
        except (TypeError, ValueError):
            # Only conversion failures are handled here; the bare `except:`
            # also swallowed KeyboardInterrupt and unrelated errors.
            print(artist,mb)
            mb = str(mb[2:])
            print(artist,mb)
            matchedChartResults[artist]["AllMusic"] = mb

In [None]:
# NOTE(review): this writes to "matchedChartArtists.p" regardless of chartType,
# whereas saveMatchedChartArtists() picks a chart-specific file - confirm which
# file is intended before running with chartType = "Billboard".
print("Saving {0} artists to {1}".format(len(matchedChartResults), "matchedChartArtists.p"))
saveFile(idata=matchedChartResults, ifile="matchedChartArtists.p", debug=True)

In [None]:
# Summarise and display the latest pool results.
dfLatest = updateDataFrame(latestResults)
dfLatest
#dfTnew   = updateDataFrame(newSaveData)
#dfTnew

In [None]:
# Raw {artist: {db: id}} mapping from the last pool run.
latestResults

In [None]:
# Summary tables for the stored matches, the latest run, and their merge.
dfT      = updateDataFrame(matchedChartResults)
dfLatest = updateDataFrame(latestResults)
dfTnew   = updateDataFrame(newSaveData)
_, _ = clock("Last Run")

In [None]:
# NOTE(review): fixed file name; it does not depend on chartType.
print("There are {0} updated matched artists".format(len(newSaveData)))
saveFile(idata=newSaveData, ifile="matchedChartArtists.p", debug=True)
_, _ = clock("Last Run")

In [None]:
# Reload the saved matches and split them into artists with no database match
# ("NoChartMatches.p") and artists with at least one ("FewChartMatches.p").
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
dfT = updateDataFrame(matchedChartResults)

# NOTE(review): `results` is reused for both slices; after this cell it holds
# the matched slice.
results = dfT[dfT["Matched"] == 0]
print("There are {0} un matched entries.".format(results.shape[0]))
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] > 0]
print("There are {0} matched entries.".format(results.shape[0]))
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)
_, _ = clock("Last Run")

***
***
***

# Loop over unmatched artists

In [None]:
# Load the table of artists without any database match; the message prints
# its shape.
unmatchedArtists = getFile(ifile="NoChartMatches.p", debug=True)
print("There are {0} previously unmatched artists".format(unmatchedArtists.shape))
unmatchedArtists

In [None]:
# Print unmatched artists whose name contains two or more apostrophes.
# The artist names are the table's index (the next cell also reads `.index`);
# a DataFrame has no `.list` attribute.
for artist in unmatchedArtists.index:
    if artist.count("'") >= 2:
        print(artist)

In [None]:
# Re-run the album-based matcher over every artist in the unmatched table.
unmatchedArtistsToMatch = list(unmatchedArtists.index)
nProc = 8
tmp   = unmatchedArtistsToMatch
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
# Pair each attempted artist with its pool result. A None result (no value
# came back from the pool) is treated like an empty one instead of raising
# TypeError in len().
if retval is not None and len(retval) > 0:
    latestResults = dict(zip(tmp, retval))
else:
    latestResults = {}
# Latest results override earlier matches for the same artist.
newSaveData = {**matchedChartResults, **latestResults}
_, _ = clock("Last Run")

In [None]:
# Summary tables for the stored matches, the latest run, and their merge.
dfT      = updateDataFrame(matchedChartResults)
dfLatest = updateDataFrame(latestResults)
dfTnew   = updateDataFrame(newSaveData)
_, _ = clock("Last Run")

In [None]:
# NOTE(review): fixed file name; it does not depend on chartType.
print("There are {0} updated matched artists".format(len(newSaveData)))
saveFile(idata=newSaveData, ifile="matchedChartArtists.p", debug=True)
_, _ = clock("Last Run")

In [None]:
# Reload the saved matches and split them into artists with no database match
# ("NoChartMatches.p") and artists with at least one ("FewChartMatches.p").
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
dfT      = updateDataFrame(matchedChartResults)
results = dfT[dfT["Matched"] == 0]
print("There are {0} un matched entries.".format(results.shape[0]))
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] > 0]
print("There are {0} matched entries.".format(results.shape[0]))
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)
_, _ = clock("Last Run")

***
***
***

# Test 'The'

In [None]:
# Load the table of artists without any database match; the message prints
# its shape.
unmatchedArtists = getFile(ifile="NoChartMatches.p", debug=True)
print("There are {0} previously unmatched artists".format(unmatchedArtists.shape))
unmatchedArtists

In [None]:
# Retry every unmatched artist with its "The " prefix toggled (see processThe).
nProc = 8
tmp   = list(unmatchedArtists.index)
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProcThe(nProc, tmp)
elapsed(start, cmt)

In [None]:
# Build the per-artist table for the "The" retry.
# These are the same steps as updateDataFrame(), without its printed summary.
df = DataFrame(dict(zip(tmp, retval)))
dfT = df.T
# Number of databases with a non-null ID per artist.
dfT["Matched"] = df.count(axis = 0)
artistAlbums   = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in list(dfT.index)}
artistAlbumsDF = DataFrame(artistAlbums).T
dfT = dfT.join(artistAlbumsDF)

In [None]:
# Artists that matched once their "The " prefix was toggled.
theArtists = list(dfT[dfT["Matched"] > 0].index)
# Record the spelling that was actually tried, mirroring processThe(): names
# that already start with "The " were matched without it, all others with it.
# (Always prefixing produced "The The ..." for the first group.)
vals = {artist: (artist[4:] if artist.startswith("The ") else "The {0}".format(artist)) for artist in theArtists}
saveFile(idata=vals, ifile="tmp.yaml")

In [None]:
# ELO

# Rename Code

In [None]:
# Look for near-duplicate spellings (cutoff 0.95) among the single-artist
# chart entries.
renames = {}

# NOTE(review): this rebinds `singleArtists` and `result`, which earlier cells
# also assign.
singleArtists = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) == 1]
result = {}
for artist in singleArtists:
    retvals = findNearest(artist, artistAlbumData.keys(), num=2, cutoff=0.95)
    # Only artists with exactly two candidates and at least two albums are recorded.
    if len(retvals) == 2:
        nretvals = [x for x in retvals if x != artist]
        num = len(artistAlbumData[artist])
        if num >= 2:
            print("{0: <40}{1: <10}{2}".format(artist,num,nretvals[0]))
            #print("renames[\"{0}\"] = \"\"".format(val))
            result[artist] = {"Num": num, "Possible": nretvals[0]}

In [None]:
# Turn the candidates into {kept name: [alternative spellings]} and save them.
renames = {}
for artist in result.keys():
    possible = result[artist]["Possible"]
    # When the alternative spelling is itself a candidate with more albums,
    # skip this direction of the rename.
    if result.get(possible) is not None:
        if result[possible]["Num"] > result[artist]["Num"]:
            continue
    if renames.get(artist) is None:
        renames[artist] = []
    if possible not in renames[artist]:
        renames[artist].append(possible)
    #print("renames[\"{0}\"] = \"{1}\"".format(possible, artist))
saveFile(idata=renames, ifile="renames.yaml", debug=True)

In [None]:
# NOTE(review): loads the fixed "matchedChartArtists.p" file rather than going
# through getMatchedChartArtists(), so chartType is ignored here.
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)

***
***
***

## Artist Renames

# Multi Artists Information

In [None]:
# Reload the saved matches and split them by number of matched databases.
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
from pandas import DataFrame
df = DataFrame(matchedChartResults)
dfT = df.T
print("Size of Artists:                        {0}".format(dfT.shape))
dfT["Matched"] = df.count(axis = 0)

# NOTE(review): here "FewChartMatches.p" holds artists with exactly one match,
# while the cells that use updateDataFrame() store every artist with at least
# one. The tables saved here also carry no "Num"/"Albums" columns.
results = dfT[dfT["Matched"] == 0]
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] == 1]
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)

print("Number of Zero Matches in New Stuff:    {0}".format((dfT[dfT["Matched"] == 0].shape)))
print("Number of NonZero Matches in New Stuff: {0}".format((dfT[dfT["Matched"] > 0].shape)))

In [None]:
# Reload the saved matches and rebuild the per-artist table with its
# "Matched" count (number of non-null database IDs per artist).
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
from pandas import DataFrame
df = DataFrame(matchedChartResults)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)

In [None]:
# Look for near-duplicate spellings (cutoff 0.95) among the single-artist
# chart entries. This cell repeats the code of the earlier "Rename Code" cell.
renames = {}

singleArtists = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) == 1]
result = {}
for artist in singleArtists:
    retvals = findNearest(artist, artistAlbumData.keys(), num=2, cutoff=0.95)
    # Only artists with exactly two candidates and at least two albums are recorded.
    if len(retvals) == 2:
        nretvals = [x for x in retvals if x != artist]
        num = len(artistAlbumData[artist])
        if num >= 2:
            print("{0: <40}{1: <10}{2}".format(artist,num,nretvals[0]))
            #print("renames[\"{0}\"] = \"\"".format(val))
            result[artist] = {"Num": num, "Possible": nretvals[0]}

In [None]:
# Reload the saved matches, attach album counts, and split by number of
# matched databases.
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
from pandas import DataFrame
df = DataFrame(matchedChartResults)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)


# Album count and album list per artist, joined on the artist index.
artistAlbums   = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in list(dfT.index)}
artistAlbumsDF = DataFrame(artistAlbums).T
dfT = dfT.join(artistAlbumsDF)
print("Size of Chart Aritsts:                  {0}".format(dfT.shape))


# NOTE(review): "FewChartMatches.p" holds artists with exactly one match here.
results = dfT[dfT["Matched"] == 0]
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] == 1]
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)

print("Number of Zero Matches in New Stuff:    {0}".format((dfT[dfT["Matched"] == 0].shape)))
print("Number of NonZero Matches in New Stuff: {0}".format((dfT[dfT["Matched"] > 0].shape)))

In [None]:
# Reload the saved matches, attach album counts, and split by number of
# matched databases. This cell repeats the code of the cell before the
# previous one's sibling; it can be re-run on its own.
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)
from pandas import DataFrame
df = DataFrame(matchedChartResults)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)


# Album count and album list per artist, joined on the artist index.
artistAlbums   = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in list(dfT.index)}
artistAlbumsDF = DataFrame(artistAlbums).T
dfT = dfT.join(artistAlbumsDF)
print("Size of Chart Aritsts:                  {0}".format(dfT.shape))


# NOTE(review): "FewChartMatches.p" holds artists with exactly one match here.
results = dfT[dfT["Matched"] == 0]
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] == 1]
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)

print("Number of Zero Matches in New Stuff:    {0}".format((dfT[dfT["Matched"] == 0].shape)))
print("Number of NonZero Matches in New Stuff: {0}".format((dfT[dfT["Matched"] > 0].shape)))

In [None]:
# Rank the unmatched artists by album count and save the names worth fetching
# (more than one album, excluding placeholder "artists").
notAnArtist = ["Soundtrack", 'Various', 'Varios']
noMatches = getFile("NoChartMatches.p")
noMatches = noMatches.sort_values("Num", ascending=False)
ranking = noMatches[["Num", "Albums"]]
ranking = ranking[~ranking.index.isin(notAnArtist)]
toget = list(ranking[ranking["Num"] > 1].index)
print("Here are {0} artists to get".format(len(toget)))
saveFile(idata=toget, ifile="../discogs/unmatchedArtists4.p")

# Merge No Matches

In [None]:
# Number of rows in the saved table of unmatched artists.
noChartMatches = getFile("NoChartMatches.p")
len(noChartMatches)

In [None]:
# Scratch check: slicing off the first four characters removes a leading "The ".
x = "The Hello"
x[4:]

# Pool/Process

In [None]:
# Pair the artists of the last pool run with their results.
# NOTE(review): relies on `tmp` and `retval` left over from an earlier pool cell.
latestResults = dict(zip(tmp, retval))

In [None]:
from pandas import DataFrame
# Match counts for the stored results ...
df = DataFrame(matchedChartResults)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)
print("Number of Zero Matches: {0}".format((dfT[dfT["Matched"] == 0].shape)))

# ... and for the latest pool run.
dfLatest = DataFrame(latestResults)
dfLatestT = dfLatest.T
dfLatestT["Matched"] = dfLatest.count(axis = 0)
print("Number of Zero Matches in New Stuff: {0}".format((dfLatestT[dfLatestT["Matched"] == 0].shape)))
print("Number of NonZero Matches in New Stuff: {0}".format((dfLatestT[dfLatestT["Matched"] > 0].shape)))

In [None]:
# Merge the latest results over the stored ones, then split the merged table
# by number of matched databases.
newSaveData = {**matchedChartResults, **latestResults}

df = DataFrame(newSaveData)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)
print("Number of Zero Matches in Updated Stuff: {0}".format((dfT[dfT["Matched"] == 0].shape)))
print("Number of NonZero Matches in Updated Stuff: {0}".format((dfT[dfT["Matched"] > 0].shape)))

# NOTE(review): "FewChartMatches.p" holds artists with exactly one match here.
results = dfT[dfT["Matched"] == 0]
saveFile(idata=results, ifile="NoChartMatches.p", debug=True)
results = dfT[dfT["Matched"] == 1]
saveFile(idata=results, ifile="FewChartMatches.p", debug=True)

# These two lines print the same counts as the "Updated Stuff" lines above.
print("Number of Zero Matches in New Stuff:    {0}".format((dfT[dfT["Matched"] == 0].shape)))
print("Number of NonZero Matches in New Stuff: {0}".format((dfT[dfT["Matched"] > 0].shape)))

_, _ = clock("Last Run")

In [None]:
# NOTE(review): fixed file name; it does not depend on chartType.
saveFile(idata=newSaveData, ifile="matchedChartArtists.p", debug=True)
_, _ = clock("Last Run")

# Multi Artists

In [None]:
# Chart entries that resolve to more than one artist name, with those names.
manyArtists   = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) > 1]
manyArtistAlbums = {artist: {"Artists": mularts.getArtistNames(artist)} for artist in manyArtists}

In [None]:
# First rows of the multi-artist table (one row per chart entry).
DataFrame(manyArtistAlbums).T.head()

In [None]:
from collections import Counter
cntr = Counter()
for k,artists in manyArtistAlbums.items():
    for artist in artists["Artists"]:
        cntr[artist] += 1

In [None]:

artistAlbums   = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in list(dfT.index)}

In [None]:
from pandas import Series
multis = Series(cntr).sort_values(ascending=False).to_dict()
multis = {k: {"Chart Counts": v, "Num": artistAlbumData.get(k)} for k,v in multis.items()}
for k in multis.keys():
    if multis[k]["Num"] is not None:
        multis[k]["Num"] = len(multis[k]["Num"])
    else:
        multis[k]["Num"] = 0

multisDF = DataFrame(multis).T
noMultis = multisDF[multisDF["Num"] == 0]

print("Here are {0} artists to get".format(len(noMultis.index)))
saveFile(idata=list(noMultis.index), ifile="../discogs/unmatchedArtists_Multi.p")


In [None]:
noMultis

In [None]:
singleArtists = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) == 1]
artistAlbums  = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in singleArtists}
len(artistAlbums)

In [None]:
manyArtists   = [artist for artist in list(artistAlbumData.keys()) if len(mularts.getArtistNames(artist)) > 1]
manyArtistAlbums = {artist: {"Num": len(artistAlbumData[artist]), "Albums": artistAlbumData[artist]} for artist in manyArtists}
len(manyArtistAlbums)

In [None]:
from pandas import DataFrame, Series
nAlbumsDF = DataFrame(artistAlbums).T
#nAlbumsDF.columns = ["Albums"]
nAlbumsDF.head()

***
***

# Analyze ManyArtists

In [None]:
from pandas import DataFrame, Series
nManyAlbumsDF = DataFrame(manyArtistAlbums).T
nManyAlbumsDF = nManyAlbumsDF.sort_values("Num", ascending=False)
#nAlbumsDF.columns = ["Albums"]
print("\n".join(list(nManyAlbumsDF[nManyAlbumsDF["Num"] >= 4].index)))

# Match The Single Artists

In [None]:
prevResults = {}
prevResults = getFile(ifile="matchedChartArtists.p", debug=True)

maxI = 4500
if maxI == 0:
    prevResults = {}
print("Found {0} previously matched artists".format(len(prevResults)))

In [None]:
nProc = 8
tmp   = singleArtists[8000:14000]
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
savedata2 = dict(zip(tmp, retval))
len(savedata2)

In [None]:
oldSaveData = getFile(ifile="matchedChartArtists.p", debug=True)
saveFile(idata=oldSaveData, ifile="matchedChartArtists_tmp.p", debug=True)

print("Num Old Data:  {0}".format(len(oldSaveData)))
print("Num Save Data: {0}".format(len(savedata2)))
newSaveData = {**oldSaveData, **savedata2}
print("Num New Data:  {0}".format(len(newSaveData)))

In [None]:
#saveFile(idata=newSaveData, ifile="matchedChartArtists.p", debug=True)
oldSaveData = getFile(ifile="matchedChartArtists.p", debug=True)
saveFile(idata=oldSaveData, ifile="matchedChartArtists_tmp.p", debug=True)

# Load All Matched Single Chart Artists

In [None]:
maindb = mainDB(create=False)

In [None]:

dfT = dfT.join(nAlbumsDF)
dfT.head()

In [None]:
# Rank the artists stored in NoChartMatches.p by their "Num" column and
# queue the largest ones for download.
noMatches = getFile("NoChartMatches.p")
noMatches.sort_values("Num", ascending=False, inplace=True)
ranking = noMatches[["Num", "Albums"]]
# Drop index entries listed in notAnArtist (expected to be defined by an earlier cell).
ranking = ranking[~ranking.index.isin(notAnArtist)]
# Keep only rows whose "Num" is greater than 2.
toget = list(ranking[ranking["Num"] > 2].index)
saveFile(idata=toget, ifile="../discogs/unmatchedArtists2.p")

In [None]:
ranking[ranking.index.str.contains("feat")]

In [None]:
mdb = myMusicDBMap(debug=False)
mdb.getFullDBData()

In [None]:
from pandas import DataFrame
df = DataFrame(matchedChartResults)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)
dfT.head()

# Loop Over The No Matches

In [None]:
unMatchedArtists = getFile(ifile="NoChartMatches.p", debug=True)

from multiprocessing import Pool    
from time import sleep
from copy import deepcopy

mdb = myMusicDBMap(debug=False)
mdb.getFullDBData()

mcm = matchChartMusic(mdb)
mularts  = multiartist(cutoff=0.9, discdata=None, exact=False)
multiDelimArtists=open("../multiartist/multiDelimArtists.dat").readlines()
multiDelimArtists = [x.replace("\n", "") for x in multiDelimArtists]
multiDelimArtists[:10]
mularts.setKnownMultiDelimArtists(multiDelimArtists)
results = {}

In [None]:
def process(artist):
    """Run the chart matcher for a single artist and return its result.

    Worker function for ``poolProc``. Reads the module-level
    ``artistAlbumData`` mapping and the module-level ``mcm`` matcher.

    Args:
        artist: key into ``artistAlbumData``.

    Returns:
        Whatever ``mcm.matchChartArtist(ratioCut=0.8, returnData=True)`` returns.
    """
    chartAlbums = artistAlbumData[artist]
    mcm.setChartArtistData(artist, chartAlbums)
    return mcm.matchChartArtist(ratioCut=0.8, returnData=True)

def poolProc(nProc, tmp):
    """Map ``process`` over ``tmp`` with a pool of ``nProc`` worker processes.

    Args:
        nProc: number of worker processes.
        tmp: iterable of artists; each item is passed to ``process``.

    Returns:
        List of ``process`` results, in the same order as ``tmp``.
    """
    # The context manager releases the worker processes even if a worker raises.
    with Pool(processes=nProc) as pool:
        result = pool.map_async(process, tmp)

        # Wait in 10 s slices; wait() returns early as soon as the job is done.
        while not result.ready():
            result.wait(timeout=10)

        print("")
        # Returning only after the loop guarantees a result even when the job
        # finished before the first ready() check.
        return result.get()

nProc = 8
tmp   = unMatchedArtists[:4]
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
matches = dict(zip(tmp, retval))

In [None]:
matchedChartResults = getFile(ifile="matchedChartArtists.p", debug=True)

In [None]:
for artist in matches.keys():
    matchedChartResults[artist] = matches[artist]
saveFile(idata=matchedChartResults, ifile="matchedChartArtists.p", debug=True)

In [None]:


from pandas import DataFrame
df = DataFrame(matches)
df
dfT = df.T
dfT["Matched"] = df.count(axis = 0)
dfT.head()
dfT[dfT["Matched"] > 0]

# Find Errors and Merge DBs

In [None]:
# Compare, per artist and per DB, the ID stored in my music DB with the ID
# found through the chart matching. Missing IDs on my side are added to mdb;
# conflicting IDs are collected in `errors` for manual review.
# NOTE(review): `df` comes from DataFrame(matches); if pandas fills missing
# entries with NaN rather than None, the `is None` checks below will not
# catch them — confirm against the data.
errors = {}
print("{0: <20}{1: <15}{2: <40}{3: <40}{4}".format("Artist", "DB", "My DB ID", "Chart DB ID", "ERROR"))
for artist in list(df.columns):
    print("{0: <20}".format(artist), end="")
    retval = mdb.getArtistDataIDs(artist)
    firstDB = True
    for db,dbID in retval.items():
        # Only the first row of an artist shows the name; later rows are padded to align.
        if firstDB is True:
            firstDB = False
        else:
            print("{0: <20}".format(''), end="")
        print("{0: <15}".format(db), end="")
        print("{0: <40}".format('' if dbID is None else dbID), end="")

        chartDBID = df[artist][db]
        print("{0: <40}".format('' if chartDBID is None else chartDBID), end="")

        # The chart match has an ID that my DB lacks: adopt it.
        if chartDBID is not None and dbID is None:
            mdb.add(artist, db, chartDBID)

        # Both sides have an ID but they disagree: flag the row and record it.
        if all([dbID,chartDBID]) and dbID != chartDBID:
            print(dbID==chartDBID, end="")
            #mdb.add(artist, db, chartDBID)
            errors.setdefault(artist, {})[db] = {"Mine": dbID, "Chart": chartDBID}
        print("")
    print("")
    # Stop once enough conflicts are collected to review in one sitting.
    if len(errors) > 50:
        break

In [None]:
#maindb.dbdata["LastFM"]["Utils"].getArtistID('Morten Harket')
#mdb.add("Blackstreet", "MusicBrainz", "78850123839630113234659525690757885261")
#mdb.add("214847928836161113317702378728828431474
#mdb.save()

In [None]:

nProc = 4
tmp   = deepcopy(singleArtists[:80])
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
nProc = 4
tmp   = deepcopy(singleArtists[:160])
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
nProc = 4
tmp   = deepcopy(singleArtists[:640])
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
nProc = 8
tmp   = deepcopy(singleArtists[:640])
start, cmt = clock("Trying N={0} and L={1}".format(nProc, len(tmp)))
retval = poolProc(nProc, tmp)
elapsed(start, cmt)

In [None]:
mcm = matchChartMusic(mdb)
mularts  = multiartist(cutoff=0.9, discdata=None, exact=False)
results = {}


n = len(artistAlbumData)
from collections import Counter
multiCntr = Counter()
multis = {}

for i,(artist,albums) in enumerate(artistAlbumData.items()):
    if i <= maxI:
        continue
    matches = mularts.getArtistNames(artist)
    if len(matches) > 1:
        print("{0} -> {1}".format(artist,matches))
        for match in matches:
            multiCntr[match] += 1
        multiCntr[artist] += 1
        continue
    #print("\n")
    #print("="*50)
    print(i,'/',n,"\t{0: <40}".format(artist),'\t',len(albums))
    mcm.setChartArtistData(artist, albums)
    mcm.matchChartArtist(ratioCut=0.85)
    
    
    if i % 250 == 0:
        if len(prevResults) > 0:
            results = {**prevResults, **mcm.getResults()}
            prevResults = {}
        else:
            results = mcm.getResults()
        print("Saving {0} artists".format(len(results)))
        saveFile(idata=results, ifile="matchedChartArtists.p", debug=True)

In [None]:
list(artistAlbumData.keys())[maxI:]

In [None]:

def parseArtistsMT(modVal, force=False, doExtra=False):
    """Pool worker used for the "MetalStorm" branch of ``parseArtistsParallel``.

    Delegates to the module-level ``artsMT.parseArtistModValFiles``.

    Args:
        modVal: value forwarded as the first argument (the caller maps this
            over ``range(100)``).
        force: forwarded as ``force=``.
        doExtra: accepted but not used by this function.
    """
    artsMT.parseArtistModValFiles(modVal, force=force)

    
def parseArtistsParallel(db, nProcs=3, force=False):
    """Run the artist parser of one database in a process pool.

    Args:
        db: database name; selects which ``parseArtistsXX`` worker is mapped
            and over which inputs (``range(100)`` or a single ``None``).
        nProcs: number of worker processes.
        force: only affects how often completion is polled (every 10 s when
            True, every 1 s otherwise); it is not forwarded to the workers.

    Returns:
        List of worker return values as produced by ``map_async(...).get()``.

    Raises:
        ValueError: if ``db`` is not one of the recognised database names.
    """
    # The context manager releases the workers on every exit path, including
    # the ValueError raised for an unknown db.
    with Pool(processes=nProcs) as pool:
        # An explicit chain (rather than a lookup table) is kept so that only
        # the worker for the requested db has to be defined in the session.
        if db == "Discogs":
            result = pool.map_async(parseArtistsDC, range(100))
        elif db == "AllMusic":
            result = pool.map_async(parseArtistsAM, range(100))
        elif db == "MusicBrainz":
            result = pool.map_async(parseArtistsMB, range(100))
        elif db == "AceBootlegs":
            result = pool.map_async(parseArtistsAB, [None])
        elif db == "DatPiff":
            result = pool.map_async(parseArtistsDP, [None])
        elif db == "RateYourMusic":
            # Runs in the parent process before the workers are started.
            dbdata["RateYourMusic"]["Artists"].parseDownloadedFiles()
            result = pool.map_async(parseArtistsRM, range(100))
        elif db == "LastFM":
            result = pool.map_async(parseArtistsLM, range(100))
        elif db == "RockCorner":
            result = pool.map_async(parseArtistsRC, range(100))
        elif db == "CDandLP":
            result = pool.map_async(parseArtistsCL, range(100))
            #result = pool.map_async(parseArtistsCL, range(56,72))
            #result = pool.map_async(parseArtistsCL, [55,25,26])
        elif db == "MusicStack":
            result = pool.map_async(parseArtistsMS, [None])
        elif db == "MetalStorm":
            result = pool.map_async(parseArtistsMT, range(100))
        else:
            raise ValueError("[{0}] is not recognized as a DB".format(db))

        # AsyncResult.wait() is used for polling so the function does not depend
        # on a `time` module binding; it also returns early when the job is done.
        pollSeconds = 10 if force is True else 1
        while not result.ready():
            result.wait(timeout=pollSeconds)
        print("")
        return result.get()

In [None]:
multiCntr.most_common()

In [None]:
from pandas import DataFrame
df = DataFrame(results)
dfT = df.T
dfT["Matched"] = df.count(axis = 0)
dfT[dfT["Matched"] == 0]

In [None]:
for artist,artistData in fullChartData.items(): #[artist][key][album][chartName][date] = i
    print("\n")
    print("="*100)
    print(artist)
    # Songs
    print("\tSongs:")
    for songName,songData in artistData["Songs"].items():
        print("\t",songName,'\t\t',end="")
        chartWeeks = 0
        for chartName,songChartData in songData.items():
            weeks = len(songChartData)
            chartWeeks += weeks
        print(chartWeeks)
        
    # Albums
    print("\tAlbums:")
    for albumName,albumData in artistData["Albums"].items():
        print("\t",albumName,'\t\t',end="")
        chartWeeks = 0
        for chartName,albumChartData in albumData.items():
            weeks = len(albumChartData)
            chartWeeks += weeks
        print(chartWeeks)

In [None]:
knownArtists

In [None]:
from fileUtils import getBaseFilename

# Fold every per-chart result file into one nested mapping:
#   results[artist]["Songs" | "Albums"][title][chartName][chartDate] = index in that chart
results = {}
for ifile in files:
    chartData = getFile(ifile)
    cname     = getBaseFilename(ifile)
    print("{0: <40}".format(cname),end="")
    # Files whose base name ends in "Albums" feed the album bucket; all others are songs.
    key = "Albums" if cname.endswith("Albums") else "Songs"
    # The loop variable is deliberately not called `date`: that would rebind the
    # `date` class imported from datetime that all_sundays() relies on.
    for chartDate, values in chartData.items():
        for i,item in enumerate(values):
            artist = item["Artist"]
            # Apply the multi-artist rename first, then the single-artist ID mapping.
            if multiRenames.get(artist) is not None:
                artist = multiRenames[artist]
            if singleRenames.get(artist) is not None:
                idx    = str(singleRenames[artist])
                artist = artistIDToName[idx]
            album = item["Album"]
            artistEntry = results.setdefault(artist, {"Songs": {}, "Albums": {}})
            artistEntry[key].setdefault(album, {}).setdefault(cname, {})[chartDate] = i
    print(len(results))

In [None]:
for chartName, chartData in newCharts.items():
    pass  # TODO: the loop body was never written; `pass` keeps the cell syntactically valid

In [None]:
def getStarterFiles(charts):
    """Fetch each chart's landing page and save the list of dates it offers.

    For every ``cid -> cname`` pair the chart page is downloaded, the options
    of its ``<select name="date">`` element are read, and a ``{date: url}``
    dict is saved to the ``<cname>-Dates.p`` path built by ``setFile`` under
    ``<basedir>/data/top40``. Sleeps 2 s after each chart.

    Args:
        charts: dict mapping chart id (the ``cid`` query parameter) to chart name.

    Raises:
        ValueError: if a chart page contains no ``<select name="date">``.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule is loaded.
    import urllib.request

    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    headers = {'User-Agent': user_agent,}

    for cid, cname in charts.items():
        print("{0} --> {1}".format(cid,cname))
        url = "https://top40-charts.com/chart.php?cid={0}".format(cid)

        savedir = join(basedir, "data", "top40")
        mkDir(savedir)

        request = urllib.request.Request(url, None, headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read()

        bsdata = getHTML(data)
        select = bsdata.find("select", {"name": "date"})
        if select is None:
            raise ValueError("No dates!")

        # One download URL per date listed in the drop-down.
        chartsToGet = {}
        for option in select.findAll("option"):
            value = option.attrs['value']
            chartsToGet[value] = "https://top40-charts.com/chart.php?cid={0}&date={1}".format(cid, value)

        # "/" in a chart name would otherwise be read as a path separator.
        savename = setFile(savedir, "{0}-Dates.p".format(cname.replace("/", " ")))
        print("Saving {0} dates to {1}".format(len(chartsToGet), savename))
        saveFile(idata=chartsToGet, ifile=savename)
        sleep(2)

In [None]:
from datetime import date, timedelta

def all_sundays(year):
    """Yield every Sunday of ``year`` as a ``datetime.date``, in ascending order."""
    new_years_day = date(year, 1, 1)
    # weekday() is 0 for Monday ... 6 for Sunday, so this is the distance to the first Sunday.
    sunday = new_years_day + timedelta(days=6 - new_years_day.weekday())
    while sunday.year == year:
        yield sunday
        sunday = sunday + timedelta(days=7)
            
from pandas import DataFrame
def getMusicData(key, artist):
    """Return the rows of the module-level ``discdf`` whose ``key`` column equals ``artist``.

    Returns:
        The matching sub-frame, or ``None`` when no row matches.
    """
    matchingRows = discdf[discdf[key] == artist]
    return matchingRows if matchingRows.shape[0] > 0 else None
    
from difflib import SequenceMatcher

def getRowByIndex(pdf, idx):
    """Return ``pdf.loc[idx]``, i.e. the row(s) of ``pdf`` selected by index label."""
    selected = pdf.loc[idx]
    return selected

In [None]:
def _albumOverlapScore(myAlbumNames, artistAlbums, cutoff):
    """Score how well ``myAlbumNames`` match one DB artist's albums.

    For each of my album names the best ``SequenceMatcher`` ratio against any
    DB album name is taken; ratios above ``cutoff`` are summed.

    Args:
        myAlbumNames: list of album/song titles to look for.
        artistAlbums: dict ``albumType -> {albumID: dbAlbumName}``.
        cutoff: ratios at or below this value contribute nothing.
    """
    score = 0
    for myAlbumName in myAlbumNames:
        bestRatio = max(
            (SequenceMatcher(None, myAlbumName, dbAlbumName).ratio()
             for albumTypeData in artistAlbums.values()
             for dbAlbumName in albumTypeData.values()),
            default=0,
        )
        if bestRatio > cutoff:
            score += bestRatio
    return score


def _pickBestCandidate(idxResults, matchValue):
    """Return ``(idx, artist, roundedScore)`` for the top-scoring idx, or three ``None``s if empty."""
    if len(idxResults) == 0:
        return (None, None, None)
    # max() keeps the first entry on ties, like Counter.most_common(1) did,
    # without needing Counter to be imported by some other cell.
    idx, score = max(idxResults.items(), key=lambda entry: entry[1])
    return (idx, matchValue[idx], round(score, 2))


def getBestArtistIDMatch(artistName, artistResults, possibleMatches, N=3, cutoff=0.7, debug=False):
    """Pick, among given DB indices, the artist whose albums best match ``artistResults``.

    Args:
        artistName: name reported back for every candidate.
        artistResults: iterable of my album/song titles for this artist.
        possibleMatches: iterable of index labels into the module-level ``artistAlbumsDB``.
        N: accepted for signature parity with ``getBestArtistMatch``; not used here.
        cutoff: minimum per-title ratio that counts towards a candidate's score.
        debug: print one line per candidate with its score.

    Returns:
        ``(idx, artistName, score)`` of the best candidate (score rounded to two
        decimals; may be 0 if nothing matched), or ``(None, None, None)`` when
        ``possibleMatches`` is empty.
    """
    myAlbumNames = list(artistResults)
    idxResults = {}
    matchValue = {}
    for idx in possibleMatches:
        matchValue[idx] = artistName
        artistAlbums = getRowByIndex(artistAlbumsDB, idx)["Albums"]
        idxResults[idx] = _albumOverlapScore(myAlbumNames, artistAlbums, cutoff)

        if debug:
            print("\t{0: <5}{1: <15}{2}".format(idxResults[idx], idx, matchValue[idx]))

    return _pickBestCandidate(idxResults, matchValue)


def getBestArtistMatch(artistName, artistResults, N=3, cutoff=0.7, debug=False):
    """Find the DB artist with a similar name whose albums best match ``artistResults``.

    Candidate names come from ``findNearest(artistName, artistNameToID.keys(), N, cutoff)``;
    every index listed for a candidate name in the module-level ``artistNameToID``
    is scored against the module-level ``artistAlbumsDB``.

    Args:
        artistName: chart artist name to look up.
        artistResults: iterable of my album/song titles for this artist.
        N: forwarded to ``findNearest``.
        cutoff: forwarded to ``findNearest`` and also used as the minimum
            per-title ratio that counts towards a candidate's score.
        debug: print the artist name and one line per candidate.

    Returns:
        ``(idx, matchedName, score)`` of the best candidate (score rounded to two
        decimals; may be 0 if nothing matched), or ``(None, None, None)`` when
        no candidate name was found.
    """
    myAlbumNames = list(artistResults)
    if debug:
        print(artistName)

    idxResults = {}
    matchValue = {}
    for artist in findNearest(artistName, artistNameToID.keys(), N, cutoff):
        for idx in artistNameToID[artist]:
            matchValue[idx] = artist
            artistAlbums = getRowByIndex(artistAlbumsDB, idx)["Albums"]
            idxResults[idx] = _albumOverlapScore(myAlbumNames, artistAlbums, cutoff)

            if debug:
                print("\t{0: <5}{1: <15}{2}".format(idxResults[idx], idx, matchValue[idx]))

    return _pickBestCandidate(idxResults, matchValue)

# Discogs Downloads

In [None]:
# Load the Discogs artist table and build the lookups used by the matching cells.
# NOTE(review): `discogs` is not imported in the cells visible here — confirm it is in scope.
disc = discogs()
discdf = disc.getMasterSlimArtistDiscogsDB()
artistIDToName = discdf["DiscArtist"].to_dict()
from masterdb import getArtistAlbumsDB, discConv
artistAlbumsDB = getArtistAlbumsDB(disc)

# Invert ID -> name into name -> [IDs]; one name can map to several IDs.
artistNameToID = {}
print("Found {0} ID -> Name entries".format(len(artistIDToName)))
for artistID,artistName in artistIDToName.items():
    artistNameToID.setdefault(artistName, []).append(artistID)
print("Found {0} Name -> ID entries".format(len(artistNameToID)))

# The class imported at the top of the notebook is `multiartist` (lower case).
mulArts  = multiartist(cutoff=0.9, discdata=artistNameToID, exact=False)

# Rename Files

In [None]:
singleRenames = getFile(ifile="singleRenames.p")
multiRenames  = getFile(ifile="multiRenames.p")
knownArtists  = getFile(ifile="artistMap.p")

for k in singleRenames.keys():
    v = singleRenames[k]
    if isinstance(v, list):
        if len(v) == 2:
            singleRenames[k] = v[0]
saveFile(idata=singleRenames, ifile="singleRenames.p")

# Get Starter File

In [None]:
savename=join(basedir, "data", "top40", "starter.p")

def getStarter(basedir):
    url="https://top40-charts.com"
    getWebData(base=url, savename=savename, useSafari=False)
    
if not isFile(savename):
    getStarter(basedir)
    fdata = getHTML(savename)
else:
    fdata = getHTML(savename)

In [None]:
fdata

In [None]:
charts = {}
for iform,formdata in enumerate(fdata.findAll("form")):
    print(iform)
    for isel,seldata in enumerate(formdata.findAll("select", {"name": "cid"})):
        print(isel)
        for iop,opdata in enumerate(seldata.findAll("option")):
            attrs  = opdata.attrs
            value  = attrs['value']
            charts[value] = opdata.text
#return charts
charts

In [None]:
chartData  = {}
dirname = None
baseURL = "http://top40-charts.com"

## Get Charts
def getCharts(fdata):
    """Map chart id to chart name from every ``<select name="cid">`` inside a ``<form>``.

    Args:
        fdata: parsed page exposing ``findAll``.

    Returns:
        dict of ``option['value'] -> option.text``; a later duplicate value
        overwrites an earlier one.
    """
    return {
        option.attrs['value']: option.text
        for form in fdata.findAll("form")
        for select in form.findAll("select", {"name": "cid"})
        for option in select.findAll("option")
    }


## Get Dates
def getDates(fdata):
    """Collect the distinct date values offered by the page's date drop-downs.

    Args:
        fdata: parsed page exposing ``findAll``.

    Returns:
        Sorted list of the unique ``value`` attributes of every ``<option>``
        inside a ``<select name="date">`` inside a ``<form>``.
    """
    dates = set()
    for formdata in fdata.findAll("form"):
        for seldata in formdata.findAll("select", {"name": "date"}):
            # The options must actually be visited here; skipping them would
            # make the function return an empty list for every page.
            for opdata in seldata.findAll("option"):
                dates.add(opdata.attrs['value'])
    return sorted(dates)

charts = getCharts(fdata)

In [None]:
charts

### Last Downloaded on 5/21/20

In [None]:
def getStarterFiles(charts):
    """Fetch each chart's landing page and save the list of dates it offers.

    For every ``cid -> cname`` pair the chart page is downloaded, the options
    of its ``<select name="date">`` element are read, and a ``{date: url}``
    dict is saved to the ``<cname>-Dates.p`` path built by ``setFile`` under
    ``<basedir>/data/top40``. Sleeps 2 s after each chart.

    Args:
        charts: dict mapping chart id (the ``cid`` query parameter) to chart name.

    Raises:
        ValueError: if a chart page contains no ``<select name="date">``.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule is loaded.
    import urllib.request

    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    headers = {'User-Agent': user_agent,}

    for cid, cname in charts.items():
        print("{0} --> {1}".format(cid,cname))
        url = "https://top40-charts.com/chart.php?cid={0}".format(cid)

        savedir = join(basedir, "data", "top40")
        mkDir(savedir)

        request = urllib.request.Request(url, None, headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read()

        bsdata = getHTML(data)
        select = bsdata.find("select", {"name": "date"})
        if select is None:
            raise ValueError("No dates!")

        # One download URL per date listed in the drop-down.
        chartsToGet = {}
        for option in select.findAll("option"):
            value = option.attrs['value']
            chartsToGet[value] = "https://top40-charts.com/chart.php?cid={0}&date={1}".format(cid, value)

        # "/" in a chart name would otherwise be read as a path separator.
        savename = setFile(savedir, "{0}-Dates.p".format(cname.replace("/", " ")))
        print("Saving {0} dates to {1}".format(len(chartsToGet), savename))
        saveFile(idata=chartsToGet, ifile=savename)
        sleep(2)

In [None]:
getStarterFiles(charts)

# Download Chart Data

In [None]:
# Download every chart page dated 2000 or later that is not already on disk.
# A bare `import urllib` does not guarantee that the `request` submodule is loaded.
import urllib.request

user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

for cid, cname in charts.items():
    print("{0} --> {1}".format(cid,cname))

    # The "<cname>-Dates.p" file maps each chart date to its download URL.
    cidsavedir = join(basedir, "data", "top40")
    savename   = setFile(cidsavedir, "{0}-Dates.p".format(cname.replace("/", " ")))
    chartData  = getFile(savename)

    values = sorted(chartData.keys(), reverse=True)
    for idts,value in enumerate(values):
        if value is None:
            continue

        # Skip values that cannot be parsed as a date and charts older than 2000.
        # `except Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        try:
            if getDateTime(value).year < 2000:
                continue
        except Exception:
            continue

        savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
        savename = setFile(savedir, "{0}.p".format(value))
        # Nothing to do without a target path or when the page is already cached.
        if savename is None or isFile(savename):
            continue

        url = chartData[value]
        request = urllib.request.Request(url,None,headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read()

        print("URL ---> {0}".format(url))
        print(idts,'/',len(values),"\tSaving {0}".format(savename))
        saveJoblib(data=data, filename=savename, compress=True)
        # Pause between requests.
        sleep(2)

# Parse Charts

In [None]:
def getChartData(chartData, debug=False):
    """Extract the chart entries from a parsed chart page.

    An entry is a ``<table>`` whose attributes equal the dict below exactly,
    with a single ``<tr>`` of three ``<td>`` cells, the third of which holds
    exactly two ``<a>`` links: title first, artist second.

    Args:
        chartData: parsed page exposing ``findAll``.
        debug: print per-table statistics and each extracted entry.

    Returns:
        List of ``{"Artist": ..., "Album": ...}`` dicts in page order.
    """
    # Compared verbatim against table.attrs, including the 'borer' key.
    entryTableAttrs = {'cellpadding': '0', 'cellspacing': '0', 'borer': '0'}
    entries = []
    position = 1

    for tableNum, table in enumerate(chartData.findAll("table")):
        headerCells = table.findAll("th")
        rows = table.findAll("tr")
        tableAttrs = table.attrs
        if debug:
            print(tableNum,len(headerCells),len(rows),tableAttrs)

        if tableAttrs != entryTableAttrs or len(rows) != 1:
            continue
        cells = rows[0].findAll("td")
        if len(cells) != 3:
            continue
        links = cells[2].findAll("a")
        if len(links) != 2:
            continue

        album, artist = links[0].text, links[1].text
        entries.append({"Artist": artist, "Album": album})
        if debug:
            print(position,'\t',artist,'\t',album)
        position += 1

    return entries

In [None]:
for cid, cname in charts.items():
    print("{0} --> {1}".format(cid,cname))
    
    
    cidsavedir = join(basedir, "data", "top40")
    savename   = setFile(cidsavedir, "{0}-Dates.p".format(cname.replace("/", " ")))
    chartData  = getFile(savename)
    
    values = sorted(chartData.keys(), reverse=True)
    chartResults = {}
    for idts,value in enumerate(values):
        if value is None:
            continue
            
        savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
        savename = setFile(savedir, "{0}.p".format(value))            
        data     = getHTML(savename)
        results  = getChartData(data)
        chartResults[value] = results
        if len(results) == 0:
            print('\t',cname,'\t\t',value,'\t',len(results)," <<-------")
        else:
            print('\t',cname,'\t\t',value,'\t',len(results))
            
    savedir = join(basedir, "results", "top40")
    savename = setFile(savedir, "{0}.p".format(cname.replace("/", " ")))
    saveJoblib(data=chartResults, filename=savename, compress=True)
    #savename = setFile("~/Dropbox/charts", "{0}.p".format(cname.replace("/", " ")))
    #saveJoblib(data=chartResults[cid], filename=savename, compress=True)

# Aggregate Charts

In [None]:
from searchUtils import findExt
savedir = join(basedir, "results", "top40")
files = findExt(savedir, ".p")
print("Found {0} files".format(len(files)))

In [None]:
from fileUtils import getBaseFilename

# Fold every per-chart result file into one nested mapping:
#   results[artist]["Songs" | "Albums"][title][chartName][chartDate] = index in that chart
results = {}
for ifile in files:
    chartData = getFile(ifile)
    cname     = getBaseFilename(ifile)
    print("{0: <40}".format(cname),end="")
    # Files whose base name ends in "Albums" feed the album bucket; all others are songs.
    key = "Albums" if cname.endswith("Albums") else "Songs"
    # The loop variable is deliberately not called `date`: that would rebind the
    # `date` class imported from datetime that all_sundays() relies on.
    for chartDate, values in chartData.items():
        for i,item in enumerate(values):
            artist = item["Artist"]
            # Apply the multi-artist rename first, then the single-artist ID mapping.
            if multiRenames.get(artist) is not None:
                artist = multiRenames[artist]
            if singleRenames.get(artist) is not None:
                idx    = str(singleRenames[artist])
                artist = artistIDToName[idx]
            album = item["Album"]
            artistEntry = results.setdefault(artist, {"Songs": {}, "Albums": {}})
            artistEntry[key].setdefault(album, {}).setdefault(cname, {})[chartDate] = i
    print(len(results))

In [None]:
saveFile(ifile="chartResults.p", idata=results, debug=True)

In [None]:
from collections import Counter
slimResults = Counter()
for artist, artistData in results.items():
    for key, keyData in artistData.items():
        for album, albumData in keyData.items():
            slimResults[artist] += sum({k: len(v) for k,v in albumData.items()}.values())

In [None]:
saveFile(ifile="chartCounter.p", idata=slimResults, debug=True)

In [None]:
for artist, artistData in results.items():
    print(artist)
    print("    Songs:")
    for album, albumData in artistData["Songs"].items():
        print('\t',album,'\t',len(albumData))
        for chart, chartData in albumData.items():
            pass
            #print('\t\t',chart,chartData)
    print("    Albums:")
    for album, albumData in artistData["Albums"].items():
        print('\t',album,'\t',len(albumData))
        for chart, chartData in albumData.items():
            pass
            #print('\t\t',chart,chartData)
    break

# Find Missing Artists

In [None]:
slimResults = getFile(ifile="chartCounter.p", debug=True)
fullResults = getFile(ifile="chartResults.p", debug=True)

singleRenames = getFile(ifile="singleRenames.p", debug=True)
multiRenames  = getFile(ifile="multiRenames.p", debug=True)
knownArtists  = getFile(ifile="artistMap.p", debug=True)

In [None]:
skips = {}

# Unknown Artists

In [None]:
## 5500
# Walk the chart artists from most to least charted and collect those that
# have no row in the Discogs table (discdf) yet.
resumeAfter = 14961   # rank index already processed in earlier runs
batchSize   = 200     # stop once more than this many artists are collected

artistsToGet = {}
#for i, (artist, cnt) in enumerate(slimResults.most_common()):
for i, (artist, cnt) in enumerate(sorted(slimResults.items(), key=lambda pair: pair[1], reverse=True)):
    if i <= resumeAfter:
        continue
    if i % 250 == 0:
        print("==>",i,len(slimResults))
    # Apply the multi-artist rename first, then the single-artist ID mapping.
    if multiRenames.get(artist) is not None:
        artist = multiRenames[artist]
    if singleRenames.get(artist) is not None:
        # str() matches the lookup used by the other cells that read singleRenames.
        idx    = str(singleRenames[artist])
        artist = artistIDToName[idx]
    if knownArtists.get(artist) is not None:
        continue
    if skips.get(artist) is not None:
        continue

    # Names that split into several artists are skipped here.
    matches = mulArts.getArtistNames(artist)
    if len(matches) > 1:
        continue

    # getMusicData returns None when no row matches and a non-empty frame
    # otherwise, so anything other than None means the artist is already known.
    mdata = getMusicData("DiscArtist", artist)
    if mdata is not None:
        continue

    artistsToGet[artist] = cnt
    print("{0: <6}{1: <40}{2: <6}{3}".format(i,artist,cnt,len(artistsToGet)))
    if len(artistsToGet) > batchSize:
        break

In [None]:
from collections import Counter
cutoff = 0.7
skips = {}
possibleResults = {}
for i,(artist, cnt) in enumerate(artistsToGet.items()):
    print(i,"/",len(artistsToGet),'  \t',artist)
    artistAlbums = set(list(fullResults[artist]["Songs"].keys()) + list(fullResults[artist]["Albums"].keys()))
    results      = getBestArtistMatch(artist, artistAlbums, N=3, cutoff=cutoff)
    if results[2] is not None:
        if results[2] >= cutoff:
            print(i,"/",len(artistsToGet),'  \t',artist,results)
            possibleResults[artist] = list(results[:-1])
        else:
            skips[artist] = True
    else:
        skips[artist] = True

In [None]:
keep = {}

#mdata = getMusicData("DiscArtist", "Tyrese")
#keep["Tyrese"] = [mdata.index[0], mdata["Name"].values[0]]
#keep["PPK \r"] = [mdata.index[0], mdata["Name"].values[0]]

#keep['Sarah Brightman']=['59756', 'Sarah Brightman']

for artist,results in possibleResults.items():
    keep[artist] = results
#keep["Lil' Romeo"] = ['134987', "Lil' Romeo"]
keep

In [None]:
singleRenames = getFile(ifile="singleRenames.p", debug=True)
for k in keep.keys():
    v = keep[k]
    if isinstance(v, list):
        if len(v) == 2:
            singleRenames[k] = v[0]
            print(k,v[0])
saveFile(idata=singleRenames, ifile="singleRenames.p")

In [None]:
saveFile(idata=skips, ifile="toget11.p")

# Get Multi Results

In [None]:
from collections import Counter

# For chart artists whose database lookup returns more than one row, pick the
# best row by comparing charted song/album titles against each candidate index.
# Accepted matches (third result element >= cutoff) go into multiMatchResult.
cutoff = 0.8
multiMatchResult = {}
for i, (artist, cnt) in enumerate(slimResults.most_common()):
    if i > 0 and i % 1000 == 0:
        print("Passed",i,'/',len(slimResults))

    # Apply the multi-artist rename first, remembering the original chart name.
    renamed = multiRenames.get(artist)
    if renamed is not None:
        prevArtist, artist = artist, renamed
    else:
        prevArtist = None

    # Then map a single-artist rename (stored as an ID) back to a name.
    knownID = singleRenames.get(artist)
    if knownID is not None:
        idx    = str(knownID)
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist

    if knownArtists.get(prevArtist) is not None:
        continue

    mdata = getMusicData("DiscArtist", prevArtist)
    # Only ambiguous lookups (a DataFrame with more than one row) are of interest here.
    if not isinstance(mdata, DataFrame) or mdata.shape[0] <= 1:
        continue

    matches      = mdata["Name"].index
    chartEntry   = fullResults[prevArtist]
    artistAlbums = set(list(chartEntry["Songs"].keys()) + list(chartEntry["Albums"].keys()))
    results      = getBestArtistIDMatch(artist, artistAlbums, matches, N=3, cutoff=cutoff)
    score        = results[2]
    if score is not None and score >= cutoff:
        print(i,"/",len(slimResults),'  \t',artist,results)
        multiMatchResult[artist] = list(results[:-1])

In [None]:
# Merge the multi-match results into knownArtists, printing the size before and
# after, and persist the merged map.
sizeBefore = len(knownArtists)
print(sizeBefore)
knownArtists.update(multiMatchResult)
sizeAfter = len(knownArtists)
print(sizeAfter)
saveFile(idata=knownArtists, ifile="artistMap.p", debug=True)

In [None]:
# NOTE(review): stray one-off cell. It depends on `artist` and `results` still
# holding whatever the most recently executed loop left behind, applies no cutoff
# check, and stores the full `results` object, whereas the matching loop stores
# `list(results[:-1])`. Confirm it is still needed before running it.
multiMatchResult[artist] = results

# Get Multi Artist Results

In [None]:
# For chart entries that split into several artist names, try to find a database
# match for each component name that has no database entry yet. Hits are grouped
# per (renamed) chart artist in multiResult as {"Results": ..., "Match": [...]}.
cutoff = 0.8
multiResult = {}
for i, (artist, cnt) in enumerate(slimResults.most_common()):
    # Progress line every 1000 entries, plus one early line at entry 100.
    if (i > 0 and i % 1000 == 0) or i == 100:
        print("Passed",i,'/',len(slimResults))

    prevArtist = None
    renamed = multiRenames.get(artist)
    if renamed is not None:
        prevArtist, artist = artist, renamed
    knownID = singleRenames.get(artist)
    if knownID is not None:
        idx    = str(knownID)
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist

    chartEntry   = fullResults[prevArtist]
    artistAlbums = set(list(chartEntry["Songs"].keys()) + list(chartEntry["Albums"].keys()))
    matches = mulArts.getArtistNames(artist)
    if len(matches) == 1:
        # Single-name entries are not handled here (this also skips the break check below).
        continue

    for match in matches:
        matchID = singleRenames.get(match)
        if matchID is not None:
            idx   = str(matchID)
            match = artistIDToName[idx]
        mdata = getMusicData("DiscArtist", match)
        if isinstance(mdata, DataFrame):
            continue
        results = getBestArtistMatch(match, artistAlbums, N=3, cutoff=cutoff)
        score   = results[2]
        if score is None or not (score >= cutoff):
            continue
        print(i,"/",len(slimResults),'  \t',artist,results)
        # The first accepted result for an artist is kept; later hits only extend "Match".
        entry = multiResult.setdefault(artist, {"Results": results, "Match": []})
        entry["Match"].append(match)

    if i > 5000:
        break

In [None]:
# Print a ready-to-paste multiRenames assignment for every multi-artist hit: the
# first matched component name is replaced by element 1 of the stored result.
for k,v in multiResult.items():
    firstMatch = v["Match"][0]
    dbName     = v["Results"][1]
    x = k.replace(firstMatch, dbName)
    print('multiRenames["{0}"] = "{1}"'.format(k, x))

In [None]:
# Load the persisted multi-artist rename table, add the manually reviewed
# spellings (chart spelling -> corrected spelling), print the table size before
# and after, and save it back.
multiRenames = getFile("multiRenames.p")
print(len(multiRenames))
multiRenames.update({
    "Drake, Wizkid & Kyla": "Drake, WizKid & Kyla",
    "A. R. Rahman & Pussycat Dolls": "A.R. Rahman & Pussycat Dolls",
    "Calvin Harris, Pharell Williams, Katy Perry & Big Sean": "Calvin Harris, Pharrell Williams, Katy Perry & Big Sean",
    "Flo Rida, Sage The Gemini & Lookas": "Flo Rida, Sage The Gemini & LooKas",
    "DJ Snake & Lil Jon": "DJ Snake & Lil' Jon",
    "Rihanna & Jay-z": "Rihanna & Jay-Z",
    "Lil Jon & East Side Boyz": "Lil' Jon & East Side Boyz",
    "Remady & Manu-L": "DJ Remady & Manu-L",
    "Lil Jon & The East Side Boyz, Usher & Ludacris": "Lil' Jon & The East Side Boyz, Usher & Ludacris",
    "Jay Sean, Sean Paul & Lil Jon": "Jay Sean, Sean Paul & Lil' Jon",
    "Brandy & Ray Jay": "Brandy & Ray J",
    "KitschKrieg, Trettmann, Gringo, Ufo361 & Gzuz": "Kitschkrieg, Trettmann, Gringo, Ufo361 & Gzuz",
    "Mike WiLL Made-It, Miley Cyrus, Wiz Khalifa & Juicy J": "Mike WiLL Made It, Miley Cyrus, Wiz Khalifa & Juicy J",
    "DJ Broiler & Ravvel": "Broiler & Ravvel",
    "Bob Sinclar & Raffaella Carra": "Bob Sinclar & Raffaella Carrà",
    "Revolverheld & Marta Jandova": "Revolverheld & Marta Jandová",
    "Parra For Cuva & Anna Naklab": "Parra for Cuva & Anna Naklab",
    "Patti Labelle & Ron Isley": "Patti LaBelle & Ron Isley",
})
print(len(multiRenames))
saveFile(idata=multiRenames, ifile="multiRenames.p")

# Get Single Results

In [None]:
# Show the (artist, count) pairs of slimResults ordered by ascending count.
# Alternative iteration order used elsewhere: enumerate(slimResults.most_common()).
sorted(slimResults.items(), key=lambda item: item[1])

In [None]:
# Resolve chart artists that consist of exactly one artist name and have no
# database entry yet. Artists are visited from the lowest chart count upwards;
# accepted matches (third result element >= cutoff) are collected in
# singleResult, and the loop stops once more than 20 have been found so they can
# be reviewed by hand.
cutoff = 0.8
singleResult = {}
# Entries up to and including this index are skipped.
# NOTE(review): presumably the resume point of an earlier interactive pass -- confirm or reset to -1.
resumeAfter = 1842
#for i, (artist, cnt) in enumerate(slimResults.most_common()):
for i, (artist, cnt) in enumerate(sorted(slimResults.items(), key=lambda pair: pair[1])):
    if i <= resumeAfter:
        continue
    if i > 0 and i % 100 == 0:
        print("Passed",i,'/',len(slimResults))
    if cnt < 1:
        continue

    # Reset for every artist, as the sibling multi-artist loops do. Without this
    # the value from the previous iteration (or a previous cell) leaks into the
    # knownArtists check below, and a fresh kernel raises NameError.
    prevArtist = None
    if multiRenames.get(artist) is not None:
        prevArtist = artist
        artist = multiRenames[artist]
    if singleRenames.get(artist) is not None:
        idx    = str(singleRenames[artist])
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist
    if knownArtists.get(prevArtist) is not None:
        continue

    # NOTE(review): the sibling loops index fullResults by prevArtist (the chart
    # spelling); this lookup uses the possibly renamed artist -- confirm which key is intended.
    artistAlbums = set(list(fullResults[artist]["Songs"].keys()) + list(fullResults[artist]["Albums"].keys()))
    match = mulArts.getArtistNames(artist)
    if len(match) != 1:
        continue

    mdata = getMusicData("DiscArtist", list(match.keys())[0])
    if isinstance(mdata, DataFrame):
        # Already present in the database; nothing to resolve.
        continue

    results = getBestArtistMatch(artist, artistAlbums, N=3, cutoff=cutoff)
    if results[2] is not None and results[2] >= cutoff:
        print(i,"/",len(slimResults),'  \t',artist,'\t',results)
        singleResult[artist] = results
        if len(singleResult) > 20:
            break

In [None]:
# Print each single-artist hit as a ready-to-paste `keep[...]` assignment:
# chart name, element 0 of the result, and element 1 as a trailing comment.
for k,v in singleResult.items():
    keepLine = 'keep["{0}"] = {1}  #{2}'.format(k, v[0], v[1])
    print(keepLine)

In [None]:
# Reduce every single-artist hit to its first element and merge the reduced map
# into knownArtists, reporting the sizes before and after the merge.
singleResults = dict((name, hit[0]) for name, hit in singleResult.items())
print("Found {0} single results".format(len(singleResults)))
allResultsLine = "Found {0} all results"
print(allResultsLine.format(len(knownArtists)))
knownArtists.update(singleResults)
print(allResultsLine.format(len(knownArtists)))

In [None]:
# Persist the current knownArtists map.
saveFile(idata=knownArtists, ifile="artistMap.p", debug=True)

## Manual Fixes

In [None]:
# Manual rename table: chart spelling of an artist -> numeric artist ID.
# Every row in this cell follows the `keep["{0}"] = {1}  #{2}` layout that the
# singleResult review cell prints (k, v[0], v[1]), so the trailing comment is the
# name that was reported alongside the ID.
# NOTE(review): `keep` must already exist (it is created or loaded in another cell).
keep["Wizkid"] = 3292269  #WizKid
keep["A. R. Rahman"] = 4459  #A.R. Rahman
keep["Pharell Williams"] = 90037  #Pharrell Williams
keep["Lookas"] = 3619379  #LooKas
keep["Lil Jon"] = 120307  #Lil' Jon
keep["Jay-z"] = 21742  #Jay-Z

keep["Jean Michel Jarre"] = 209415  #Jean-Michel Jarre
keep["Puppini Sisters"] = 663589  #The Puppini Sisters
keep["Hadouken"] = 744317  #Hadouken!
keep["Peter Doherty"] = 219403  #Pete Doherty
keep["Dani Konig"] = 95685  #Dani König
keep["X-centric Sound System"] = 177063  #Ex-Centric Sound System
keep["And You Will Know Us By The T"] = 80393  #And You Will Know Us By The Trail Of Dead
keep["Serena Maneesh"] = 563504  #Serena-Maneesh
keep["Potatoheadz"] = 38359  #Potatoheads
keep["Black Angels"] = 428135  #The Black Angels
keep["La Fiancee"] = 1603916  #La Fiancée
keep["Knut Anders Sorum"] = 713971  #Knut Anders Sørum

keep["Magic Numbers"] = 245226  #The Magic Numbers
keep["Melissa M"] = 952871  #Mélissa M
keep["Weird Al Yankovic"] = 259422  #"Weird Al" Yankovic
keep["Star Academy 6"] = 395756  #Star Academy
keep["Dj Boozywoozy"] = 39640  #DJ BoozyWoozy
keep["Lauri Tahka"] = 1236855  #Lauri Tähkä
keep["Finn Brothers"] = 377361  #The Finn Brothers
keep["Red Jumpsuit Apparatus"] = 746639  #The Red Jumpsuit Apparatus
keep["Katharine Mcphee"] = 544699  #Katharine McPhee
keep["Massive Tone"] = 51650  #Massive Töne
keep["Soul'd Out"] = 152449  #Soul'd OUT
keep["Djames Braun"] = 3024392  #Djämes Braun
keep["K-otic"] = 99126  #K-Otic
keep["Dr. Kucho"] = 171326  #Dr. Kucho!
keep["Sivert Hoyem"] = 226487  #Sivert Høyem
keep["DJ's @ Work"] = 27887  #DJs @ Work
keep["T.m.revolution"] = 1367682  #T.M.Revolution
keep["Suburban Tribe"] = 294619  #Sub-Urban Tribe
keep["Peer Gunt"] = 475654  #Peer Günt
keep["Acda En De Munnik"] = 283015  #Acda en de Munnik
keep["Susanne Sundfor"] = 747088  #Susanne Sundfør
keep["Zen Cafe"] = 264334  #Zen Café
keep["Lutricia Mcneal"] = 10896  #Lutricia McNeal
keep["Soul Decision"] = 725913  #soulDecision
keep["MC Anitta"] = 2807393  #Anitta
keep["Antonio Jose"] = 1661452  #Antonio José
keep["Miguel Angel Silva"] = 2374710  #Miguel Ángel Silva
keep["Giant Leap"] = 55439  #1 Giant Leap
keep["Stephanie McIntosh"] = 597377  #Stephanie Mcintosh
keep["Beautiful South"] = 171427  #The Beautiful South
keep["Mars Volta"] = 96218  #The Mars Volta
keep["India Arie"] = 410260  #India.Arie
keep["Sheepdogs"] = 1980121  #The Sheepdogs
keep["Bjorn Rosenstrom"] = 352899  #Björn Rosenström
keep["Pretty Reckless"] = 1773405  #The Pretty Reckless
keep["Wallflowers"] = 257714  #The Wallflowers
keep["Booming People"] = 440378  #The Booming People
keep["Tania Mara"] = 1614077  #Tânia Mara
keep["Zacharius Carl Group"] = 329135  #Zacharius Carls Group
keep["Daniel Lindstrom"] = 1465965  #Daniel Lindström
keep["Pigeon Detectives"] = 481793  #The Pigeon Detectives
keep["Last Shadow Puppets"] = 1099843  #The Last Shadow Puppets
keep["Brian Mckight"] = 97515  #Brian McKnight
keep["Mrs. Greenbird"] = 3134860  #Mrs Greenbird
keep["Ophelie Winter"] = 581072  #Ophélie Winter
keep["Les Deesses"] = 2392811  #Les Déesses
keep["Xander De Buisonje"] = 451325  #Xander De Buisonjé
keep["Partysquad"] = 523070  #The Partysquad
keep["Jose Gonzalez"] = 190821  #José González
keep["Ok Go"] = 219647  #OK Go
keep["Ladi6"] = 283432  #Ladi 6
keep["Sergio Mendes"] = 27986  #Sérgio Mendes
keep["Dorothee"] = 397164  #Dorothée
keep["Suvi Terasniska"] = 1032538  #Suvi Teräsniska
keep["Anna Jarvinen"] = 937842  #Anna Järvinen
keep["Kinderen Voor Kinderen"] = 354362  #Kinderen voor Kinderen
keep["Rippingtons"] = 555275  #The Rippingtons
keep["Flaming Lips"] = 67156  #The Flaming Lips
keep["William Balde"] = 1744946  #William Baldé
keep["Justice Collective"] = 3071540  #The Justice Collective
keep["Ultra Nate"] = 8099  #Ultra Naté
keep["Robin S"] = 66727  #Robin S.
keep["Trentemoller"] = 117914  #Trentemøller
keep["Lene Alexandra"] = 917923  #Lene Alexandra Øien
keep["Ready Set"] = 2183109  #The Ready Set
keep["Charlatans"] = 49023  #The Charlatans
keep["Maximo Park"] = 384412  #Maxïmo Park
keep["Mokobe"] = 392235  #Mokobé
keep["Paps 'n' Skar"] = 41801  #Paps N Skar
keep["ScHoolboy Q"] = 2353545  #Schoolboy Q
keep["Nelja Ruusua"] = 308352  #Neljä Ruusua
keep["Afro-dite"] = 296038  #Afro-Dite
keep["Breaks Co-op"] = 61311  #Breaks Co-Op
keep["Laurent Wery"] = 1310886  #Laurent Wéry
keep["Proclaimers"] = 274034  #The Proclaimers
keep["Courteeners"] = 971940  #The Courteeners
keep["No Tone"] = 841468  #No-Tone
keep["Mo Things Family"] = 174410  #Mo Thugs Family
keep["Sammy Deluxe"] = 88398  #Samy Deluxe
keep["LaCrim"] = 2707062  #Lacrim
keep["Trancelucent"] = 55384  #TranceLucent
keep["Luis Represas"] = 1237858  #Luís Represas
keep["Tatsurou Yamashita"] = 119485  #Tatsuro Yamashita
keep["Ana Tijoux"] = 469819  #Anita Tijoux
keep["Axe Bahia"] = 427149  #Axé Bahia
keep["Elakelaiset"] = 264335  #Eläkeläiset
keep["Turmion Katilot"] = 339414  #Turmion Kätilöt
keep["Jean Claude Ades"] = 66552  #Jean-Claude Ades
keep["Hammerfall"] = 287459  #HammerFall
keep["Hear'say"] = 312508  #Hear'Say
keep["Jack Penate"] = 747248  #Jack Peñate
keep["Jeremy Chatelain"] = 1107704  #Jérémy Chatelain
keep["Alex Max Band"] = 413545  #Alex Band
keep["Falsa Alarma"] = 1278412  #Falsalarma
keep["G-spott"] = 11679  #G-Spott
keep["Ray LaMontagne"] = 502002  #Ray Lamontagne
keep["Chris De Burgh"] = 151304  #Chris de Burgh
keep["Verve Pipe"] = 169539  #The Verve Pipe
keep["Plusch"] = 372645  #Plüsch
keep["Compagnie Creole"] = 528609  #La Compagnie Créole
keep["MC Fioti"] = 5865714  #Mc Fioti
keep["Sie7te"] = 1771803  #Sie7e
keep["Bastard Sons Of Dioniso"] = 2634291  #The Bastard Sons Of Dioniso
keep["Baby Blue Sound Crew"] = 1094228  #Baby Blue Soundcrew
keep["Stone Roses"] = 7298  #The Stone Roses
keep["DJ Matvey Emerson"] = 2126113  #Matvey Emerson
keep["Gerald De Palmas"] = 533880  #Gérald de Palmas
keep["Oli. P"] = 296389  #Oli.P
keep["Andre Hazes Jr."] = 4298003  #André Hazes Jr.
keep["Motorhead"] = 233658  #Motörhead
keep["Grateful Dead"] = 246650  #The Grateful Dead
keep["Liset Alea"] = 138872  #Lissette Alea
keep["Jennie Lofgren"] = 996876  #Jennie Löfgren
keep["Cardigans"] = 39900  #The Cardigans
keep["Cast Of Rent"] = 2531774  #The Cast Of Rent
keep["Saw Doctors"] = 544713  #The Saw Doctors
keep["Robert Delong"] = 1121732  #Robert DeLong
keep["Huun Huur Tu"] = 109547  #Huun-Huur-Tu
keep["Jari Sillanpaa"] = 713952  #Jari Sillanpää
keep["Hellacopters"] = 261426  #The Hellacopters
keep["Dan Backman"] = 1106992  #Dan Bäckman
keep["Artists Stand Up To Cancer"] = 1350696  #Just Stand Up To Cancer
keep["Mr.President"] = 41707  #Mr. President
keep["Loreena Mckennitt"] = 213366  #Loreena McKennitt
keep["Lashun Pace"] = 824951  #LaShun Pace
keep["Beach Boys"] = 70829  #The Beach Boys
keep["Allstars"] = 1009547  #TV Allstars
keep["Raveonettes"] = 200321  #The Raveonettes
keep["Body Rox"] = 269697  #Bodyrox
keep["Lazytown"] = 729388  #LazyTown
keep["Marie Laforet"] = 462548  #Marie Laforêt
keep["Dj Lhasa"] = 196037  #DJ Lhasa
keep["Robert Cray Band"] = 292478  #The Robert Cray Band
keep["Eppu Normaal"] = 381585  #Eppu Normaali
keep["Dj Norman"] = 23243  #DJ Norman
keep["B-Yentl"] = 2747976  #BYentl
keep["Jessica Folker"] = 28518  #Jessica Folcker
keep["Brolle"] = 572997  #Brolle JR
keep["The Cheetah Girls"] = 633276  #Cheetah Girls
keep["Pipettes"] = 360206  #The Pipettes
keep["John Dahlback"] = 20805  #John Dahlbäck
keep["HollySiz"] = 2082991  #Hollysiz
keep["Cassia Eller"] = 1105685  #Cássia Eller
keep["Marios Fragoulis"] = 328042  #Mario Frangoulis
keep["I Panta Nei"] = 1138210  #Panta Rei
keep["Sober"] = 946450  #Sôber
keep["Jarjestyshairio"] = 1931984  #Järjestyshäiriö
keep["Swingfly"] = 70432  #Swing-Fly
keep["Decemberists"] = 264812  #The Decemberists
keep["Jean Jacques Goldman"] = 307094  #Jean-Jacques Goldman
keep["Star Academy 7"] = 395756  #Star Academy
keep["Dj Chuckie"] = 135575  #DJ Chuckie
keep["Didrik Solli-tangen"] = 1818627  #Didrik Solli-Tangen
keep["Dj Goldfinger"] = 802185  #DJ Goldfinger
keep["Overtones"] = 1443138  #The Overtones
keep["Vaccines"] = 2029033  #The Vaccines
keep["Roshelle"] = 2321646  #Rochelle
keep["La Habitacion Roja"] = 393825  #La Habitación Roja
keep["Peter Lemarc"] = 266999  #Peter LeMarc
keep["Goldie Lookin' Chain"] = 184984  #Goldie Lookin Chain
keep["Smashing Pumpkins"] = 28970  #The Smashing Pumpkins
keep["Mighty Mighty Bosstones"] = 275193  #The Mighty Mighty Bosstones
keep["The Notorious B.I.G."] = 65049  #Notorious B.I.G.
keep["Chante Moore"] = 455230  #Chanté Moore
keep["Josh Gracin"] = 655983  #Joshua Gracin
keep["Franck Michael"] = 478419  #Frank Michael
keep["The Go-betweens"] = 83077  #The Go-Betweens
keep["Shakespears Sister"] = 30318  #Shakespear's Sister
keep["Piero Pelu"] = 662395  #Piero Pelù
keep["Los Delinquentes"] = 674561  #Los Delinqüentes
keep["St. Germain"] = 74  #St Germain
keep["Zuri West"] = 188532  #Züri West
keep["Bustafunk"] = 16272  #Busta Funk
keep["Lea Castel"] = 1153522  #Léa Castel
keep["De Lillos"] = 261173  #deLillos
keep["Dj The Wave"] = 241825  #DJ The Wave
keep["The Academy Is"] = 425277  #The Academy Is...
keep["Dj S.P.U.D."] = 42088  #DJ S.P.U.D.
keep["Dj Rebel"] = 209672  #DJ Rebel
keep["Susana Felix"] = 1333026  #Susana Félix
keep["Wildhearts"] = 293211  #The Wildhearts
keep["Altern8"] = 12846  #Altern 8
keep["Loredana Berte"] = 366682  #Loredana Bertè
keep["Bun-B"] = 185582  #Bun B
keep["Academia Operacion Triunfo"] = 1375332  #Academia Operación Triunfo
keep["Barr Brothers"] = 2470090  #The Barr Brothers
keep["Soulvation"] = 53390  #Soulvation*
keep["Ritmo Dynamic"] = 123374  #Ritmo-Dynamic
keep["Bleeders"] = 483312  #The Bleeders
keep["Hearsay"] = 312508  #Hear'Say
keep["Dj Shadow"] = 4478  #DJ Shadow
keep["Cooper Temple Clause"] = 138071  #The Cooper Temple Clause
keep["Maccabees"] = 499923  #The Maccabees
keep["Jon B."] = 20389  #Jon B
keep["North American Halloween Prevention Initiative"] = 1039212  #North American Hallowe'en Prevention Initiative
keep["Cerena"] = 1894383  #Cérena
keep["Ze Pequeno"] = 4613736  #Ze Pequeño
keep["Les Muscles"] = 297430  #Les Musclés
keep["Teki Latex"] = 247176  #Tekilatex
keep["Tommy February6"] = 1283490  #Tommy february6
keep["Tommy Februaryo"] = 1283490  #Tommy february6
keep["Lovefreekz"] = 208128  #The Lovefreekz
keep["Upper Room"] = 487039  #The Upper Room
keep["Avett Brothers"] = 824244  #The Avett Brothers
keep["Legiao Urbana"] = 264082  #Legião Urbana
keep["Whitlams"] = 254486  #The Whitlams
keep["Jo Jo"] = 306427  #JoJo
keep["Fundacion Tony Manero"] = 194603  #Fundación Tony Manero
keep["Ali B."] = 234355  #Ali B
keep["De Toppers"] = 602723  #Toppers
keep["Motorhomes"] = 252568  #The Motorhomes
keep["Di Leva"] = 75662  #Di leva
keep["Timo Raisanen"] = 277925  #Timo Räisänen
keep["Sebastien Tellier"] = 2280  #Sébastien Tellier
# NOTE(review): the matched name "Alcatraz" differs substantially from the chart name -- confirm this is not a false fuzzy match.
keep["Albatraoz"] = 1183850  #Alcatraz
keep["Roisin Murphy"] = 455520  #Róisín Murphy
keep["Ha Rule"] = 51369  #Ja Rule
keep["Pepper's Ghost"] = 2364386  #Peppers Ghost
keep["Waterboys"] = 125174  #The Waterboys
keep["Georges-alain Jones"] = 1940009  #Georges-Alain Jones
keep["Neg'marrons"] = 219331  #Neg'Marrons
keep["Kaleidoscopio"] = 166678  #Kaleidoscópio
keep["alt-J"] = 2830806  #Alt-J
keep["Tommy February"] = 1283490  #Tommy february6
keep["Disco Boys"] = 196758  #The Disco Boys
keep["Natalia Jimenez"] = 2130134  #Natalia Jiménez
keep["MadMan"] = 2668959  #Madman
keep["Aitana Ocana"] = 6371628  #Aitana Ocaña
keep["Polyphonic Spree"] = 79023  #The Polyphonic Spree
keep["John Mellencamp"] = 237890  #John Cougar Mellencamp
keep["White Tie Affair"] = 1943163  #The White Tie Affair
keep["Bon Garcon"] = 458392  #Bon Garçon
keep["Fu-tourist"] = 34401  #Fu-Tourist
keep["Kapteeni A-ni"] = 85882  #Kapteeni Ä-ni
keep["Ron Van Den Beuken"] = 90120  #Ron van den Beuken
keep["For My Pain"] = 619891  #For My Pain...
keep["Jack McManus"] = 808532  #Jack Mcmanus
keep["Radio Dept."] = 238640  #The Radio Dept.
keep["Elin Sigvardsson"] = 1063935  #Elin Ruth Sigvardsson
keep["Dj Felli Fel"] = 227465  #Felli Fel
keep["New Deal"] = 44720  #The New Deal
keep["Camera Cafe"] = 1645853  #Caméra Café
keep["Ben L`Ocle Soul"] = 1689692  #Ben L'Oncle Soul
keep["Veronique Sanson"] = 394617  #Véronique Sanson
keep["Dj Schwede"] = 41815  #DJ Schwede
keep["Beam Vs. Cyrus"] = 19442  #Beam vs. Cyrus
keep["Dj Tocadisco"] = 424389  #Tocadisco
keep["De Vrienden Van Meneer Konijn"] = 4591921  #Vrienden Van Meneer Konijn
keep["4 Taste"] = 325990  #Taste
keep["Shaka Labbits"] = 1829370  #Shakalabbits
keep["Elio E Le Storie Tes"] = 191166  #Elio E Le Storie Tese
keep["Paco De Lucia"] = 20184  #Paco De Lucía
keep["Format B"] = 263420  #Format: B
keep["Oceanlab"] = 427638  #OceanLab
keep["Motley Crue"] = 94068  #Mötley Crüe
keep["Janelle Monae"] = 445868  #Janelle Monáe
keep["Civil Wars"] = 2001094  #The Civil Wars
keep["Moneybagg Yo"] = 5197378  #MoneyBagg Yo
keep["Caleidoscopio"] = 166678  #Kaleidoscópio
keep["Zeljko Joksimovic"] = 473482  #Željko Joksimović
keep["Mousee T"] = 8803  #Mousse T.
keep["Polo Montanes"] = 483555  #Polo Montañez
keep["Annette Artani"] = 1466811  #Annet Artani
keep["Payo Malo"] = 613450  #El Payo Malo
keep["To Die For"] = 239041  #To-Die-For
keep["Dj Jurgen"] = 17260  #DJ Jurgen
keep["Modesha"] = 128255  #Nodesha
keep["Latin Kings"] = 158482  #The Latin Kings
keep["Dead Weather"] = 1391789  #The Dead Weather
keep["High School Musical Cast"] = 673607  #The High School Musical Cast
keep["Dj Mark Farina"] = 4710  #Mark Farina
keep["Sandrine Francois"] = 1446111  #Sandrine François
keep["Gregori Baquet"] = 568948  #Grégori Baquet
keep["Superheavy"] = 2418975  #SuperHeavy
keep["Deborah De Corral"] = 2035021  #Deborah de Corral
keep["Brilliant Green"] = 296909  #The Brilliant Green
keep["Laith Al-deen"] = 341824  #Laith Al-Deen
keep["Audiobullys"] = 50016  #Audio Bullys
keep["Mr Redz"] = 21964  #Mr. Reds
keep["Eye Opener"] = 138167  #Eyeopener
keep["Kraftklub"] = 2028458  #KraftKlub
keep["Human League"] = 10383  #The Human League
keep["Lost Brothers"] = 158835  #The Lost Brothers
keep["Young Knives"] = 470191  #The Young Knives
keep["Get Cape Wear Cape Fly"] = 747070  #Get Cape. Wear Cape. Fly
keep["Starting Line"] = 355465  #The Starting Line
keep["The-dream"] = 1008036  #The-Dream
keep["Piano Guys"] = 3507956  #The Piano Guys
keep["Beyonce"] = 52835  #Beyoncé
keep["Amity Affliction"] = 2446213  #The Amity Affliction
keep["A$AP Mob"] = 2898554  #ASAP Mob
keep["Kelly K"] = 970192  #Kelly Key
keep["Blizzard Brothers Inc"] = 41806  #Blizzard Brothers
keep["Jade Macrae"] = 197656  #Jade MacRae
keep["Herman Dune"] = 264602  #Herman Düne
keep["Alex  Britti"] = 653853  #Alex Britti
keep["Alfred Garcia"] = 6371630  #Alfred García
keep["Rena Dif"] = 233790  #René Dif
keep["Flaming Sideburns"] = 491667  #The Flaming Sideburns
keep["JS-16"] = 35833  #JS16
keep["Lisa Left Eye Lopes"] = 110359  #Lisa "Left Eye" Lopes
keep["Rosemary' Sons"] = 285835  #Rosemary's Sons
keep["K-klass"] = 36624  #K-Klass
keep["Drumatic Twins"] = 14784  #Drumattic Twins
keep["Crashdiet"] = 825015  #Crashdïet
keep["Anne-Lie Ryde"] = 264170  #Anne-Lie Rydé
keep["Alex Swing Oskars Sings!"] = 1442099  #Alex Swings Oscar Sings!
keep["Adolphson Falk"] = 119104  #Adolphson-Falk
keep["Run DMC"] = 219213  #Run-DMC
keep["Pj Harvey"] = 36052  #PJ Harvey
# NOTE(review): the matched name "Joe South" differs substantially from the chart name -- confirm this is not a false fuzzy match.
keep["Journey South"] = 325103  #Joe South
keep["Lil Scrappy"] = 239305  #Lil' Scrappy
keep["Breeders"] = 39778  #The Breeders
keep["Huis Anubis"] = 2797437  #Het Huis Anubis
keep["Avalanches"] = 9130  #The Avalanches
keep["Dj F.E.X."] = 43804  #DJ F.E.X
keep["Kc Da Rookee"] = 220984  #KC Da Rookee
keep["N Trance"] = 11001  #N-Trance
keep["Trooper Da Doon"] = 66144  #Trooper Da Don
keep["Divine Comedy"] = 27933  #The Divine Comedy
keep["Blumchen"] = 20156  #Blümchen
keep["Dt8 Project"] = 34061  #DT8 Project
keep["Dead 60s"] = 260915  #The Dead 60s
keep["Duke Spirit"] = 275448  #The Duke Spirit
keep["Airborne Toxic Event"] = 1311029  #The Airborne Toxic Event
keep["Big Pink"] = 1337426  #The Big Pink
keep["Boyz In Da Hood"] = 336836  #Boyz N Da Hood
keep["Devil Wears Prada"] = 989173  #The Devil Wears Prada
keep["Sixx: A.M."] = 895454  #Sixx:A.M.
keep["Dirty Heads"] = 2089008  #The Dirty Heads
keep["Mac DeMarco"] = 2568722  #Mac Demarco
keep["Mauricio Manieri"] = 1563247  #Maurício Manieri
keep["Butterfly Effect"] = 441749  #The Butterfly Effect
keep["L'aura"] = 880722  #L'Aura
keep["Fabula"] = 210657  #Jabula
keep["Dj Joe K"] = 225215  #DJ Joe K.
keep["Rasmus Nohr"] = 396235  #Rasmus Nøhr
keep["Valkyrians"] = 538336  #The Valkyrians
keep["Dir En Grey"] = 348163  #Dir en grey
keep["Rock'a'trench"] = 2981100  #Rock'A'Trench
keep["Wolfe Tones"] = 1219696  #The Wolfe Tones
keep["Andre Van Duin"] = 367055  #André van Duin
keep["M Hederos M Hellberg"] = 474515  #Hederos & Hellberg
keep["Christer Sjogren"] = 656866  #Christer Sjögren
keep["Feelstyle"] = 266896  #Tha Feelstyle
keep["Beta Band"] = 3847  #The Beta Band
# NOTE(review): the matched name "Angelfish" differs substantially from the chart name -- confirm this is not a false fuzzy match.
keep["Angelis"] = 251965  #Angelfish
keep["Dr Hook"] = 206325  #Dr. Hook
keep["Royal Scots Dragoon Guards"] = 595755  #The Royal Scots Dragoon Guards
keep["Hold Steady"] = 491197  #The Hold Steady
keep["SheDaisy"] = 1479264  #Shedaisy
keep["Gregoire"] = 1407221  #Grégoire
keep["Tragically Hip"] = 267249  #The Tragically Hip
keep["Dj Molella"] = 36544  #Molella
keep["Facteur X"] = 334431  #Factor X
keep["Che Nelle"] = 1055693  #Che'Nelle
keep["Jerome Echenoz"] = 695381  #Jérôme Echenoz
keep["Svein Ostvik"] = 3781616  #Svein Østvik
keep["Dj Taylor"] = 16567  #DJ Taylor
keep["Wiener Sangerknaben"] = 533488  #Die Wiener Sängerknaben
keep["Zoe Straub"] = 4767549  #Zoë Straub
keep["David Latour "] = 1189620  #David Latour
keep["Finger Tips"] = 1663670  #Fingertips
keep["Kelly Family"] = 319276  #The Kelly Family
keep["Jaime Cullum"] = 194401  #Jamie Cullum
keep["Queensryche"] = 255363  #Queensrÿche
keep["Black Crowes"] = 262691  #The Black Crowes
keep["David Crowder Band"] = 413886  #David Crowder*Band
keep["Gaslight Anthem"] = 1167086  #The Gaslight Anthem
keep["BrockHampton"] = 4581123  #Brockhampton
keep["Getaway Plan"] = 1230298  #The Getaway Plan
keep["Ener G"] = 1660398  #Ener.G
keep["C- Bool"] = 251008  #C-Bool
keep["Dublex INC."] = 3762  #Dublex Inc.
keep["R.E.G. Project"] = 164534  #The R.E.G. Project
keep["Niccolo Fabi"] = 329588  #Niccolò Fabi
keep["Jakkata"] = 3442  #Jakatta
keep["Rald Schmitz"] = 1404551  #Ralf Schmitz
keep["Bikstok Rogsystem"] = 272410  #Bikstok Røgsystem
keep["Sinead O'Connor"] = 42895  #Sinéad O'Connor
keep["Dj Slow"] = 55897  #DJ Slow
keep["Buck-tick"] = 69223  #Buck-Tick
keep["Mahala Rai Banda"] = 1208659  #Mahala Raï Banda
keep["SunStroke Project"] = 1818637  #Sunstroke Project
keep["Gary Clark Jr"] = 2699369  #Gary Clark Jr.
keep["Royal Guardsmen"] = 290319  #The Royal Guardsmen
keep["Temptations"] = 28332  #The Temptations
keep["Searchers"] = 277352  #The Searchers
keep["New Seekers"] = 93841  #The New Seekers
keep["Counting Crow"] = 262643  #Counting Crows
keep["Louise Attague"] = 374074  #Louise Attaque
keep["Osborne Brothers"] = 1088676  #The Osborne Brothers
keep["Roman Flugel"] = 13111  #Roman Flügel
keep["Rubettes"] = 240902  #The Rubettes
keep["Bart Kaell"] = 446710  #Bart Kaëll
keep["Slongs Dievanongs "] = 4435213  #Slongs Dievanongs
keep["Ez Special"] = 148110  #EZ Special
keep["Reelists"] = 154054  #The Reelists
keep["King Gidra"] = 385694  #King Giddra
keep["Zoe Birkett"] = 320018  #Zoë Birkett
keep["D!-Nation"] = 1599356  #D!Nation
keep["Von Bondies"] = 313529  #The Von Bondies
keep["Weather Girls"] = 80134  #The Weather Girls
keep["Levellers"] = 23672  #The Levellers
keep["Mj Cole"] = 1515  #MJ Cole
keep["Paddingtons"] = 337981  #The Paddingtons
keep["Long Blondes"] = 245214  #The Long Blondes
keep["Sunshine Underground"] = 361710  #The Sunshine Underground
keep["Unklejam"] = 774556  #UnkleJam
keep["Dave Clark Five"] = 329252  #The Dave Clark Five
keep["Boney M"] = 235979  #Boney M.
keep["Trustcompany"] = 279197  #Trust Company
keep["K`Jon"] = 211210  #K'Jon
keep["New Pornographers"] = 363443  #The New Pornographers
keep["Grace Vanderwaal"] = 5436370  #Grace VanderWaal
keep["Lil Peep"] = 5378070  #Lil' Peep
keep["Superjesus"] = 273378  #The Superjesus
keep["Dissociatives"] = 233572  #The Dissociatives
keep["Sleepy Jackson"] = 138068  #The Sleepy Jackson
keep["McClymonts"] = 2376183  #The McClymonts
keep["Jezabels"] = 1704414  #The Jezabels
keep["Raining Pleausure"] = 637513  #Raining Pleasure
keep["Loredana Berte'"] = 366682  #Loredana Bertè
keep["Claduio Baglioni"] = 638411  #Claudio Baglioni
keep["Papa Levante"] = 388519  #Papá Levante
keep["Dj Marta"] = 198294  #DJ Marta
keep["Mendonca Do Rio"] = 783996  #Mendonça Do Rio
keep["Epila"] = 1608899  #Epilä
keep["Man-Eating Tree"] = 2289900  #The Man-Eating Tree
keep["Jon Norgaard"] = 845262  #Jon Nørgaard
keep["Black League"] = 404409  #The Black League
keep["Pate Mustajarvi"] = 477874  #Pate Mustajärvi
keep["Brand New Heavies"] = 1396  #The Brand New Heavies
keep["Erik E"] = 9647  #Erick E
keep["Dj Mobster"] = 943953  #DJ Mobster
keep["Frank Ti-aya"] = 583397  #Frank Ti-Aya
keep["Osten Med Resten"] = 704765  #Östen Med Resten
keep["Ape"] = 404433  #Apse
keep["Tough Alliance"] = 305005  #The Tough Alliance
keep["Kristet Utseende"] = 360021  #The Kristet Utseende
keep["Kalomoira"] = 2270989  #Kalomira

In [None]:
# Second batch of manual renames: chart spelling of an artist -> numeric artist ID.
# Rows follow the `keep["{0}"] = {1}  #{2}` layout printed by the singleResult
# review cell, so the trailing comment is the name reported alongside the ID.
# NOTE(review): `keep` must already exist (it is created or loaded in another cell).
keep["Ms Dynamite"] = 24059  #Ms. Dynamite
keep["Rene Froger"] = 283021  #René Froger
keep["VanVelzen"] = 702319  #Vanvelzen
keep["Fratellis"] = 472288  #The Fratellis
keep["Andre Hazes"] = 282287  #André Hazes
keep["Kat-tun"] = 2884266  #Kat-Tun
keep["Jean Roch"] = 1368909  #Jean-Roch
keep["TikTak"] = 383787  #Tiktak
keep["L.E.J."] = 4777768  #L.E.J
keep["Pablo Lopez"] = 3465939  #Pablo López
keep["Agnetha Faltskog"] = 149038  #Agnetha Fältskog
keep["Kapten Rod"] = 1104466  #Kapten Röd
keep["BBmak"] = 231074  #BBMak
keep["Dj Jose"] = 53783  #DJ Jose
keep["Gigi D'alessio"] = 839027  #Gigi D'Alessio
keep["Terasbetoni"] = 333752  #Teräsbetoni
keep["Kim-lian"] = 323824  #Kim-Lian
keep["Jose Feliciano"] = 465889  #José Feliciano
keep["Asteroids Galaxy Tour"] = 865897  #The Asteroids Galaxy Tour
keep["Strumbellas"] = 3705444  #The Strumbellas
keep["Pep`s"] = 701116  #Pep's
keep["Gue Pequeno"] = 1216738  #Guè Pequeno
keep["Dead By April"] = 1477009  #Dead by April
keep["Luca Hanni"] = 2779963  #Luca Hänni
keep["Noir Desir"] = 85007  #Noir Désir
keep["Jean Pascal"] = 614758  #Jean-Pascal
keep["Herbert Groenemeyer"] = 163850  #Herbert Grönemeyer
keep["Nina Pastori"] = 925146  #Niña Pastori
keep["Saddle Club"] = 2956368  #The Saddle Club
keep["Pariisin Kevat"] = 1984505  #Pariisin Kevät
keep["Barbara Streisand"] = 53248  #Barbra Streisand
keep["Bohse Onkelz"] = 262898  #Böhse Onkelz
keep["Manolo Garcia"] = 705504  #Manolo García
keep["Ordinary Boys"] = 366147  #The Ordinary Boys
keep["Dj Tatana"] = 13864  #DJ Tatana
keep["Peter Joback"] = 343362  #Peter Jöback
keep["Loic Nottet"] = 4355587  #Loïc Nottet
keep["Sander Van Doorn"] = 183267  #Sander van Doorn
keep["Youngbloodz"] = 209479  #YoungBloodZ
keep["A$AP Ferg"] = 2503395  #ASAP Ferg
keep["Benassi Bros"] = 171187  #Benassi Bros.
keep["Watermat"] = 3856896  #Watermät
keep["Asian Kung-fu Generation"] = 289562  #Asian Kung-Fu Generation
keep["Hi-Tack"] = 364150  #Hi_Tack
keep["Hubert Von Goisern"] = 446821  #Hubert von Goisern
keep["Lee Dewyze"] = 2016926  #Lee DeWyze
keep["Sofia Essaidi"] = 1400771  #Sofia Essaïdi
keep["Elodie Frege"] = 958245  #Elodie Frégé
keep["Jurgen Drews"] = 109458  #Jürgen Drews
keep["Jorge Vercilo"] = 1609339  #Jorge Vercillo
keep["Futureheads"] = 132795  #The Futureheads
keep["Jim Stark"] = 273036  #Jim Stärk
keep["Andre Sardet"] = 1324640  #André Sardet
keep["Mago De Oz"] = 691599  #Mägo De Oz
keep["Kapasiteettiyksikko"] = 264342  #Kapasiteettiyksikkö

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    "Click Five", "Gospellers", "Temper Trap", "Cranberries",
    "Matthew Good Band", "Esmee Denters", "David Demaria", "Hoosiers",
    "L'Arc-en-Ciel", "Dj Aligator Project", "Diana Degarmo", "Adelen",
    "Madden Brothers", "Grupo Revelacao", "Brolle Jr.", "Beatfreakz",
    "Star Academy 2",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    "Potbelleez", "Los Autenticos Decadentes", "Gunther", "Star Academy 5",
    "Pedro Capo", "Dandy Warhols", "Postal Service", "Tea Party",
    "B 3", "Dj Tomekk", "M-kids", "Waldo`s People",
    "Lil Kleine", "Laura Narhi", "TopGunn", "Joaquin Sabina",
    "Raconteurs", "Monica Naranjo", "Patrick Sebastien", "Molly Sanden",
    "Miguel Bose",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    'Petri Nygard', 'Blackbear', 'Den Svenska Bjornstammen', 'Baseballs',
    'Sandra Van Nieuwland', 'Ian Carey Project', "BossHoss", "Max Gazze",
    "MoTrip", "Les Enfoires", "Lil Bow Wow", "Frero Delavega",
    "Kumi Kouda", "Bjork", "RedFoo", "Star Academy 4",
    "John Butler Trio", "Gregory Lemarchal",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    'AronChupa', 'Sexion D`Assaut', 'Die Arzte', "Bro'sis",
    "Maneskin", "Royksopp", "Opposites", "OpShop",
    "Jennifer Pena", "Paul Van Dyk", "La 5ta Estacion", "Supermen Lovers",
    "Keshia Chante", "Special D", "Kinki Kids", "Neighbourhood",
    "No Te Va A Gustar", "Ll Cool J", "Jeroen Van Der Boom", "Star Academy 3",
    "Miguel Angel Munoz", "Tito El Bambino", "Andres Calamaro", "A$AP Rocky",
    "Boogie Pimps",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    'DJ Otzi', 'DJ Bobo', 'R.I.O.', 'Chimene Badi',
    'Underdog Project', 'Thalia', 'Mr Children', 'Lonely Island',
    'Tragedie', 'Burhan G', 'Mans Zelmerlow', 'Christophe Mae',
    'Marco Antonio Solis', 'Sohne Mannheims', 'US5', 'Lars Winnerback',
    'Alejandro Fernandez', 'Mis-teeq', 'Gestort Aber Geil', 'Herbert Gronemeyer',
    "Shy'M", "Hakan Hellstrom", "Keen`V", "Ting Tings",
    "Frankie J", "K 3", "Common Linnets", "Coeur De Pirate",
    "Julien Dore", 'Dani Martin', 'Di-rect',
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the reviewed automatic matches for these chart names: copy element 0 of
# each singleResult entry into `keep`. A name missing from singleResult raises KeyError.
keys = [
    'T.A.T.U.', 'Outkast', 'Mr Probz', 'Michael Buble',
    'B.o.B', 'Michel Telo', 'Pablo Alboran', 'O-zone',
    'Amy Macdonald', 'Lumineers', 'DJ Tiesto', 'Chainsmokers',
    'MadCon', 'Mylene Farmer', 'XXXTentacion', 'Dj Sammy',
    'Sophie Ellis Bextor', 'Veronicas', 'In Grid', 'Chemical Brothers',
    'K-Maro', 'Collectif Metisse', 'Matt Pokora', 'White Stripes',
    'Bebe Lilly', 'Calling', 'Saturdays', 'Christina Sturmer',
    'All-American Rejects', 'Dan Balan', 'J Balvin', 'Elena Paparizou',
    'Nadiya', 'Blink 182', 'Rene La Taupe', 'DonkeyBoy',
    'Rolling Stones', 'Sigur Ros', 'Israel Kamakawiwoole', 'Armin Van Buuren',
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Show the assembled rename table and persist it as known.p.
print(keep)
saveFile(idata=keep, ifile="known.p", debug=True)

In [None]:
# Reload the table saved in known.p and write it out as singleRenames.p.
# NOTE(review): this saves the known.p contents under the singleRenames.p name,
# presumably replacing whatever that file held before -- confirm this is intended.
keep = getFile("known.p")
saveFile(idata=keep, ifile="singleRenames.p")
print(keep)

In [None]:
# Walk the most common chart artists (stopping after index 1000) and sort each
# component artist name into one of three work lists, keyed by that name and
# holding the chart entry's song/album titles:
#   singleGet - no database entry, chart entry has exactly one name
#   multiGet  - no database entry, chart entry has several names
#   matchGet  - database lookup returned a DataFrame whose row count is not 1
multiGet  = {}
singleGet = {}
matchGet  = {}


for i, (artist, cnt) in enumerate(slimResults.most_common()):
    chartEntry   = fullResults[artist]
    artistAlbums = set(list(chartEntry["Songs"].keys()) + list(chartEntry["Albums"].keys()))
    matches = mulArts.getArtistNames(artist)
    for match in matches.keys():
        # A name already filed in any of the three lists is not looked up again.
        if any(bucket.get(match) is not None for bucket in (multiGet, singleGet, matchGet)):
            continue
        mdata    = getMusicData("DiscArtist", match)
        progress = "{0} / {1}".format(i,len(slimResults))
        if isinstance(mdata, DataFrame):
            if mdata.shape[0] != 1:
                matchGet[match] = artistAlbums
                print("{0: <20}{1: <30}{2: <30}{3}\tMatch".format(progress, match, artist, cnt))
        elif len(matches) == 1:
            singleGet[match] = artistAlbums
            print("{0: <20}{1: <30}{2: <30}{3}\tSingle".format(progress, match, artist, cnt))
        else:
            multiGet[match] = artistAlbums
            print("{0: <20}{1: <30}{2: <30}{3}\tMulti".format(progress, match, artist, cnt))
    if i > 1000:
        break

In [None]:
# Persist the three work lists, each under its own file name.
for workList, fileName in ((multiGet, "multiToGet.p"),
                           (singleGet, "singleGet.p"),
                           (matchGet, "matchGet.p")):
    saveFile(idata=workList, ifile=fileName)

In [None]:
# Print the result of a wider, more lenient search (N=10, cutoff=0.6) for every name in multiGet.
for artistName in multiGet:
    artistResults = multiGet[artistName]
    results = getBestArtistMatch(artistName, artistResults, N=10, cutoff=0.6)
    print(artistName,'\t',results)

In [None]:
# Enable autoreload, build an `artists` helper around a discogs object, and call
# its searchDiscogForArtist method for every name queued in singleGet.
%load_ext autoreload
%autoreload

from artists import artists
# NOTE(review): `discogs` is not imported in this cell; presumably it is provided by an earlier cell -- confirm.
disc = discogs()
arts = artists(disc)
for artistName in singleGet.keys():
    print("===========>",artistName)
    arts.searchDiscogForArtist(artistName)

In [None]:
# Scratch check: Counter.most_common also works with float "counts";
# most_common(1) returns a one-element list holding the (key, value) pair with the largest value.
from collections import Counter
x = Counter({'a': 0, 'b': 0.5, 'c': 0.75})
x.most_common(1)

In [None]:
getBestArtistMatch("Beyonce", {'Crazy In Love', 'Deja Vu', 'Drunk In Love'})

In [None]:
# Minimum SequenceMatcher ratio for one of my album names to count as a match for a
# database album name: 0.6 by default, relaxed to 0.3 when myMusicNameIDMap has entries.
ratVal = 0.6
if len(myMusicNameIDMap) > 0:
    ratVal = 0.3

# artistName -> [DB row index, DB row "Name"] of the last DB row that produced a match.
artistIDMap = {}
for artistName,artistSlimData in multiMap.items():
    print("\n","="*50)
    print("ArtistName: {0}".format(artistName))
    print("   Albums: {0}".format(len(artistSlimData["Albums"])))

    # Keep only the part of each album entry that follows "/<artistName>/".
    myAlbumNames = [album.split("/{0}/".format(artistName))[-1] for album in artistSlimData["Albums"]]

    for idx,row in artistSlimData["DB"].iterrows():
        artistAlbumsData = getRowByIndex(artistAlbumsDB, idx)
        artistAlbums     = artistAlbumsData["Albums"]
        print("   Match: {0}  [{1}]".format(row["DiscArtist"], row["Name"]))
        if not isinstance(artistAlbums, dict):
            continue

        for albumType, albumTypeData in artistAlbums.items():
            for albumID, dbAlbumName in albumTypeData.items():
                for myAlbumName in myAlbumNames:
                    ratio = SequenceMatcher(None, myAlbumName, dbAlbumName).ratio()
                    # Use the threshold chosen above; it used to be computed and then
                    # ignored in favour of a hard-coded 0.6.
                    if ratio > ratVal:
                        print("     {0: <15}{1: <10}{2: <8}{3}".format(albumType, albumID, round(ratio,2), dbAlbumName))
                        # Later matches overwrite earlier ones for the same artistName.
                        artistIDMap[artistName] = [idx, row["Name"]]

In [None]:
# Chart name -> [weekday code, date format string].
# NOTE: each statement below rebinds the same name, so only the LAST uncommented
# assignment ("Airplay World Official Top 100") is in effect once the cell has run.
# The download/verification loops only test `daytype.get(cname) is None`, and getDates
# never reads its `daytype` argument, so the list values are currently unused.
daytype = {"USA Albums": ["SAT", "%Y-%m-%d"],
           "USA Singles Top 40": ["SAT", "%Y-%m-%d"]}
daytype = {"UK Singles Top 40": ["SAT", "%Y-%m-%d"]}
daytype = {"Top40-Charts.com Web Top 100": ["SAT", "%Y-%m-%d"]}
daytype = {"Airplay World Official Top 100": ["SAT", "%Y-%m-%d"]}

#daytype = {"USA Singles Top 40": ["SAT", "%Y-%m-%d"]}

from pandas import Timestamp
def getDates(daytype, cname):
    """
    Return the weekly chart dates for ``cname`` as a list of "YYYY-MM-DD" strings.

    Each known chart has two publication eras: an early one with fixed start/end dates
    and a later one that runs from a fixed start date up to today.  The two weekly
    ranges are generated separately and concatenated.

    daytype: accepted for call compatibility; it is not read.
    cname:   chart name; an unknown name yields an empty list.
    """
    # cname -> ((start, end, freq) of the early era, (start, freq) of the era ending today)
    schedules = {
        "USA Singles Top 40":             (("1997-07-07", "2000-04-01", "W-MON"), ("2000-04-01", "W-SAT")),
        "UK Singles Top 40":              (("2002-12-08", "2011-11-12", "W-SUN"), ("2011-11-12", "W-SAT")),
        "Top40-Charts.com Web Top 100":   (("2002-10-14", "2005-12-26", "W-MON"), ("2009-02-28", "W-SAT")),
        "Airplay World Official Top 100": (("2002-01-07", "2002-01-21", "W-MON"), ("2002-02-02", "W-SAT")),
    }

    schedule = schedules.get(cname)
    if schedule is None:
        return []

    (earlyStart, earlyEnd, earlyFreq), (lateStart, lateFreq) = schedule
    earlyDates = date_range(start=earlyStart, end=earlyEnd, freq=earlyFreq).strftime("%Y-%m-%d")
    lateDates  = date_range(start=lateStart, end=Timestamp.today(), freq=lateFreq).strftime("%Y-%m-%d")
    return list(earlyDates) + list(lateDates)

In [None]:
def showDates(year, day):
    """
    Return the weekly dates anchored on ``day`` (e.g. 'SUN') between the start of
    ``year`` and the start of the following year, as produced by ``date_range``.
    """
    rangeStart = str(year)
    rangeEnd   = str(int(year)+1)
    weeklyFreq = 'W-{0}'.format(day)
    return date_range(start=rangeStart, end=rangeEnd, freq=weeklyFreq)
    
showDates(2011, 'SUN')

In [None]:
from time import sleep
# Download every weekly chart page that is not yet stored on disk.
# The request headers are identical for all requests, so they are built once.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

for cid,cname in charts.items():
    # Only charts currently listed in daytype are processed.
    if daytype.get(cname) is None:
        continue
    dates = getDates(daytype, cname)

    # One directory per chart; "/" in the chart name is replaced so the name stays a single path component.
    savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
    mkDir(savedir)

    for idts,datename in enumerate(dates):
        # Hard cap on the number of dates considered per chart.
        if idts > 2000:
            break

        # Skip dates that were already downloaded.
        savename = setFile(savedir, "{0}.p".format(datename))
        if isFile(savename):
            continue

        url = "https://top40-charts.com/chart.php?cid={0}&date={1}".format(cid, datename)
        request = urllib.request.Request(url,None,headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read()

        print("URL ---> {0}".format(url))
        print(idts,'/',len(dates),"\tSaving {0}".format(savename))
        saveJoblib(data=data, filename=savename, compress=True)
        # Pause between requests.
        sleep(3)
        

#list(all_sundays(2001))

In [None]:
from time import sleep
# Re-read every stored chart page and report how many entries getChartData extracts;
# pages that yield nothing are flagged with an arrow.
for cid,cname in charts.items():
    if daytype.get(cname) is None:
        continue
    dates = getDates(daytype, cname)

    for idts,datename in enumerate(dates):
        savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
        savename = setFile(savedir, "{0}.p".format(datename))

        chartData = getHTML(savename)
        results = getChartData(chartData, debug=False)

        fields = [cname,'\t\t',datename,'\t',len(results)]
        if len(results) == 0:
            fields.append(" <<-------")
        print(*fields)

# Process Chart Data

In [None]:
# Append ".p" to every file below data/top40, ignoring directories ending in "starters".
savedir  = join(basedir, "data", "top40")
dirnames = [x for x in findDirs(savedir) if not x.endswith("starters")]
for dirname in dirnames:
    files = findExt(dirname, ext="*")
    for ifile in files:
        # Re-running the cell must not turn "x.p" into "x.p.p".
        if str(ifile).endswith(".p"):
            continue
        src = ifile
        dst = "{0}.p".format(ifile)
        moveFile(src, dst)


In [None]:
# Load one stored chart page into `chartData` for interactive inspection.
# NOTE: `break` only leaves the inner loop, so the outer loop still visits every
# directory; `chartData` ends up holding the first file of the last directory that
# contains any "*.p" file.
savedir  = join(basedir, "data", "top40")
dirnames = [x for x in findDirs(savedir) if x.endswith("starters") == False]
for dirname in dirnames:
    files = findExt(dirname, ext="*.p")
    for ifile in files:
        chartData = getHTML(ifile)
        break

In [None]:
chartData

In [None]:
# NOTE(review): absolute, user-specific path; this cell only runs on the author's machine.
# Consider building the path from `basedir` if that points at the same project root (confirm).
chartData = getHTML("/Users/tgadfort/Documents/code/charts/data/top40/test.html")

In [None]:
def getChartData(chartData, debug=False):
    """
    Extract an {artist: title} mapping from a parsed chart page.

    chartData: parsed page exposing ``findAll(name, attrs)``; the song rows are the
               ``<tr class="latc_song">`` elements.
    debug:     when True, print how many rows were found and how many were skipped.
               Defaults to False, so calls of the form ``getChartData(chartData)``
               and ``getChartData(chartData, debug=False)`` both work.

    Returns a dict mapping artist text to song-title text.  Because the result is keyed
    by artist, an artist with several rows on the same chart keeps only the last title.
    Rows lacking either a title link or an artist link are skipped instead of raising
    IndexError.
    """
    retval = {}
    skipped = 0

    trs = chartData.findAll("tr", {"class": "latc_song"})
    for tr in trs:
        # Links whose only content is an <img> carry the same "title" attribute; ignore them.
        titleLinks  = [a for a in tr.findAll("a", {"title": "View song details"}) if a.find("img") is None]
        artistLinks = tr.findAll("a", {"style": "text-decoration: none; "})
        if not titleLinks or not artistLinks:
            skipped += 1
            continue
        retval[artistLinks[0].text] = titleLinks[0].text

    if debug:
        print("Found {0} song rows, skipped {1}".format(len(trs), skipped))
    return retval

In [None]:

len(chartData.findAll('table'))

In [None]:
from searchUtils import findExt

In [None]:
files = findExt("data/top40/World Singles Official Top 100/", ext=".p")

In [None]:
# Debug dump for the first five files: print the text of every <a> found inside the
# "latc_song" rows, prefixed by its table / row / cell / link position, to locate
# which positions hold the artist and title.
for ifile in files[:5]:
    print(ifile)
    chartData = getHTML(ifile)
    
    for it,x in enumerate(chartData.findAll("table")):
        trs = x.findAll("tr", {"class": "latc_song"})
        for itr,tr in enumerate(trs):
            tds = tr.findAll("td")
            for itd,td in enumerate(tds):
                hrefs = td.findAll("a")
                for ihref,href in enumerate(hrefs):
                    print(it,'\t',itr,'\t',itd,'\t',ihref,'\t',href.text)

In [None]:
for it,x in enumerate(chartData.findAll("table")):
    print(it,len(x))

In [None]:
chartData.findAll("table")[8]

In [None]:
files = findExt("data/top40/World Singles Official Top 100/", ext=".p")

In [None]:
files[0]

In [None]:
getHTML(files[0])

In [None]:
def all_sundays(year):
    """Yield every Sunday of ``year`` as a ``date``, in chronological order."""
    newYearsDay = date(year, 1, 1)
    # date.weekday() is 0 for Monday ... 6 for Sunday, so this offset is 0 when
    # January 1st is itself a Sunday.
    daysToSunday = 6 - newYearsDay.weekday()
    sunday = newYearsDay + timedelta(days = daysToSunday)
    oneWeek = timedelta(days = 7)
    while sunday.year == year:
        yield sunday
        sunday = sunday + oneWeek
            
# Print every Sunday of 2020 as DD-MM-YYYY together with the object's type.
# m, d and y are assigned but not used inside this loop.
for s in all_sundays(2020):
    m = s.month
    d = s.day
    y = s.year
    print(s.strftime("%d-%m-%Y"))
    print(type(s))

In [None]:
d = "2002-01-07"

In [None]:
files = findExt("data/top40/old/World Singles Official Top 100/", ext=".p")

In [None]:
bs = getHTML(files[0])

In [None]:
# NOTE(review): if `bs` is a BeautifulSoup object, find("Iglesias") looks for a TAG named
# "Iglesias" rather than for that text; a text search was presumably intended — confirm.
bs.find("Iglesias")

In [None]:
url = "https://top40-charts.com/chart.php?cid=35&date=2002-01-07"

In [None]:
# Fetch the single test URL set in the previous cell and store the raw response body
# as "mytest.p" inside the current `savedir`.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

savename = setFile(savedir, "mytest.p")

request = urllib.request.Request(url,None,headers)
# Close the HTTP response as soon as the body has been read.
with urllib.request.urlopen(request) as response:
    data = response.read()

# The progress counters (idts / len(dates)) belonged to the bulk-download loop and were
# stale or undefined here, so only the target file is reported.
print("Saving {0}".format(savename))
saveJoblib(data=data, filename=savename, compress=True)

In [None]:
str(data).find("Enya")

In [None]:
# Parameters of the correlation significance study in the cells below.
N   = 28       # number of points in each simulated sample
obs = 0.5      # observed correlation that the simulated correlations are compared against
r   = obs      # same value, used in the t-statistic formula
Nmc = 250000   # number of Monte Carlo trials

In [None]:
import random
from numpy import corrcoef
from pandas import Series

In [None]:
# Monte Carlo null distribution: correlation coefficient of two independent samples of
# N uniform random numbers, repeated Nmc times.
# Fixed seed so the simulated distribution (and the p-value derived from it) is the same
# on every run of the notebook.
random.seed(12345)
corrs = []
for i in range(Nmc):
    x1 = [random.random() for _ in range(N)]
    x2 = [random.random() for _ in range(N)]
    # corrcoef returns the 2x2 correlation matrix; [0][1] is corr(x1, x2).
    corrs.append(corrcoef(x1,x2)[0][1])

In [None]:
# Imports used by this cell are gathered here so it runs on a fresh kernel
# (linspace was previously only imported in a later cell).
import scipy.stats as st
from numpy import linspace, sqrt
from scipy.stats import norm

# Empirical one-sided p-value: fraction of simulated correlations at or above `obs`.
sc = Series(corrs)
sc.hist(bins=100)
Nabove = len(sc[sc >= obs])
pvalue = Nabove / Nmc
print("{0} / {1}".format(Nabove,Nmc))
print("N --> {0}".format(N))
print("r --> {0}".format(r))

# Analytic cross-check: t statistic of a correlation coefficient with N-2 degrees of freedom.
tscore = r*sqrt(N-2)/sqrt(1-r**2)
print("t --> {0}".format(tscore))
p  = 1 - st.t.cdf(tscore,df=N-2)
print("p --> {0}".format(p))


# Express the Monte Carlo p-value as a number of normal standard deviations: first x on
# the grid whose two-sided normal tail probability drops to pvalue or below.
# NOTE(review): `pvalue` counts only the upper tail (sc >= obs) while the tail
# probability here is two-sided — confirm this is intended.
# If pvalue is 0 (no simulated correlation reached `obs`) nothing is printed.
for x in linspace(0,5,500):
    p = 2*(1 - norm.cdf(x))
    if p <= pvalue:
        print("p-value  --> {0}".format(pvalue))
        print("# of std --> {0}".format(x))
        break

In [None]:
from numpy import linspace, sqrt
# t statistic as a function of the correlation coefficient rho for the current N.
rcorrs = {}
# The end points rho = -1 and rho = +1 are dropped: there 1 - rho**2 is 0 and the
# formula divides by zero.
for rho in linspace(-1,1,100)[1:-1]:
    rcorrs[rho] = rho*sqrt(N-2)/sqrt(1-rho**2)
Series(rcorrs).plot()