# Top 40 Charts Functions

In [20]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
import urllib
import urllib.request
from glob import glob
from os import getcwd
from os.path import join
from time import sleep

from fileUtils import getBasename, getDirname, getBaseFilename
from fsUtils import isFile, isDir, moveFile, removeFile, mkDir, setFile, moveFile
from searchUtils import findDirs, findExt, findNearest
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile

from discogsBase import discogs
from multiArtist import multiArtist
# Record the interpreter version alongside the notebook output.
print("Python: {0}".format(sys.version))

from pandas import date_range
# Every data/ and results/ path in this notebook is built relative to the
# current working directory.
basedir = getcwd()
print("Basedir = {0}".format(basedir))

import datetime as dt
# Timestamp taken when this cell runs, printed as a "last run" marker.
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Basedir = /Users/tgadfort/Documents/code/charts
Notebook Last Run Initiated: 2020-01-17 19:20:18.100759


In [22]:
from datetime import date, timedelta

def all_sundays(year):
    """Yield every Sunday of ``year`` as a ``datetime.date``, in calendar order."""
    new_years_day = date(year, 1, 1)
    # weekday() is 0 for Monday ... 6 for Sunday, so the offset is 0 when
    # January 1st is itself a Sunday.
    sunday = new_years_day + timedelta(days=6 - new_years_day.weekday())
    one_week = timedelta(days=7)
    while sunday.year == year:
        yield sunday
        sunday = sunday + one_week
            
from pandas import DataFrame
def getMusicData(key, artist):
    """Select rows of the notebook-level ``discdf`` where column ``key`` equals ``artist``.

    Returns the matching sub-frame, or None when no row matches.
    """
    matches = discdf[discdf[key] == artist]
    return matches if matches.shape[0] > 0 else None
    
from difflib import SequenceMatcher

def getRowByIndex(pdf, idx):
    """Return whatever ``pdf.loc[idx]`` yields (label-based lookup)."""
    row = pdf.loc[idx]
    return row

In [23]:
def _albumOverlapScore(myAlbumNames, artistAlbums, cutoff):
    """Score how well ``myAlbumNames`` overlap one database artist's albums.

    For each of my album names, take the best SequenceMatcher ratio against
    any database album name; add it to the score only if it exceeds ``cutoff``.
    ``artistAlbums`` is a mapping {albumType: {albumID: albumName}}.
    """
    dbAlbumNames = [dbAlbumName
                    for albumTypeData in artistAlbums.values()
                    for dbAlbumName in albumTypeData.values()]
    score = 0
    for myAlbumName in myAlbumNames:
        maxRatio = max((SequenceMatcher(None, myAlbumName, dbAlbumName).ratio()
                        for dbAlbumName in dbAlbumNames), default=0)
        if maxRatio > cutoff:
            score += maxRatio
    return score


def _selectBestCandidate(idxResults, matchValue):
    """Return (idx, name, rounded score) for the top-scoring candidate, or (None, None, None)."""
    if not idxResults:
        return (None, None, None)
    # max() returns the first maximal item, which is the same choice
    # Counter(...).most_common(1) made, without needing Counter in scope.
    idx, score = max(idxResults.items(), key=lambda item: item[1])
    return (idx, matchValue[idx], round(score, 2))


def getBestArtistIDMatch(artistName, artistResults, possibleMatches, N=3, cutoff=0.7, debug=False):
    """Pick, among the given artist IDs, the one whose albums best match ``artistResults``.

    Parameters:
        artistName: name reported back for whichever ID wins.
        artistResults: iterable of album/song names known for the artist.
        possibleMatches: iterable of artist IDs to look up in the notebook-level
            ``artistAlbumsDB`` (via ``getRowByIndex``).
        N: unused here; kept so the signature parallels ``getBestArtistMatch``.
        cutoff: minimum per-album similarity ratio for an album to count.
        debug: print one line per candidate when True.

    Returns:
        (idx, artistName, score rounded to 2 places), or (None, None, None)
        when ``possibleMatches`` is empty.
    """
    myAlbumNames = list(artistResults)
    idxResults = {}
    matchValue = {}
    for idx in possibleMatches:
        artistAlbums = getRowByIndex(artistAlbumsDB, idx)["Albums"]
        idxResults[idx] = _albumOverlapScore(myAlbumNames, artistAlbums, cutoff)
        matchValue[idx] = artistName

        if debug:
            print("\t{0: <5}{1: <15}{2}".format(idxResults[idx], idx, matchValue[idx]))

    return _selectBestCandidate(idxResults, matchValue)


def getBestArtistMatch(artistName, artistResults, N=3, cutoff=0.7, debug=False):
    """Find the database artist whose name is near ``artistName`` and whose albums best match.

    Candidate names come from ``findNearest(artistName, artistNameToID.keys(), N, cutoff)``;
    every ID registered under each candidate name is scored against
    ``artistResults`` using the notebook-level ``artistAlbumsDB``.

    Returns:
        (idx, matched artist name, score rounded to 2 places), or
        (None, None, None) when no candidate was found.
    """
    myAlbumNames = list(artistResults)
    if debug:
        print(artistName)

    idxResults = {}
    matchValue = {}
    for artist in findNearest(artistName, artistNameToID.keys(), N, cutoff):
        for idx in artistNameToID[artist]:
            artistAlbums = getRowByIndex(artistAlbumsDB, idx)["Albums"]
            # An ID reachable through several candidate names is re-scored and
            # attributed to the last name seen, as before.
            idxResults[idx] = _albumOverlapScore(myAlbumNames, artistAlbums, cutoff)
            matchValue[idx] = artist

            if debug:
                print("\t{0: <5}{1: <15}{2}".format(idxResults[idx], idx, matchValue[idx]))

    return _selectBestCandidate(idxResults, matchValue)

# Discogs Downloads

In [24]:
# Load the slim artist table and the per-artist album table.
disc = discogs()
discdf = disc.getMasterSlimArtistDiscogsDB()
artistIDToName = discdf["DiscArtist"].to_dict()
from masterdb import getArtistAlbumsDB, discConv
artistAlbumsDB = getArtistAlbumsDB(disc)

print("Found {0} ID -> Name entries".format(len(artistIDToName)))

# Invert ID -> name into name -> [IDs]; several IDs may share one name.
artistNameToID = {}
for discogsID, discogsName in artistIDToName.items():
    artistNameToID.setdefault(discogsName, []).append(discogsID)
print("Found {0} Name -> ID entries".format(len(artistNameToID)))

mulArts  = multiArtist(cutoff=0.9, discdata=artistNameToID, exact=False)

Saved Discog Directory /Volumes/Music/Discog is Available
Local Discog Directory /Users/tgadfort/Music/Discog is Available
/Volumes/Music/Discog/collections exists
/Volumes/Music/Discog/artists exists
/Volumes/Music/Discog/albums exists
/Volumes/Music/Discog/collections-db exists
/Volumes/Music/Discog/artists-db exists
/Volumes/Music/Discog/albums-db exists
/Volumes/Music/Discog/artists-db/metadata exists
/Volumes/Music/Discog/albums-db/metadata exists
/Volumes/Music/Discog/diagnostic exists
/Volumes/Music/Discog/db exists
Loading data from /Volumes/Music/Discog/db/MasterSlimArtistDB.p
  --> This file is 53.5MB.
Loading /Volumes/Music/Discog/db/MasterSlimArtistDB.p
Current Time is Fri Jan 17, 2020 19:21:23 for 
Loading ArtistID Data
Loading data from /Volumes/Music/Discog/db/ArtistIDToAlbumNames.p
  --> This file is 180.4MB.
Loading /Volumes/Music/Discog/db/ArtistIDToAlbumNames.p
Creating Pandas DataFrame for 669071 Artists
	Shape --> (669071, 1)
DataFrame Shape is (669071, 1)
Current 

# Rename Files

In [58]:
singleRenames = getFile(ifile="singleRenames.p")
multiRenames  = getFile(ifile="multiRenames.p")
knownArtists  = getFile(ifile="artistMap.p")

# Collapse any two-element list value down to its first element, then write
# the map back (presumably [id, name] pairs like those built for `keep` --
# confirm).
for chartName, target in list(singleRenames.items()):
    if isinstance(target, list) and len(target) == 2:
        singleRenames[chartName] = target[0]
saveFile(idata=singleRenames, ifile="singleRenames.p")

  --> This file is 105.6kB.


# Get Starter File

In [None]:
# Locate the saved starter page and parse it. Fail loudly when it is missing:
# previously the error was only printed and the next line then failed with a
# NameError (or silently reused a stale `filename` from an earlier run).
starterPath = join(basedir, "data", "top40", "starter.html")
starterFiles = glob(starterPath)
if not starterFiles:
    raise FileNotFoundError("Could not find starter HTML file! Expected {0}".format(starterPath))
filename = starterFiles[0]
fdata = getHTML(filename)

In [None]:
# Scratch globals. `chartData` is rebound by later cells.
chartData  = {}
dirname = None
# NOTE(review): the download cells in this notebook build their URLs from a
# hard-coded https://top40-charts.com string rather than from baseURL --
# confirm whether this (http) value is still needed.
baseURL = "http://top40-charts.com"

## Get Charts
def getCharts(fdata):
    """Map chart id -> chart name from the ``cid`` <select> elements of a parsed page.

    ``fdata`` must offer ``findAll``; every <option> under a
    <select name="cid"> inside a <form> contributes value -> text.
    """
    charts = {}
    for form in fdata.findAll("form"):
        for select in form.findAll("select", {"name": "cid"}):
            charts.update((option.attrs['value'], option.text)
                          for option in select.findAll("option"))
    return charts


## Get Dates
def getDates(fdata):
    """Return the sorted, de-duplicated option values of every ``date`` <select>.

    ``fdata`` must offer ``findAll``; only <select name="date"> elements that
    sit inside a <form> are considered. Returns a list (empty when none found).
    """
    dates = set()
    for form in fdata.findAll("form"):
        for select in form.findAll("select", {"name": "date"}):
            # A stray `continue` used to sit here, which skipped the option
            # loop entirely and made the function always return [].
            for option in select.findAll("option"):
                dates.add(option.attrs['value'])
    return sorted(dates)

# chart id (option value) -> chart name (option text), parsed from the starter page.
charts = getCharts(fdata)

In [None]:
# NOTE(review): this cell cannot run as written and looks like an earlier
# draft of the download cells that follow it:
#   * `url` is never formatted with `cid`, so the literal "{0}" would be requested;
#   * `cname`, `datename`, `idts` and `dates` are not defined by this loop or
#     by any earlier cell in the notebook.
# Consider deleting it once the later download cells are confirmed to cover
# the same ground.
for cid in charts.keys():
    url      = "https://top40-charts.com/chart.php?cid={0}"
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    headers={'User-Agent':user_agent,}

    savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
    mkDir(savedir)

    savename = setFile(savedir, "{0}.p".format(datename))
    if isFile(savename):
        continue

    request=urllib.request.Request(url,None,headers) #The assembled request
    response = urllib.request.urlopen(request)
    data = response.read() # The data u need

    print(idts,'/',len(dates),"\tSaving {0}".format(savename))
    saveJoblib(data=data, filename=savename, compress=True)
    sleep(3)

# Download Chart Data

In [None]:
# Download the landing page of every chart once and cache the raw bytes as
# data/top40/<chart name>.p. Charts that already have a cached file are skipped.
savedir  = join(basedir, "data", "top40")
mkDir(savedir)

user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

for ichart, (cid, cname) in enumerate(charts.items(), start=1):
    starterURL = "https://top40-charts.com/chart.php?cid={0}".format(cid)
    url = starterURL
    savename = setFile(savedir, "{0}.p".format(cname.replace("/", " ")))

    if isFile(savename):
        continue

    request = urllib.request.Request(url, None, headers)
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        data = response.read()

    print("URL ---> {0}".format(url))
    # The progress line used to reference `idts` and `dates`, which are never
    # defined, so the cell raised NameError after downloading but before saving.
    print(ichart,'/',len(charts),"\tSaving {0}".format(savename))
    saveJoblib(data=data, filename=savename, compress=True)
    sleep(3)  # pause between requests to the site

In [None]:
# Build {chart id: {date value: chart URL}} from the cached landing pages.
# `savedir` is set explicitly here: it used to leak in from the previous cell,
# and several other cells rebind the same name to different directories.
savedir = join(basedir, "data", "top40")
chartsToGet = {}
for cid, cname in charts.items():
    chartsToGet[cid] = {}
    savename = setFile(savedir, "{0}.p".format(cname.replace("/", " ")))
    data = getHTML(savename)
    select = data.find("select", {"name": "date"})
    if select is None:
        raise ValueError("No dates!")
    for option in select.findAll("option"):
        value = option.attrs['value']
        url   = "https://top40-charts.com/chart.php?cid={0}&date={1}".format(cid, value)
        chartsToGet[cid][value] = url

# Download Charts

In [None]:
from pathlib import Path

# Download every (chart, date) page listed in chartsToGet into
# data/top40/<chart name>/<date>.p. Files that already exist are only touched.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

npages = sum(len(cidData) for cidData in chartsToGet.values())
ipage  = 0
for cid, cidData in chartsToGet.items():
    cname   = charts[cid]
    savedir = join(basedir, "data", "top40", cname.replace("/", " "))
    # Make sure the per-chart directory exists before saving into it.
    mkDir(savedir)

    for value, url in cidData.items():
        ipage += 1
        if value is None:
            continue
        try:
            # NOTE(review): 201 looks like a truncated year (e.g. 2010 or 2019);
            # as written this only skips dates before the year 201 -- confirm
            # the intended cutoff.
            if getDateTime(value).year < 201:
                continue
        except Exception:
            # Unparseable date values are skipped; KeyboardInterrupt is no
            # longer swallowed as it was by the bare `except`.
            continue

        savename = setFile(savedir, "{0}.p".format(value))
        if savename is None:
            continue

        if isFile(savename):
            print("Touching {0}".format(savename))
            Path(savename).touch()
        else:
            request = urllib.request.Request(url, None, headers)
            with urllib.request.urlopen(request) as response:
                data = response.read()

            print("URL ---> {0}".format(url))
            # The progress line used to reference the undefined `idts` and
            # `dates`, raising NameError before the page could be saved.
            print(ipage,'/',npages,"\tSaving {0}".format(savename))
            saveJoblib(data=data, filename=savename, compress=True)
            sleep(2)  # pause between requests to the site

# Parse Charts

In [None]:
def getChartData(chartData, debug=False):
    """Extract chart entries from a parsed chart page.

    A table counts as one entry when its attributes equal the expected
    dict exactly, it holds a single <tr> with three <td> cells, and the third
    cell holds two links: the first link's text is stored under "Album", the
    second under "Artist".

    Returns a list of {"Artist": ..., "Album": ...} dicts in page order.
    With ``debug`` set, every table and every accepted entry is printed.
    """
    # NOTE(review): 'borer' is matched literally; presumably it mirrors a
    # misspelled attribute in the site's markup -- confirm before "fixing" it.
    entryAttrs = {'cellpadding': '0', 'cellspacing': '0', 'borer': '0'}

    results = []
    pos = 1
    for it, table in enumerate(chartData.findAll("table")):
        ths = table.findAll("th")
        trs = table.findAll("tr")
        attrs = table.attrs
        if debug:
            print(it,len(ths),len(trs),attrs)

        if attrs != entryAttrs or len(trs) != 1:
            continue
        tds = trs[0].findAll("td")
        if len(tds) != 3:
            continue
        refs = tds[2].findAll("a")
        if len(refs) != 2:
            continue

        album, artist = refs[0].text, refs[1].text
        results.append({"Artist": artist, "Album": album})
        if debug:
            print(pos,'\t',artist,'\t',album)
        pos += 1

    return results

In [None]:
# Parse the cached pages of each selected chart and save one results file per
# chart under results/top40.
for cid, cidData in chartsToGet.items():
    chartResults = {}
    cname = charts[cid]
    print(cname)
    # Only charts whose name starts with "W" are parsed, and this one is
    # excluded explicitly.
    if cname == "World Adult Top 20 Singles" or not cname.startswith("W"):
        continue
    safeName = cname.replace("/", " ")
    savedir = join(basedir, "data", "top40", safeName)

    for value, url in cidData.items():
        savename = setFile(savedir, "{0}.p".format(value))
        data     = getHTML(savename)
        results  = getChartData(data)
        chartResults[value] = results
        fields = ('\t', cname, '\t\t', value, '\t', len(results))
        if not results:
            # Flag pages that yielded no entries.
            fields += (" <<-------",)
        print(*fields)

    savedir = join(basedir, "results", "top40")
    savename = setFile(savedir, "{0}.p".format(safeName))
    saveJoblib(data=chartResults, filename=savename, compress=True)

# Aggregate Charts

In [30]:
from searchUtils import findExt
# Directory into which the parsing step saves one results file per chart.
savedir = join(basedir, "results", "top40")
# presumably every file under savedir with a ".p" extension -- TODO confirm findExt semantics
files = findExt(savedir, ".p")

In [None]:
from fileUtils import getBaseFilename

# Aggregate all per-chart result files into
#   results[artist]["Songs" | "Albums"][title][chart name][chart date] = position
# where position is the 0-based index of the entry in the parsed chart list.
results = {}
for ifile in files:
    chartData = getFile(ifile)
    cname     = getBaseFilename(ifile)
    print("{0: <40}".format(cname),end="")
    # Charts whose name ends in "Albums" feed the album table, all others the song table.
    key = "Albums" if cname.endswith("Albums") else "Songs"
    # The loop variable is deliberately not called `date`: that name is the
    # imported datetime.date used by all_sundays, and rebinding it here broke
    # that function for the rest of the session.
    for chartDate, values in chartData.items():
        for i,item in enumerate(values):
            artist = item["Artist"]
            # Apply the manual renames: multi-artist strings first, then the
            # single-artist map (chart name -> Discogs ID -> Discogs name).
            if multiRenames.get(artist) is not None:
                artist = multiRenames[artist]
            if singleRenames.get(artist) is not None:
                idx    = str(singleRenames[artist])
                artist = artistIDToName[idx]
            album  = item["Album"]
            artistEntry = results.setdefault(artist, {"Songs": {}, "Albums": {}})
            artistEntry[key].setdefault(album, {}).setdefault(cname, {})[chartDate] = i
    print(len(results))

In [None]:
# Persist the aggregated per-artist chart history; a later cell reloads this file as fullResults.
saveFile(ifile="chartResults.p", idata=results, debug=True)

In [None]:
from collections import Counter

# Per artist, count every (title, chart, date) entry across songs and albums.
slimResults = Counter()
for artist, artistData in results.items():
    for titles in artistData.values():
        for chartsForTitle in titles.values():
            slimResults[artist] += sum(map(len, chartsForTitle.values()))

In [None]:
# Persist the per-artist entry counts; a later cell reloads this file as slimResults.
saveFile(ifile="chartCounter.p", idata=slimResults, debug=True)

In [None]:
# Preview the first aggregated artist: each song/album title with the number
# of charts it appeared on. The former no-op inner loops were dropped because
# they did nothing except rebind the notebook-level `chartData`.
for artist, artistData in results.items():
    print(artist)
    for key in ("Songs", "Albums"):
        print("    {0}:".format(key))
        for album, albumData in artistData[key].items():
            print('\t',album,'\t',len(albumData))
    break

# Find Missing Artists

In [136]:
# Reload the saved aggregation outputs, so the matching cells below can start
# from the cached files.
slimResults = getFile(ifile="chartCounter.p", debug=True)
fullResults = getFile(ifile="chartResults.p", debug=True)

# Reload the manual rename maps and the known-artist map.
singleRenames = getFile(ifile="singleRenames.p", debug=True)
multiRenames  = getFile(ifile="multiRenames.p", debug=True)
knownArtists  = getFile(ifile="artistMap.p", debug=True)

Loading data from chartCounter.p
  --> This file is 244.9kB.
Loading chartCounter.p
Loading data from chartResults.p
  --> This file is 5.1MB.
Loading chartResults.p
Loading data from singleRenames.p
  --> This file is 106.6kB.
Loading singleRenames.p
Loading data from multiRenames.p
  --> This file is 622B.
Loading multiRenames.p
Loading data from artistMap.p
  --> This file is 158.2kB.
Loading artistMap.p


In [59]:
# Artists to leave out of the unknown-artist scan (looked up there with skips.get(artist)).
skips = {}

# Unknown Artists

In [None]:
## 5500
# Scan artists in descending order of chart entries and collect those with no
# exact "DiscArtist" row in discdf, stopping once more than 200 are found.
artistsToGet = {}
rankedArtists = sorted(slimResults.items(), key=lambda pair: pair[1], reverse=True)
for i, (artist, cnt) in enumerate(rankedArtists):
    # presumably a manual resume point from an earlier pass -- confirm before changing
    if i <= 14961:
        continue
    if i % 250 == 0:
        print("==>",i,len(slimResults))
    if multiRenames.get(artist) is not None:
        artist = multiRenames[artist]
    if singleRenames.get(artist) is not None:
        # str() matches how every other cell indexes artistIDToName.
        idx    = str(singleRenames[artist])
        artist = artistIDToName[idx]
    if knownArtists.get(artist) is not None or skips.get(artist) is not None:
        continue

    # Names that split into several artists are handled by the multi-artist cells.
    matches = mulArts.getArtistNames(artist)
    if len(matches) > 1:
        continue

    mdata = getMusicData("DiscArtist", artist)
    # Record the artist when the lookup found nothing: either None or an empty frame.
    notFound = mdata is None or (isinstance(mdata, DataFrame) and mdata.shape[0] < 1)
    if notFound:
        artistsToGet[artist] = cnt
        print("{0: <6}{1: <40}{2: <6}{3}".format(i,artist,cnt,len(artistsToGet)))
        if len(artistsToGet) > 200:
            break

14962 David Barrul                            2     1
14966 Andres Suarez                           2     2
14971 Blas Canto                              2     3
==> 15000 18175
15009 Activa Vs. Matt Abbott                  2     4
15017 Ruth-anne                               2     5
15018 Virtanen Band                           2     6
15020 Kakka-hata 77                           2     7
15033 Mathias Og Henriette                    2     8
15034 Kasper Nyemann                          2     9
15038 Chresten                                2     10
15050 Zog Chorus                              2     11
15051 High Hopes Choir                        2     12
15058 Nova Muzika                             2     13
15068 3Robi                                   2     14
15069 Davin Herbruggen                        2     15
15070 Kalvijn                                 2     16
15078 Avantasia                               2     17
15085 Wingenfelder:Wingenfelder               2     18
151

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

--- Logging error ---
--- Logging error ---
--- Logging error ---
--- Logging error ---
--- Logging error ---
Error in sys.excepthook:
Traceback (most recent call last):
  File "/anaconda3/envs/py37/lib/python3.7/linecache.py", line 95, in updatecache
    stat = os.stat(fullname)
NotADirectoryError: [Errno 20] Not a directory: '/anaconda3/envs/py37/lib/python3.7/site-packages/discogs-0.0.1-py3.7.egg/multiArtist.py'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/anaconda3/envs/py37/lib/python3.7/linecache.py", line 47, in getlines
    return updatecache(filename, module_globals)
  File "/anaconda3/envs/py37/l

    self.handle(record)
  File "/anaconda3/envs/py37/lib/python3.7/logging/__init__.py", line 1529, in handle
    self.callHandlers(record)
  File "/anaconda3/envs/py37/lib/python3.7/logging/__init__.py", line 1591, in callHandlers
    hdlr.handle(record)
  File "/anaconda3/envs/py37/lib/python3.7/logging/__init__.py", line 905, in handle
    self.emit(record)
  File "/anaconda3/envs/py37/lib/python3.7/logging/__init__.py", line 1040, in emit
    self.handleError(record)
  File "/anaconda3/envs/py37/lib/python3.7/logging/__init__.py", line 958, in handleError
    traceback.print_exception(t, v, tb, None, sys.stderr)
  File "/anaconda3/envs/py37/lib/python3.7/traceback.py", line 104, in print_exception
    type(value), value, tb, limit=limit).format(chain=chain):
  File "/anaconda3/envs/py37/lib/python3.7/traceback.py", line 521, in __init__
    self._load_lines()
  File "/anaconda3/envs/py37/lib/python3.7/traceback.py", line 533, in _load_lines
    self.__context__._load_lines()
  File

ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name more can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name less can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name man can't be aliased because it is another magic command.


In [161]:
from collections import Counter

# For every unknown artist, look for a near-named database artist whose albums
# overlap the charted titles. Good matches go to possibleResults as
# [id, name]; everything else is recorded in skips.
cutoff = 0.7
skips = {}
possibleResults = {}
for i,(artist, cnt) in enumerate(artistsToGet.items()):
    print(i,"/",len(artistsToGet),'  \t',artist)
    artistAlbums = set(list(fullResults[artist]["Songs"].keys()) + list(fullResults[artist]["Albums"].keys()))
    # Not named `results`: that name is the aggregation dict written to
    # chartResults.p, and rebinding it here would make a re-run of the save
    # cell overwrite that file with a tuple.
    bestMatch = getBestArtistMatch(artist, artistAlbums, N=3, cutoff=cutoff)
    score = bestMatch[2]
    if score is not None and score >= cutoff:
        print(i,"/",len(artistsToGet),'  \t',artist,bestMatch)
        possibleResults[artist] = list(bestMatch[:-1])
    else:
        skips[artist] = True

0 / 201   	 Little
1 / 201   	 Yoshii Harmony
2 / 201   	 Motoyosi
3 / 201   	 Fortunately
4 / 201   	 Hirai It Is Hard
5 / 201   	 Tsukishima Kirari
6 / 201   	 Aids
7 / 201   	 Hiroshi Kiyama
8 / 201   	 20th Century
9 / 201   	 244-x Endli
10 / 201   	 Tatsuya Ishii
11 / 201   	 Hata Motohiro
12 / 201   	 Milkyway
13 / 201   	 Cajun Dance Party
14 / 201   	 Mari Takenote
15 / 201   	 Color Bottle
16 / 201   	 Spontania
17 / 201   	 Tanimura Nana
18 / 201   	 Kimura Kaela
19 / 201   	 Tokunaga Hideaki
20 / 201   	 Shin Ki
21 / 201   	 Fukuyama Masaharu
22 / 201   	 Chattomonchi
23 / 201   	 Hiroshi Hata Motoe
24 / 201   	 Mano Eri Mustard
25 / 201   	 MACO
26 / 201   	 Kishida Kyodan
27 / 201   	 Mag!C Prince
28 / 201   	 Silviya Kacarova
29 / 201   	 Cosnoul Trainin
30 / 201   	 Viktorija Georgieva
31 / 201   	 Eva Parmakova
32 / 201   	 Lara Veronin
33 / 201   	 Stewartdotdoogan
33 / 201   	 Stewartdotdoogan ('2284592', 'StewartdotDoogan', 0.93)
34 / 201   	 D'side
35 / 201   	 Man

In [162]:
# Matches to accept, keyed by chart artist name with [id, name] values.
# Manual overrides can be added in the same shape, e.g.
#   keep['Sarah Brightman'] = ['59756', 'Sarah Brightman']
keep = dict(possibleResults)
keep

{'Stewartdotdoogan': ['2284592', 'StewartdotDoogan'],
 'Vanorly': ['493202', 'Van Orly'],
 'Jeroen Van Koningsbrugge': ['1197732', 'Jeroen van Koningsbrugge'],
 'Datafork': ['139996', 'DataFork'],
 'Priddyboy': ['1255141', 'Priddy Boy'],
 'Nina Soderquist': ['3348807', 'Nina Söderquist'],
 'Chuck-N-Blood': ['2225642', 'Chuck-n-blood'],
 'Bundnerflaisch': ['3381802', 'Bündnerflaisch'],
 'Cruxshadows': ['223528', 'The Crüxshadows'],
 'Aurelie Konate': ['1845311', 'Aurélie Konaté'],
 'Marchis Flow': ['1131056', "Marchi's Flow"],
 'Les Voix De Daia': ['4400634', 'Les Voix de Daïa'],
 "L'Algerino": ['460583', "L'Algérino"],
 'Tiptop': ['452424', 'TipTop'],
 'Planckaerts': ['908061', 'De Planckaerts'],
 'Slongs Dievanongs\xa0': ['4435213', 'Slongs Dievanongs'],
 'Lisbon Lions': ['4367001', 'The Lisbon Lions'],
 'Cast Of The Only Way Is Essex': ['2575281', 'The Only Way Is Essex'],
 'Big Reunion Cast 2013': ['3600400', 'The Big Reunion Cast 2013'],
 'Eighties Matchbox B-line': ['109948',
  'T

In [163]:
# Fold the accepted matches into singleRenames (chart name -> first element of
# the kept pair) and write the map back.
singleRenames = getFile(ifile="singleRenames.p", debug=True)
for chartName, candidate in keep.items():
    if isinstance(candidate, list) and len(candidate) == 2:
        acceptedID = candidate[0]
        singleRenames[chartName] = acceptedID
        print(chartName,acceptedID)
saveFile(idata=singleRenames, ifile="singleRenames.p")

Loading data from singleRenames.p
  --> This file is 107.4kB.
Loading singleRenames.p
Stewartdotdoogan 2284592
Vanorly 493202
Jeroen Van Koningsbrugge 1197732
Datafork 139996
Priddyboy 1255141
Nina Soderquist 3348807
Chuck-N-Blood 2225642
Bundnerflaisch 3381802
Cruxshadows 223528
Aurelie Konate 1845311
Marchis Flow 1131056
Les Voix De Daia 4400634
L'Algerino 460583
Tiptop 452424
Planckaerts 908061
Slongs Dievanongs  4435213
Lisbon Lions 4367001
Cast Of The Only Way Is Essex 2575281
Big Reunion Cast 2013 3600400
Eighties Matchbox B-line 109948
Dualers 1421796
Forward Russia 481779
Monkees 132927
Fast Life Yungstaz (F.L.Y) 2085044
Formula Abierta 778351
G-martin 712052
Nikoh E.s. 1981563
  --> This file is 108.0kB.


In [164]:
# Save the artists for which the matching cell found no sufficiently good match (the `skips` dict).
saveFile(idata=skips, ifile="toget11.p")

  --> This file is 2.2kB.


# Get Multi Results

In [18]:
from collections import Counter

# For artists whose name matches several DiscArtist rows, choose the row whose
# albums best overlap the charted titles.
cutoff = 0.8
multiMatchResult = {}
for i, (artist, cnt) in enumerate(slimResults.most_common()):
    if i > 0 and i % 1000 == 0:
        print("Passed",i,'/',len(slimResults))

    # prevArtist is the pre-rename name when a multi-rename applied, otherwise
    # the final (possibly single-renamed) name.
    multiTarget = multiRenames.get(artist)
    if multiTarget is not None:
        prevArtist = artist
        artist = multiTarget
    else:
        prevArtist = None
    singleTarget = singleRenames.get(artist)
    if singleTarget is not None:
        idx    = str(singleTarget)
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist

    if knownArtists.get(prevArtist) is not None:
        continue

    mdata = getMusicData("DiscArtist", prevArtist)
    # Only ambiguous names (more than one matching row) are of interest here.
    if not isinstance(mdata, DataFrame) or mdata.shape[0] <= 1:
        continue

    matches = mdata["Name"].index
    artistAlbums = set(list(fullResults[prevArtist]["Songs"].keys()) + list(fullResults[prevArtist]["Albums"].keys()))
    results = getBestArtistIDMatch(artist, artistAlbums, matches, N=3, cutoff=cutoff)
    if results[2] is not None and results[2] >= cutoff:
        print(i,"/",len(slimResults),'  \t',artist,results)
        multiMatchResult[artist] = list(results[:-1])

In [15]:
# Merge the resolved multi-match artists into the known-artist map and save it;
# the two prints show the map size before and after the merge.
print(len(knownArtists))
knownArtists.update(multiMatchResult)
print(len(knownArtists))
saveFile(idata=knownArtists, ifile="artistMap.p", debug=True)

5413
5505
Saving data to artistMap.p
  --> This file is 146.9kB.
Saved data to artistMap.p
  --> This file is 146.9kB.


In [None]:
# NOTE(review): stray cell. It relies on whatever `artist` and `results` were
# left behind by a previously run loop, and stores the full result (score
# included) where the multi-match loop stores list(results[:-1]). Confirm
# whether it is still needed.
multiMatchResult[artist] = results

# Get Multi Artist Results

In [23]:
# For names that split into several artists, look for components that have no
# exact DiscArtist row but do have a near-named database artist with
# overlapping albums.
cutoff = 0.8
lastRank = 5001  # highest rank examined (the old end-of-loop check stopped after rank 5001)
multiResult = {}
for i, (artist, cnt) in enumerate(slimResults.most_common()):
    # Checked first so that `continue` below can no longer bypass the limit.
    if i > lastRank:
        break
    if i > 0 and i % 1000 == 0 or i == 100:
        print("Passed",i,'/',len(slimResults))
    prevArtist = None
    if multiRenames.get(artist) is not None:
        prevArtist = artist
        artist = multiRenames[artist]
    if singleRenames.get(artist) is not None:
        idx    = str(singleRenames[artist])
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist

    matches = mulArts.getArtistNames(artist)
    if len(matches) == 1:
        continue

    # Built only after the split check, so single-artist names cost nothing.
    artistAlbums = set(list(fullResults[prevArtist]["Songs"].keys()) + list(fullResults[prevArtist]["Albums"].keys()))
    for match in matches:
        if singleRenames.get(match) is not None:
            idx    = str(singleRenames[match])
            match  = artistIDToName[idx]
        mdata = getMusicData("DiscArtist", match)
        if isinstance(mdata, DataFrame):
            # This component already has an exact database row.
            continue
        results = getBestArtistMatch(match, artistAlbums, N=3, cutoff=cutoff)
        if results[2] is not None and results[2] >= cutoff:
            print(i,"/",len(slimResults),'  \t',artist,results)
            # "Results" keeps the first qualifying result; "Match" collects
            # every qualifying component.
            entry = multiResult.setdefault(artist, {"Results": results, "Match": []})
            entry["Match"].append(match)

In [None]:
# Print ready-to-paste multiRenames assignments: the first matched component
# of each name is replaced by the database name of the stored result.
for fullName, found in multiResult.items():
    chartPart = found["Match"][0]
    dbPart    = found["Results"][1]
    print("multiRenames[\"{0}\"] = \"{1}\"".format(fullName, fullName.replace(chartPart, dbPart)))

In [None]:
# Add hand-checked multi-artist renames (chart spelling -> corrected spelling)
# and write the map back; the two prints show its size before and after.
multiRenames = getFile("multiRenames.p")
print(len(multiRenames))
manualMultiRenames = [
    ("Drake, Wizkid & Kyla", "Drake, WizKid & Kyla"),
    ("A. R. Rahman & Pussycat Dolls", "A.R. Rahman & Pussycat Dolls"),
    ("Calvin Harris, Pharell Williams, Katy Perry & Big Sean", "Calvin Harris, Pharrell Williams, Katy Perry & Big Sean"),
    ("Flo Rida, Sage The Gemini & Lookas", "Flo Rida, Sage The Gemini & LooKas"),
    ("DJ Snake & Lil Jon", "DJ Snake & Lil' Jon"),
    ("Rihanna & Jay-z", "Rihanna & Jay-Z"),
    ("Lil Jon & East Side Boyz", "Lil' Jon & East Side Boyz"),
    ("Remady & Manu-L", "DJ Remady & Manu-L"),
    ("Lil Jon & The East Side Boyz, Usher & Ludacris", "Lil' Jon & The East Side Boyz, Usher & Ludacris"),
    ("Jay Sean, Sean Paul & Lil Jon", "Jay Sean, Sean Paul & Lil' Jon"),
    ("Brandy & Ray Jay", "Brandy & Ray J"),
    ("KitschKrieg, Trettmann, Gringo, Ufo361 & Gzuz", "Kitschkrieg, Trettmann, Gringo, Ufo361 & Gzuz"),
    ("Mike WiLL Made-It, Miley Cyrus, Wiz Khalifa & Juicy J", "Mike WiLL Made It, Miley Cyrus, Wiz Khalifa & Juicy J"),
    ("DJ Broiler & Ravvel", "Broiler & Ravvel"),
    ("Bob Sinclar & Raffaella Carra", "Bob Sinclar & Raffaella Carrà"),
    ("Revolverheld & Marta Jandova", "Revolverheld & Marta Jandová"),
    ("Parra For Cuva & Anna Naklab", "Parra for Cuva & Anna Naklab"),
    ("Patti Labelle & Ron Isley", "Patti LaBelle & Ron Isley"),
]
for chartSpelling, fixedSpelling in manualMultiRenames:
    multiRenames[chartSpelling] = fixedSpelling
print(len(multiRenames))
saveFile(idata=multiRenames, ifile="multiRenames.p")

# Get Single Results

In [None]:
# Display the (artist, count) pairs in ascending order of count.
sorted(slimResults.items(), key=lambda pair: pair[1], reverse=False)
#for i, (artist, cnt) in enumerate(slimResults.most_common()):

In [42]:
# Walk artists in ascending order of chart entries and, for single-artist
# names with no exact DiscArtist row, look for a near-named database artist
# with overlapping albums. Stops once more than 20 matches are collected.
cutoff = 0.8
singleResult = {}
for i, (artist, cnt) in enumerate(sorted(slimResults.items(), key=lambda pair: pair[1], reverse=False)):
    # presumably a manual resume point from an earlier pass -- confirm before changing
    if i <= 1842:
        continue
    if i % 100 == 0:
        print("Passed",i,'/',len(slimResults))
    if cnt < 1:
        continue
    # Reset on every iteration: without this, prevArtist kept its value from an
    # earlier iteration (or an earlier cell) and the knownArtists check below
    # tested the wrong name.
    prevArtist = None
    if multiRenames.get(artist) is not None:
        prevArtist = artist
        artist = multiRenames[artist]
    if singleRenames.get(artist) is not None:
        idx    = str(singleRenames[artist])
        artist = artistIDToName[idx]
    if prevArtist is None:
        prevArtist = artist
    if knownArtists.get(prevArtist) is not None:
        continue
    # NOTE(review): the multi-match cells index fullResults by prevArtist; this
    # cell uses the renamed `artist` -- confirm which key is intended.
    artistAlbums = set(list(fullResults[artist]["Songs"].keys()) + list(fullResults[artist]["Albums"].keys()))
    match = mulArts.getArtistNames(artist)
    if len(match) != 1:
        continue
    mdata = getMusicData("DiscArtist", list(match.keys())[0])
    if isinstance(mdata, DataFrame):
        # An exact database row already exists for this name.
        continue
    results = getBestArtistMatch(artist, artistAlbums, N=3, cutoff=cutoff)
    if results[2] is not None and results[2] >= cutoff:
        print(i,"/",len(slimResults),'  \t',artist,'\t',results)
        singleResult[artist] = results
        if len(singleResult) > 20:
            break

1849 / 18175   	 Chloe Howl 	 ('3205374', 'Chloë Howl', 1.0)
Passed 1900 / 18175
1930 / 18175   	 Moody Blues 	 ('266422', 'The Moody Blues', 1.0)
1933 / 18175   	 MarieMarie 	 ('3660973', 'Mariemarie', 1.0)
1941 / 18175   	 Laura Van Den Elzen 	 ('5072342', 'Laura van den Elzen', 0.85)
1969 / 18175   	 Dj Gert 	 ('13845', 'DJ Gert', 1.0)
1978 / 18175   	 Bailar Project 	 ('47404', 'DJ Bailar Project', 1.0)
1979 / 18175   	 Zdob Si Zdub 	 ('3167486', 'Zdob și Zdub', 1.0)
1992 / 18175   	 Lis Sorensen 	 ('250436', 'Lis Sørensen', 0.91)
Passed 2000 / 18175
2055 / 18175   	 Mc Taakiborsta 	 ('216807', 'MC Taakibörsta', 0.86)
2066 / 18175   	 Dallas Superstar 	 ('38106', 'Dallas Superstars', 1.0)
2083 / 18175   	 Dj Orkidea 	 ('13778', 'Orkidea', 1.0)
2087 / 18175   	 Hauli Bros 	 ('1502269', 'The Hauli Bros', 1.0)
Passed 2100 / 18175
2100 / 18175   	 Discokings 	 ('240092', 'Disco Kings', 1.0)
2102 / 18175   	 Rachel Starr 	 ('153755', 'Rachael Starr', 1.0)
2107 / 18175   	 C.O. Club Orie

In [40]:
# Print every collected match as a ready-to-paste `keep[...]` assignment:
# element 0 of the result becomes the value, element 1 the trailing comment.
keepTemplate = 'keep["{0}"] = {1}  #{2}'
for k, v in singleResult.items():
    matchedID, matchedName = v[0], v[1]
    print(keepTemplate.format(k, matchedID, matchedName))

keep["$uicideBoy$"] = 4820166  #$uicideboy$
keep["Highwomen"] = 7354774  #The Highwomen
keep["Belinda Chappel"] = 352048  #Belinda Chapple
keep["Disco Stewie Harrison"] = 6900816  #Stewie Harrison
keep["Christian City Church - Oxford Falls"] = 5970086  #Christian City Church Oxford Falls
keep["Galvatrons"] = 1262417  #The Galvatrons
keep["El Bosco"] = 688489  #Elbosco
keep["Gabrielle Diandrea"] = 62968  #Gabriele D'Andrea
keep["Fede Poggipollini"] = 1033608  #Federico Poggipollini
keep["Dj Ross"] = 62735  #DJ Ross
keep["2b Funk"] = 378514  #2B Funk
keep["Dj Nano"] = 107718  #DJ Nano
keep["CC Catch"] = 177603  #C.C. Catch
keep["Clubblander"] = 91248  #Clublanders
keep["J. Louis"] = 205801  #J.Louis
keep["Niels Van Coch"] = 17261  #Niels Van Gogh
keep["Sander Van Doom"] = 183267  #Sander van Doorn
keep["Dj K-rrion"] = 489457  #DJ K-Rrion
keep["Jose Am"] = 1221269  #Jose AM
keep["Pokomoxo"] = 1136595  #Pocomoxo
keep["Miki Nunez"] = 6838432  #Miki Núñez


In [41]:
# Reduce each collected match to element 0 of its result and merge the
# outcome into knownArtists, reporting the table size before and after.
singleResults = {}
for chartName, bestMatch in singleResult.items():
    singleResults[chartName] = bestMatch[0]
print("Found {0} single results".format(len(singleResults)))

allResultsMsg = "Found {0} all results"
print(allResultsMsg.format(len(knownArtists)))    # size before the merge
knownArtists.update(singleResults)
print(allResultsMsg.format(len(knownArtists)))    # size after the merge

Found 21 single results
Found 5552 all results
Found 5573 all results


In [38]:
# Persist knownArtists to artistMap.p (debug=True makes saveFile report what it wrote).
# NOTE(review): this cell's execution count (In [38]) is lower than that of the
# cell calling knownArtists.update (In [41]); re-run it after the update so the
# saved file contains the newest entries.
saveFile(idata=knownArtists, ifile="artistMap.p", debug=True)

Saving data to artistMap.p
  --> This file is 147.9kB.
Saved data to artistMap.p
  --> This file is 147.9kB.


## Manual Fixes

In [None]:
# Manually accepted assignments: artist name as charted -> numeric ID
# (presumably the Discogs artist ID — TODO confirm). The trailing comment on
# each line records the matched artist's name.
# NOTE(review): `keep` is not created in this cell; it has to exist already
# (e.g. loaded via getFile) before this cell is run.
keep["Wizkid"] = 3292269  #WizKid
keep["A. R. Rahman"] = 4459  #A.R. Rahman
keep["Pharell Williams"] = 90037  #Pharrell Williams
keep["Lookas"] = 3619379  #LooKas
keep["Lil Jon"] = 120307  #Lil' Jon
keep["Jay-z"] = 21742  #Jay-Z

keep["Jean Michel Jarre"] = 209415  #Jean-Michel Jarre
keep["Puppini Sisters"] = 663589  #The Puppini Sisters
keep["Hadouken"] = 744317  #Hadouken!
keep["Peter Doherty"] = 219403  #Pete Doherty
keep["Dani Konig"] = 95685  #Dani König
keep["X-centric Sound System"] = 177063  #Ex-Centric Sound System
keep["And You Will Know Us By The T"] = 80393  #And You Will Know Us By The Trail Of Dead
keep["Serena Maneesh"] = 563504  #Serena-Maneesh
keep["Potatoheadz"] = 38359  #Potatoheads
keep["Black Angels"] = 428135  #The Black Angels
keep["La Fiancee"] = 1603916  #La Fiancée
keep["Knut Anders Sorum"] = 713971  #Knut Anders Sørum

keep["Magic Numbers"] = 245226  #The Magic Numbers
keep["Melissa M"] = 952871  #Mélissa M
keep["Weird Al Yankovic"] = 259422  #"Weird Al" Yankovic
keep["Star Academy 6"] = 395756  #Star Academy
keep["Dj Boozywoozy"] = 39640  #DJ BoozyWoozy
keep["Lauri Tahka"] = 1236855  #Lauri Tähkä
keep["Finn Brothers"] = 377361  #The Finn Brothers
keep["Red Jumpsuit Apparatus"] = 746639  #The Red Jumpsuit Apparatus
keep["Katharine Mcphee"] = 544699  #Katharine McPhee
keep["Massive Tone"] = 51650  #Massive Töne
keep["Soul'd Out"] = 152449  #Soul'd OUT
keep["Djames Braun"] = 3024392  #Djämes Braun
keep["K-otic"] = 99126  #K-Otic
keep["Dr. Kucho"] = 171326  #Dr. Kucho!
keep["Sivert Hoyem"] = 226487  #Sivert Høyem
keep["DJ's @ Work"] = 27887  #DJs @ Work
keep["T.m.revolution"] = 1367682  #T.M.Revolution
keep["Suburban Tribe"] = 294619  #Sub-Urban Tribe
keep["Peer Gunt"] = 475654  #Peer Günt
keep["Acda En De Munnik"] = 283015  #Acda en de Munnik
keep["Susanne Sundfor"] = 747088  #Susanne Sundfør
keep["Zen Cafe"] = 264334  #Zen Café
keep["Lutricia Mcneal"] = 10896  #Lutricia McNeal
keep["Soul Decision"] = 725913  #soulDecision
keep["MC Anitta"] = 2807393  #Anitta
keep["Antonio Jose"] = 1661452  #Antonio José
keep["Miguel Angel Silva"] = 2374710  #Miguel Ángel Silva
keep["Giant Leap"] = 55439  #1 Giant Leap
keep["Stephanie McIntosh"] = 597377  #Stephanie Mcintosh
keep["Beautiful South"] = 171427  #The Beautiful South
keep["Mars Volta"] = 96218  #The Mars Volta
keep["India Arie"] = 410260  #India.Arie
keep["Sheepdogs"] = 1980121  #The Sheepdogs
keep["Bjorn Rosenstrom"] = 352899  #Björn Rosenström
keep["Pretty Reckless"] = 1773405  #The Pretty Reckless
keep["Wallflowers"] = 257714  #The Wallflowers
keep["Booming People"] = 440378  #The Booming People
keep["Tania Mara"] = 1614077  #Tânia Mara
keep["Zacharius Carl Group"] = 329135  #Zacharius Carls Group
keep["Daniel Lindstrom"] = 1465965  #Daniel Lindström
keep["Pigeon Detectives"] = 481793  #The Pigeon Detectives
keep["Last Shadow Puppets"] = 1099843  #The Last Shadow Puppets
keep["Brian Mckight"] = 97515  #Brian McKnight
keep["Mrs. Greenbird"] = 3134860  #Mrs Greenbird
keep["Ophelie Winter"] = 581072  #Ophélie Winter
keep["Les Deesses"] = 2392811  #Les Déesses
keep["Xander De Buisonje"] = 451325  #Xander De Buisonjé
keep["Partysquad"] = 523070  #The Partysquad
keep["Jose Gonzalez"] = 190821  #José González
keep["Ok Go"] = 219647  #OK Go
keep["Ladi6"] = 283432  #Ladi 6
keep["Sergio Mendes"] = 27986  #Sérgio Mendes
keep["Dorothee"] = 397164  #Dorothée
keep["Suvi Terasniska"] = 1032538  #Suvi Teräsniska
keep["Anna Jarvinen"] = 937842  #Anna Järvinen
keep["Kinderen Voor Kinderen"] = 354362  #Kinderen voor Kinderen
keep["Rippingtons"] = 555275  #The Rippingtons
keep["Flaming Lips"] = 67156  #The Flaming Lips
keep["William Balde"] = 1744946  #William Baldé
keep["Justice Collective"] = 3071540  #The Justice Collective
keep["Ultra Nate"] = 8099  #Ultra Naté
keep["Robin S"] = 66727  #Robin S.
keep["Trentemoller"] = 117914  #Trentemøller
keep["Lene Alexandra"] = 917923  #Lene Alexandra Øien
keep["Ready Set"] = 2183109  #The Ready Set
keep["Charlatans"] = 49023  #The Charlatans
keep["Maximo Park"] = 384412  #Maxïmo Park
keep["Mokobe"] = 392235  #Mokobé
keep["Paps 'n' Skar"] = 41801  #Paps N Skar
keep["ScHoolboy Q"] = 2353545  #Schoolboy Q
keep["Nelja Ruusua"] = 308352  #Neljä Ruusua
keep["Afro-dite"] = 296038  #Afro-Dite
keep["Breaks Co-op"] = 61311  #Breaks Co-Op
keep["Laurent Wery"] = 1310886  #Laurent Wéry
keep["Proclaimers"] = 274034  #The Proclaimers
keep["Courteeners"] = 971940  #The Courteeners
keep["No Tone"] = 841468  #No-Tone
keep["Mo Things Family"] = 174410  #Mo Thugs Family
keep["Sammy Deluxe"] = 88398  #Samy Deluxe
keep["LaCrim"] = 2707062  #Lacrim
keep["Trancelucent"] = 55384  #TranceLucent
keep["Luis Represas"] = 1237858  #Luís Represas
keep["Tatsurou Yamashita"] = 119485  #Tatsuro Yamashita
keep["Ana Tijoux"] = 469819  #Anita Tijoux
keep["Axe Bahia"] = 427149  #Axé Bahia
keep["Elakelaiset"] = 264335  #Eläkeläiset
keep["Turmion Katilot"] = 339414  #Turmion Kätilöt
keep["Jean Claude Ades"] = 66552  #Jean-Claude Ades
keep["Hammerfall"] = 287459  #HammerFall
keep["Hear'say"] = 312508  #Hear'Say
keep["Jack Penate"] = 747248  #Jack Peñate
keep["Jeremy Chatelain"] = 1107704  #Jérémy Chatelain
keep["Alex Max Band"] = 413545  #Alex Band
keep["Falsa Alarma"] = 1278412  #Falsalarma
keep["G-spott"] = 11679  #G-Spott
keep["Ray LaMontagne"] = 502002  #Ray Lamontagne
keep["Chris De Burgh"] = 151304  #Chris de Burgh
keep["Verve Pipe"] = 169539  #The Verve Pipe
keep["Plusch"] = 372645  #Plüsch
keep["Compagnie Creole"] = 528609  #La Compagnie Créole
keep["MC Fioti"] = 5865714  #Mc Fioti
keep["Sie7te"] = 1771803  #Sie7e
keep["Bastard Sons Of Dioniso"] = 2634291  #The Bastard Sons Of Dioniso
keep["Baby Blue Sound Crew"] = 1094228  #Baby Blue Soundcrew
keep["Stone Roses"] = 7298  #The Stone Roses
keep["DJ Matvey Emerson"] = 2126113  #Matvey Emerson
keep["Gerald De Palmas"] = 533880  #Gérald de Palmas
keep["Oli. P"] = 296389  #Oli.P
keep["Andre Hazes Jr."] = 4298003  #André Hazes Jr.
keep["Motorhead"] = 233658  #Motörhead
keep["Grateful Dead"] = 246650  #The Grateful Dead
keep["Liset Alea"] = 138872  #Lissette Alea
keep["Jennie Lofgren"] = 996876  #Jennie Löfgren
keep["Cardigans"] = 39900  #The Cardigans
keep["Cast Of Rent"] = 2531774  #The Cast Of Rent
keep["Saw Doctors"] = 544713  #The Saw Doctors
keep["Robert Delong"] = 1121732  #Robert DeLong
keep["Huun Huur Tu"] = 109547  #Huun-Huur-Tu
keep["Jari Sillanpaa"] = 713952  #Jari Sillanpää
keep["Hellacopters"] = 261426  #The Hellacopters
keep["Dan Backman"] = 1106992  #Dan Bäckman
keep["Artists Stand Up To Cancer"] = 1350696  #Just Stand Up To Cancer
keep["Mr.President"] = 41707  #Mr. President
keep["Loreena Mckennitt"] = 213366  #Loreena McKennitt
keep["Lashun Pace"] = 824951  #LaShun Pace
keep["Beach Boys"] = 70829  #The Beach Boys
keep["Allstars"] = 1009547  #TV Allstars
keep["Raveonettes"] = 200321  #The Raveonettes
keep["Body Rox"] = 269697  #Bodyrox
keep["Lazytown"] = 729388  #LazyTown
keep["Marie Laforet"] = 462548  #Marie Laforêt
keep["Dj Lhasa"] = 196037  #DJ Lhasa
keep["Robert Cray Band"] = 292478  #The Robert Cray Band
keep["Eppu Normaal"] = 381585  #Eppu Normaali
keep["Dj Norman"] = 23243  #DJ Norman
keep["B-Yentl"] = 2747976  #BYentl
keep["Jessica Folker"] = 28518  #Jessica Folcker
keep["Brolle"] = 572997  #Brolle JR
keep["The Cheetah Girls"] = 633276  #Cheetah Girls
keep["Pipettes"] = 360206  #The Pipettes
keep["John Dahlback"] = 20805  #John Dahlbäck
keep["HollySiz"] = 2082991  #Hollysiz
keep["Cassia Eller"] = 1105685  #Cássia Eller
keep["Marios Fragoulis"] = 328042  #Mario Frangoulis
keep["I Panta Nei"] = 1138210  #Panta Rei
keep["Sober"] = 946450  #Sôber
keep["Jarjestyshairio"] = 1931984  #Järjestyshäiriö
keep["Swingfly"] = 70432  #Swing-Fly
keep["Decemberists"] = 264812  #The Decemberists
keep["Jean Jacques Goldman"] = 307094  #Jean-Jacques Goldman
keep["Star Academy 7"] = 395756  #Star Academy
keep["Dj Chuckie"] = 135575  #DJ Chuckie
keep["Didrik Solli-tangen"] = 1818627  #Didrik Solli-Tangen
keep["Dj Goldfinger"] = 802185  #DJ Goldfinger
keep["Overtones"] = 1443138  #The Overtones
keep["Vaccines"] = 2029033  #The Vaccines
keep["Roshelle"] = 2321646  #Rochelle
keep["La Habitacion Roja"] = 393825  #La Habitación Roja
keep["Peter Lemarc"] = 266999  #Peter LeMarc
keep["Goldie Lookin' Chain"] = 184984  #Goldie Lookin Chain
keep["Smashing Pumpkins"] = 28970  #The Smashing Pumpkins
keep["Mighty Mighty Bosstones"] = 275193  #The Mighty Mighty Bosstones
keep["The Notorious B.I.G."] = 65049  #Notorious B.I.G.
keep["Chante Moore"] = 455230  #Chanté Moore
keep["Josh Gracin"] = 655983  #Joshua Gracin
keep["Franck Michael"] = 478419  #Frank Michael
keep["The Go-betweens"] = 83077  #The Go-Betweens
keep["Shakespears Sister"] = 30318  #Shakespear's Sister
keep["Piero Pelu"] = 662395  #Piero Pelù
keep["Los Delinquentes"] = 674561  #Los Delinqüentes
keep["St. Germain"] = 74  #St Germain
keep["Zuri West"] = 188532  #Züri West
keep["Bustafunk"] = 16272  #Busta Funk
keep["Lea Castel"] = 1153522  #Léa Castel
keep["De Lillos"] = 261173  #deLillos
keep["Dj The Wave"] = 241825  #DJ The Wave
keep["The Academy Is"] = 425277  #The Academy Is...
keep["Dj S.P.U.D."] = 42088  #DJ S.P.U.D.
keep["Dj Rebel"] = 209672  #DJ Rebel
keep["Susana Felix"] = 1333026  #Susana Félix
keep["Wildhearts"] = 293211  #The Wildhearts
keep["Altern8"] = 12846  #Altern 8
keep["Loredana Berte"] = 366682  #Loredana Bertè
keep["Bun-B"] = 185582  #Bun B
keep["Academia Operacion Triunfo"] = 1375332  #Academia Operación Triunfo
keep["Barr Brothers"] = 2470090  #The Barr Brothers
keep["Soulvation"] = 53390  #Soulvation*
keep["Ritmo Dynamic"] = 123374  #Ritmo-Dynamic
keep["Bleeders"] = 483312  #The Bleeders
keep["Hearsay"] = 312508  #Hear'Say
keep["Dj Shadow"] = 4478  #DJ Shadow
keep["Cooper Temple Clause"] = 138071  #The Cooper Temple Clause
keep["Maccabees"] = 499923  #The Maccabees
keep["Jon B."] = 20389  #Jon B
keep["North American Halloween Prevention Initiative"] = 1039212  #North American Hallowe'en Prevention Initiative
keep["Cerena"] = 1894383  #Cérena
keep["Ze Pequeno"] = 4613736  #Ze Pequeño
keep["Les Muscles"] = 297430  #Les Musclés
keep["Teki Latex"] = 247176  #Tekilatex
keep["Tommy February6"] = 1283490  #Tommy february6
keep["Tommy Februaryo"] = 1283490  #Tommy february6
keep["Lovefreekz"] = 208128  #The Lovefreekz
keep["Upper Room"] = 487039  #The Upper Room
keep["Avett Brothers"] = 824244  #The Avett Brothers
keep["Legiao Urbana"] = 264082  #Legião Urbana
keep["Whitlams"] = 254486  #The Whitlams
keep["Jo Jo"] = 306427  #JoJo
keep["Fundacion Tony Manero"] = 194603  #Fundación Tony Manero
keep["Ali B."] = 234355  #Ali B
keep["De Toppers"] = 602723  #Toppers
keep["Motorhomes"] = 252568  #The Motorhomes
keep["Di Leva"] = 75662  #Di leva
keep["Timo Raisanen"] = 277925  #Timo Räisänen
keep["Sebastien Tellier"] = 2280  #Sébastien Tellier
# NOTE(review): the recorded match ("Alcatraz") looks like a different artist rather than a spelling variant of the key — confirm this ID.
keep["Albatraoz"] = 1183850  #Alcatraz
keep["Roisin Murphy"] = 455520  #Róisín Murphy
keep["Ha Rule"] = 51369  #Ja Rule
keep["Pepper's Ghost"] = 2364386  #Peppers Ghost
keep["Waterboys"] = 125174  #The Waterboys
keep["Georges-alain Jones"] = 1940009  #Georges-Alain Jones
keep["Neg'marrons"] = 219331  #Neg'Marrons
keep["Kaleidoscopio"] = 166678  #Kaleidoscópio
keep["alt-J"] = 2830806  #Alt-J
keep["Tommy February"] = 1283490  #Tommy february6
keep["Disco Boys"] = 196758  #The Disco Boys
keep["Natalia Jimenez"] = 2130134  #Natalia Jiménez
keep["MadMan"] = 2668959  #Madman
keep["Aitana Ocana"] = 6371628  #Aitana Ocaña
keep["Polyphonic Spree"] = 79023  #The Polyphonic Spree
keep["John Mellencamp"] = 237890  #John Cougar Mellencamp
keep["White Tie Affair"] = 1943163  #The White Tie Affair
keep["Bon Garcon"] = 458392  #Bon Garçon
keep["Fu-tourist"] = 34401  #Fu-Tourist
keep["Kapteeni A-ni"] = 85882  #Kapteeni Ä-ni
keep["Ron Van Den Beuken"] = 90120  #Ron van den Beuken
keep["For My Pain"] = 619891  #For My Pain...
keep["Jack McManus"] = 808532  #Jack Mcmanus
keep["Radio Dept."] = 238640  #The Radio Dept.
keep["Elin Sigvardsson"] = 1063935  #Elin Ruth Sigvardsson
keep["Dj Felli Fel"] = 227465  #Felli Fel
keep["New Deal"] = 44720  #The New Deal
keep["Camera Cafe"] = 1645853  #Caméra Café
keep["Ben L`Ocle Soul"] = 1689692  #Ben L'Oncle Soul
keep["Veronique Sanson"] = 394617  #Véronique Sanson
keep["Dj Schwede"] = 41815  #DJ Schwede
keep["Beam Vs. Cyrus"] = 19442  #Beam vs. Cyrus
keep["Dj Tocadisco"] = 424389  #Tocadisco
keep["De Vrienden Van Meneer Konijn"] = 4591921  #Vrienden Van Meneer Konijn
keep["4 Taste"] = 325990  #Taste
keep["Shaka Labbits"] = 1829370  #Shakalabbits
keep["Elio E Le Storie Tes"] = 191166  #Elio E Le Storie Tese
keep["Paco De Lucia"] = 20184  #Paco De Lucía
keep["Format B"] = 263420  #Format: B
keep["Oceanlab"] = 427638  #OceanLab
keep["Motley Crue"] = 94068  #Mötley Crüe
keep["Janelle Monae"] = 445868  #Janelle Monáe
keep["Civil Wars"] = 2001094  #The Civil Wars
keep["Moneybagg Yo"] = 5197378  #MoneyBagg Yo
keep["Caleidoscopio"] = 166678  #Kaleidoscópio
keep["Zeljko Joksimovic"] = 473482  #Željko Joksimović
keep["Mousee T"] = 8803  #Mousse T.
keep["Polo Montanes"] = 483555  #Polo Montañez
keep["Annette Artani"] = 1466811  #Annet Artani
keep["Payo Malo"] = 613450  #El Payo Malo
keep["To Die For"] = 239041  #To-Die-For
keep["Dj Jurgen"] = 17260  #DJ Jurgen
keep["Modesha"] = 128255  #Nodesha
keep["Latin Kings"] = 158482  #The Latin Kings
keep["Dead Weather"] = 1391789  #The Dead Weather
keep["High School Musical Cast"] = 673607  #The High School Musical Cast
keep["Dj Mark Farina"] = 4710  #Mark Farina
keep["Sandrine Francois"] = 1446111  #Sandrine François
keep["Gregori Baquet"] = 568948  #Grégori Baquet
keep["Superheavy"] = 2418975  #SuperHeavy
keep["Deborah De Corral"] = 2035021  #Deborah de Corral
keep["Brilliant Green"] = 296909  #The Brilliant Green
keep["Laith Al-deen"] = 341824  #Laith Al-Deen
keep["Audiobullys"] = 50016  #Audio Bullys
keep["Mr Redz"] = 21964  #Mr. Reds
keep["Eye Opener"] = 138167  #Eyeopener
keep["Kraftklub"] = 2028458  #KraftKlub
keep["Human League"] = 10383  #The Human League
keep["Lost Brothers"] = 158835  #The Lost Brothers
keep["Young Knives"] = 470191  #The Young Knives
keep["Get Cape Wear Cape Fly"] = 747070  #Get Cape. Wear Cape. Fly
keep["Starting Line"] = 355465  #The Starting Line
keep["The-dream"] = 1008036  #The-Dream
keep["Piano Guys"] = 3507956  #The Piano Guys
keep["Beyonce"] = 52835  #Beyoncé
keep["Amity Affliction"] = 2446213  #The Amity Affliction
keep["A$AP Mob"] = 2898554  #ASAP Mob
keep["Kelly K"] = 970192  #Kelly Key
keep["Blizzard Brothers Inc"] = 41806  #Blizzard Brothers
keep["Jade Macrae"] = 197656  #Jade MacRae
keep["Herman Dune"] = 264602  #Herman Düne
keep["Alex  Britti"] = 653853  #Alex Britti
keep["Alfred Garcia"] = 6371630  #Alfred García
keep["Rena Dif"] = 233790  #René Dif
keep["Flaming Sideburns"] = 491667  #The Flaming Sideburns
keep["JS-16"] = 35833  #JS16
keep["Lisa Left Eye Lopes"] = 110359  #Lisa "Left Eye" Lopes
keep["Rosemary' Sons"] = 285835  #Rosemary's Sons
keep["K-klass"] = 36624  #K-Klass
keep["Drumatic Twins"] = 14784  #Drumattic Twins
keep["Crashdiet"] = 825015  #Crashdïet
keep["Anne-Lie Ryde"] = 264170  #Anne-Lie Rydé
keep["Alex Swing Oskars Sings!"] = 1442099  #Alex Swings Oscar Sings!
keep["Adolphson Falk"] = 119104  #Adolphson-Falk
keep["Run DMC"] = 219213  #Run-DMC
keep["Pj Harvey"] = 36052  #PJ Harvey
# NOTE(review): the recorded match ("Joe South") looks like a different artist rather than a spelling variant of the key — confirm this ID.
keep["Journey South"] = 325103  #Joe South
keep["Lil Scrappy"] = 239305  #Lil' Scrappy
keep["Breeders"] = 39778  #The Breeders
keep["Huis Anubis"] = 2797437  #Het Huis Anubis
keep["Avalanches"] = 9130  #The Avalanches
keep["Dj F.E.X."] = 43804  #DJ F.E.X
keep["Kc Da Rookee"] = 220984  #KC Da Rookee
keep["N Trance"] = 11001  #N-Trance
keep["Trooper Da Doon"] = 66144  #Trooper Da Don
keep["Divine Comedy"] = 27933  #The Divine Comedy
keep["Blumchen"] = 20156  #Blümchen
keep["Dt8 Project"] = 34061  #DT8 Project
keep["Dead 60s"] = 260915  #The Dead 60s
keep["Duke Spirit"] = 275448  #The Duke Spirit
keep["Airborne Toxic Event"] = 1311029  #The Airborne Toxic Event
keep["Big Pink"] = 1337426  #The Big Pink
keep["Boyz In Da Hood"] = 336836  #Boyz N Da Hood
keep["Devil Wears Prada"] = 989173  #The Devil Wears Prada
keep["Sixx: A.M."] = 895454  #Sixx:A.M.
keep["Dirty Heads"] = 2089008  #The Dirty Heads
keep["Mac DeMarco"] = 2568722  #Mac Demarco
keep["Mauricio Manieri"] = 1563247  #Maurício Manieri
keep["Butterfly Effect"] = 441749  #The Butterfly Effect
keep["L'aura"] = 880722  #L'Aura
# NOTE(review): the recorded match ("Jabula") differs from the key in its first letter; may be a different artist — confirm this ID.
keep["Fabula"] = 210657  #Jabula
keep["Dj Joe K"] = 225215  #DJ Joe K.
keep["Rasmus Nohr"] = 396235  #Rasmus Nøhr
keep["Valkyrians"] = 538336  #The Valkyrians
keep["Dir En Grey"] = 348163  #Dir en grey
keep["Rock'a'trench"] = 2981100  #Rock'A'Trench
keep["Wolfe Tones"] = 1219696  #The Wolfe Tones
keep["Andre Van Duin"] = 367055  #André van Duin
keep["M Hederos M Hellberg"] = 474515  #Hederos & Hellberg
keep["Christer Sjogren"] = 656866  #Christer Sjögren
keep["Feelstyle"] = 266896  #Tha Feelstyle
keep["Beta Band"] = 3847  #The Beta Band
# NOTE(review): the recorded match ("Angelfish") looks like a different artist rather than a spelling variant of the key — confirm this ID.
keep["Angelis"] = 251965  #Angelfish
keep["Dr Hook"] = 206325  #Dr. Hook
keep["Royal Scots Dragoon Guards"] = 595755  #The Royal Scots Dragoon Guards
keep["Hold Steady"] = 491197  #The Hold Steady
keep["SheDaisy"] = 1479264  #Shedaisy
keep["Gregoire"] = 1407221  #Grégoire
keep["Tragically Hip"] = 267249  #The Tragically Hip
keep["Dj Molella"] = 36544  #Molella
keep["Facteur X"] = 334431  #Factor X
keep["Che Nelle"] = 1055693  #Che'Nelle
keep["Jerome Echenoz"] = 695381  #Jérôme Echenoz
keep["Svein Ostvik"] = 3781616  #Svein Østvik
keep["Dj Taylor"] = 16567  #DJ Taylor
keep["Wiener Sangerknaben"] = 533488  #Die Wiener Sängerknaben
keep["Zoe Straub"] = 4767549  #Zoë Straub
keep["David Latour "] = 1189620  #David Latour
keep["Finger Tips"] = 1663670  #Fingertips
keep["Kelly Family"] = 319276  #The Kelly Family
keep["Jaime Cullum"] = 194401  #Jamie Cullum
keep["Queensryche"] = 255363  #Queensrÿche
keep["Black Crowes"] = 262691  #The Black Crowes
keep["David Crowder Band"] = 413886  #David Crowder*Band
keep["Gaslight Anthem"] = 1167086  #The Gaslight Anthem
keep["BrockHampton"] = 4581123  #Brockhampton
keep["Getaway Plan"] = 1230298  #The Getaway Plan
keep["Ener G"] = 1660398  #Ener.G
keep["C- Bool"] = 251008  #C-Bool
keep["Dublex INC."] = 3762  #Dublex Inc.
keep["R.E.G. Project"] = 164534  #The R.E.G. Project
keep["Niccolo Fabi"] = 329588  #Niccolò Fabi
keep["Jakkata"] = 3442  #Jakatta
keep["Rald Schmitz"] = 1404551  #Ralf Schmitz
keep["Bikstok Rogsystem"] = 272410  #Bikstok Røgsystem
keep["Sinead O'Connor"] = 42895  #Sinéad O'Connor
keep["Dj Slow"] = 55897  #DJ Slow
keep["Buck-tick"] = 69223  #Buck-Tick
keep["Mahala Rai Banda"] = 1208659  #Mahala Raï Banda
keep["SunStroke Project"] = 1818637  #Sunstroke Project
keep["Gary Clark Jr"] = 2699369  #Gary Clark Jr.
keep["Royal Guardsmen"] = 290319  #The Royal Guardsmen
keep["Temptations"] = 28332  #The Temptations
keep["Searchers"] = 277352  #The Searchers
keep["New Seekers"] = 93841  #The New Seekers
keep["Counting Crow"] = 262643  #Counting Crows
keep["Louise Attague"] = 374074  #Louise Attaque
keep["Osborne Brothers"] = 1088676  #The Osborne Brothers
keep["Roman Flugel"] = 13111  #Roman Flügel
keep["Rubettes"] = 240902  #The Rubettes
keep["Bart Kaell"] = 446710  #Bart Kaëll
keep["Slongs Dievanongs "] = 4435213  #Slongs Dievanongs
keep["Ez Special"] = 148110  #EZ Special
keep["Reelists"] = 154054  #The Reelists
keep["King Gidra"] = 385694  #King Giddra
keep["Zoe Birkett"] = 320018  #Zoë Birkett
keep["D!-Nation"] = 1599356  #D!Nation
keep["Von Bondies"] = 313529  #The Von Bondies
keep["Weather Girls"] = 80134  #The Weather Girls
keep["Levellers"] = 23672  #The Levellers
keep["Mj Cole"] = 1515  #MJ Cole
keep["Paddingtons"] = 337981  #The Paddingtons
keep["Long Blondes"] = 245214  #The Long Blondes
keep["Sunshine Underground"] = 361710  #The Sunshine Underground
keep["Unklejam"] = 774556  #UnkleJam
keep["Dave Clark Five"] = 329252  #The Dave Clark Five
keep["Boney M"] = 235979  #Boney M.
keep["Trustcompany"] = 279197  #Trust Company
keep["K`Jon"] = 211210  #K'Jon
keep["New Pornographers"] = 363443  #The New Pornographers
keep["Grace Vanderwaal"] = 5436370  #Grace VanderWaal
keep["Lil Peep"] = 5378070  #Lil' Peep
keep["Superjesus"] = 273378  #The Superjesus
keep["Dissociatives"] = 233572  #The Dissociatives
keep["Sleepy Jackson"] = 138068  #The Sleepy Jackson
keep["McClymonts"] = 2376183  #The McClymonts
keep["Jezabels"] = 1704414  #The Jezabels
keep["Raining Pleausure"] = 637513  #Raining Pleasure
keep["Loredana Berte'"] = 366682  #Loredana Bertè
keep["Claduio Baglioni"] = 638411  #Claudio Baglioni
keep["Papa Levante"] = 388519  #Papá Levante
keep["Dj Marta"] = 198294  #DJ Marta
keep["Mendonca Do Rio"] = 783996  #Mendonça Do Rio
keep["Epila"] = 1608899  #Epilä
keep["Man-Eating Tree"] = 2289900  #The Man-Eating Tree
keep["Jon Norgaard"] = 845262  #Jon Nørgaard
keep["Black League"] = 404409  #The Black League
keep["Pate Mustajarvi"] = 477874  #Pate Mustajärvi
keep["Brand New Heavies"] = 1396  #The Brand New Heavies
keep["Erik E"] = 9647  #Erick E
keep["Dj Mobster"] = 943953  #DJ Mobster
keep["Frank Ti-aya"] = 583397  #Frank Ti-Aya
keep["Osten Med Resten"] = 704765  #Östen Med Resten
# NOTE(review): the recorded match ("Apse") looks like a different artist rather than a spelling variant of the key — confirm this ID.
keep["Ape"] = 404433  #Apse
keep["Tough Alliance"] = 305005  #The Tough Alliance
keep["Kristet Utseende"] = 360021  #The Kristet Utseende
keep["Kalomoira"] = 2270989  #Kalomira

In [None]:
keep["Ms Dynamite"] = 24059  #Ms. Dynamite
keep["Rene Froger"] = 283021  #René Froger
keep["VanVelzen"] = 702319  #Vanvelzen
keep["Fratellis"] = 472288  #The Fratellis
keep["Andre Hazes"] = 282287  #André Hazes
keep["Kat-tun"] = 2884266  #Kat-Tun
keep["Jean Roch"] = 1368909  #Jean-Roch
keep["TikTak"] = 383787  #Tiktak
keep["L.E.J."] = 4777768  #L.E.J
keep["Pablo Lopez"] = 3465939  #Pablo López
keep["Agnetha Faltskog"] = 149038  #Agnetha Fältskog
keep["Kapten Rod"] = 1104466  #Kapten Röd
keep["BBmak"] = 231074  #BBMak
keep["Dj Jose"] = 53783  #DJ Jose
keep["Gigi D'alessio"] = 839027  #Gigi D'Alessio
keep["Terasbetoni"] = 333752  #Teräsbetoni
keep["Kim-lian"] = 323824  #Kim-Lian
keep["Jose Feliciano"] = 465889  #José Feliciano
keep["Asteroids Galaxy Tour"] = 865897  #The Asteroids Galaxy Tour
keep["Strumbellas"] = 3705444  #The Strumbellas
keep["Pep`s"] = 701116  #Pep's
keep["Gue Pequeno"] = 1216738  #Guè Pequeno
keep["Dead By April"] = 1477009  #Dead by April
keep["Luca Hanni"] = 2779963  #Luca Hänni
keep["Noir Desir"] = 85007  #Noir Désir
keep["Jean Pascal"] = 614758  #Jean-Pascal
keep["Herbert Groenemeyer"] = 163850  #Herbert Grönemeyer
keep["Nina Pastori"] = 925146  #Niña Pastori
keep["Saddle Club"] = 2956368  #The Saddle Club
keep["Pariisin Kevat"] = 1984505  #Pariisin Kevät
keep["Barbara Streisand"] = 53248  #Barbra Streisand
keep["Bohse Onkelz"] = 262898  #Böhse Onkelz
keep["Manolo Garcia"] = 705504  #Manolo García
keep["Ordinary Boys"] = 366147  #The Ordinary Boys
keep["Dj Tatana"] = 13864  #DJ Tatana
keep["Peter Joback"] = 343362  #Peter Jöback
keep["Loic Nottet"] = 4355587  #Loïc Nottet
keep["Sander Van Doorn"] = 183267  #Sander van Doorn
keep["Youngbloodz"] = 209479  #YoungBloodZ
keep["A$AP Ferg"] = 2503395  #ASAP Ferg
keep["Benassi Bros"] = 171187  #Benassi Bros.
keep["Watermat"] = 3856896  #Watermät
keep["Asian Kung-fu Generation"] = 289562  #Asian Kung-Fu Generation
keep["Hi-Tack"] = 364150  #Hi_Tack
keep["Hubert Von Goisern"] = 446821  #Hubert von Goisern
keep["Lee Dewyze"] = 2016926  #Lee DeWyze
keep["Sofia Essaidi"] = 1400771  #Sofia Essaïdi
keep["Elodie Frege"] = 958245  #Elodie Frégé
keep["Jurgen Drews"] = 109458  #Jürgen Drews
keep["Jorge Vercilo"] = 1609339  #Jorge Vercillo
keep["Futureheads"] = 132795  #The Futureheads
keep["Jim Stark"] = 273036  #Jim Stärk
keep["Andre Sardet"] = 1324640  #André Sardet
keep["Mago De Oz"] = 691599  #Mägo De Oz
keep["Kapasiteettiyksikko"] = 264342  #Kapasiteettiyksikkö

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "Click Five", "Gospellers", "Temper Trap", "Cranberries",
    "Matthew Good Band", "Esmee Denters", "David Demaria", "Hoosiers",
    "L'Arc-en-Ciel", "Dj Aligator Project", "Diana Degarmo", "Adelen",
    "Madden Brothers", "Grupo Revelacao", "Brolle Jr.", "Beatfreakz",
    "Star Academy 2",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "Potbelleez", "Los Autenticos Decadentes", "Gunther", "Star Academy 5",
    "Pedro Capo", "Dandy Warhols", "Postal Service", "Tea Party",
    "B 3", "Dj Tomekk", "M-kids", "Waldo`s People",
    "Lil Kleine", "Laura Narhi", "TopGunn", "Joaquin Sabina",
    "Raconteurs", "Monica Naranjo", "Patrick Sebastien", "Molly Sanden",
    "Miguel Bose",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "Petri Nygard", "Blackbear", "Den Svenska Bjornstammen", "Baseballs",
    "Sandra Van Nieuwland", "Ian Carey Project", "BossHoss", "Max Gazze",
    "MoTrip", "Les Enfoires", "Lil Bow Wow", "Frero Delavega",
    "Kumi Kouda", "Bjork", "RedFoo", "Star Academy 4",
    "John Butler Trio", "Gregory Lemarchal",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "AronChupa", "Sexion D`Assaut", "Die Arzte", "Bro'sis",
    "Maneskin", "Royksopp", "Opposites", "OpShop",
    "Jennifer Pena", "Paul Van Dyk", "La 5ta Estacion", "Supermen Lovers",
    "Keshia Chante", "Special D", "Kinki Kids", "Neighbourhood",
    "No Te Va A Gustar", "Ll Cool J", "Jeroen Van Der Boom", "Star Academy 3",
    "Miguel Angel Munoz", "Tito El Bambino", "Andres Calamaro", "A$AP Rocky",
    "Boogie Pimps",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "DJ Otzi", "DJ Bobo", "R.I.O.", "Chimene Badi",
    "Underdog Project", "Thalia", "Mr Children", "Lonely Island",
    "Tragedie", "Burhan G", "Mans Zelmerlow", "Christophe Mae",
    "Marco Antonio Solis", "Sohne Mannheims", "US5", "Lars Winnerback",
    "Alejandro Fernandez", "Mis-teeq", "Gestort Aber Geil", "Herbert Gronemeyer",
    "Shy'M", "Hakan Hellstrom", "Keen`V", "Ting Tings",
    "Frankie J", "K 3", "Common Linnets", "Coeur De Pirate",
    "Julien Dore", "Dani Martin", "Di-rect",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Accept the automatic match for each listed artist: element 0 of its
# singleResult entry is stored in keep under the same name.
keys = [
    "T.A.T.U.", "Outkast", "Mr Probz", "Michael Buble",
    "B.o.B", "Michel Telo", "Pablo Alboran", "O-zone",
    "Amy Macdonald", "Lumineers", "DJ Tiesto", "Chainsmokers",
    "MadCon", "Mylene Farmer", "XXXTentacion", "Dj Sammy",
    "Sophie Ellis Bextor", "Veronicas", "In Grid", "Chemical Brothers",
    "K-Maro", "Collectif Metisse", "Matt Pokora", "White Stripes",
    "Bebe Lilly", "Calling", "Saturdays", "Christina Sturmer",
    "All-American Rejects", "Dan Balan", "J Balvin", "Elena Paparizou",
    "Nadiya", "Blink 182", "Rene La Taupe", "DonkeyBoy",
    "Rolling Stones", "Sigur Ros", "Israel Kamakawiwoole", "Armin Van Buuren",
]
for key in keys:
    bestMatch = singleResult[key]
    keep[key] = bestMatch[0]

In [None]:
# Dump the complete keep table, then persist it to known.p.
print(keep)
saveFile(idata=keep, ifile="known.p", debug=True)

In [None]:
# Reload the table saved in known.p and write the same object out again as
# singleRenames.p, then dump it.
# NOTE(review): presumably singleRenames.p is the file the `singleRenames`
# lookup table is loaded from — confirm against the loading cell.
keep = getFile("known.p")
saveFile(idata=keep, ifile="singleRenames.p")
print(keep)

In [None]:
# Classify the artist names produced by mulArts.getArtistNames for the most
# common chart artists:
#   singleGet - no DataFrame from getMusicData, artist resolves to one name
#   multiGet  - no DataFrame from getMusicData, artist resolves to several names
#   matchGet  - getMusicData returned a DataFrame with a row count other than 1
# Each table maps the name to the set of song/album titles of the chart artist
# under which the name was first seen. Names already classified are skipped.
multiGet, singleGet, matchGet = {}, {}, {}
rowTemplate = "{0: <20}{1: <30}{2: <30}{3}\t{4}"

for i, (artist, cnt) in enumerate(slimResults.most_common()):
    songTitles   = list(fullResults[artist]["Songs"].keys())
    albumTitles  = list(fullResults[artist]["Albums"].keys())
    artistAlbums = set(songTitles + albumTitles)
    matches = mulArts.getArtistNames(artist)
    for match in matches.keys():
        # Values stored in the three tables are always sets (never None), so a
        # membership test is equivalent to checking `.get(match) is not None`.
        if match in multiGet or match in singleGet or match in matchGet:
            continue
        mdata = getMusicData("DiscArtist", match)
        if isinstance(mdata, DataFrame):
            if mdata.shape[0] == 1:
                continue
            target, label = matchGet, "Match"
        elif len(matches) == 1:
            target, label = singleGet, "Single"
        else:
            target, label = multiGet, "Multi"
        target[match] = artistAlbums
        progress = "{0} / {1}".format(i, len(slimResults))
        print(rowTemplate.format(progress, match, artist, cnt, label))
    # Stop after the entry with index 1001 has been processed.
    if i > 1000:
        break

In [None]:
# Persist the multiGet, singleGet and matchGet tables, one file each.
# NOTE(review): multiGet is written to "multiToGet.p" while the other two use
# "<name>Get.p" — confirm which file name the readers of this data expect.
saveFile(idata=multiGet, ifile="multiToGet.p")
saveFile(idata=singleGet, ifile="singleGet.p")
saveFile(idata=matchGet, ifile="matchGet.p")

In [None]:
# Run the matcher (N=10, cutoff=0.6) for every name in multiGet, passing the
# titles stored for it, and print the outcome next to the name.
for artistName in multiGet:
    artistResults = multiGet[artistName]
    results = getBestArtistMatch(artistName, artistResults, N=10, cutoff=0.6)
    print(artistName, '\t', results)

In [None]:
%load_ext autoreload
%autoreload

from artists import artists
# Build the discogs/artists helper objects and call searchDiscogForArtist
# once per name stored in singleGet, printing a banner before each call.
disc = discogs()
arts = artists(disc)
for artistName in singleGet.keys():
    print("===========>",artistName)
    arts.searchDiscogForArtist(artistName)

In [None]:
from collections import Counter
# Scratch check: Counter.most_common(1) yields a one-element list holding the
# (key, count) pair with the largest count.
x = Counter({'a': 0, 'b': 0.5, 'c': 0.75})
x.most_common(1)

In [None]:
# Spot check: call getBestArtistMatch for one artist name with a small set of titles.
getBestArtistMatch("Beyonce", {'Crazy In Love', 'Deja Vu', 'Drunk In Love'})

In [None]:
# Minimum SequenceMatcher ratio for an album-name pair to count as a match.
# It is lowered to 0.3 when myMusicNameIDMap already has entries.
# The comparison below now uses this threshold; previously ratVal was computed
# but the check was hard-coded to 0.6, so the relaxed value never took effect.
ratVal = 0.6
if len(myMusicNameIDMap) > 0:
    ratVal = 0.3

# artistName -> [DB index, row["Name"]] of a DB row with at least one matching album.
# If several rows/albums match, the last match found wins.
artistIDMap = {}
for artistName,artistSlimData in multiMap.items():
    print("\n","="*50)
    print("ArtistName: {0}".format(artistName))
    print("   Albums: {0}".format(len(artistSlimData["Albums"])))

    # Keep only the part of each album entry after "/<artistName>/".
    myAlbumNames = [album.split("/{0}/".format(artistName))[-1] for album in artistSlimData["Albums"]]

    for idx,row in artistSlimData["DB"].iterrows():
        artistAlbumsData = getRowByIndex(artistAlbumsDB, idx)
        artistAlbums     = artistAlbumsData["Albums"]
        print("   Match: {0}  [{1}]".format(row["DiscArtist"], row["Name"]))
        # Only dict-valued album data is compared; anything else is skipped.
        if not isinstance(artistAlbums, dict):
            continue
        for albumType, albumTypeData in artistAlbums.items():
            for albumID, dbAlbumName in albumTypeData.items():
                for myAlbumName in myAlbumNames:
                    ratio = SequenceMatcher(None, myAlbumName, dbAlbumName).ratio()
                    if ratio > ratVal:
                        print("     {0: <15}{1: <10}{2: <8}{3}".format(albumType, albumID, round(ratio,2), dbAlbumName))
                        artistIDMap[artistName] = [idx, row["Name"]]

In [None]:
# Select which chart(s) the download/verify cells below operate on: those
# cells skip every chart name that is not a key of `daytype`.
# NOTE: each assignment replaces the previous one, so only the LAST
# uncommented `daytype = ...` line is in effect when the whole cell runs.
daytype = {"USA Albums": ["SAT", "%Y-%m-%d"],
           "USA Singles Top 40": ["SAT", "%Y-%m-%d"]}
daytype = {"UK Singles Top 40": ["SAT", "%Y-%m-%d"]}
daytype = {"Top40-Charts.com Web Top 100": ["SAT", "%Y-%m-%d"]}
daytype = {"Airplay World Official Top 100": ["SAT", "%Y-%m-%d"]}

#daytype = {"USA Singles Top 40": ["SAT", "%Y-%m-%d"]}

from pandas import Timestamp
def getDates(daytype, cname):
    """Return the weekly chart dates for `cname` as "%Y-%m-%d" strings.

    Every known chart is described by two weekly ranges that are generated
    one after the other and concatenated; the second range always runs up to
    the current time. A chart name that is not listed yields an empty list.

    Args:
        daytype: accepted for the caller's convenience; not read here.
        cname: chart name, e.g. "UK Singles Top 40".

    Returns:
        list of date strings, first range followed by second range.
    """
    # (start, end, frequency) per range; an end of None means "up to today".
    schedule = {
        "USA Singles Top 40": (
            ("1997-07-07", "2000-04-01", "W-MON"),
            ("2000-04-01", None, "W-SAT"),
        ),
        "UK Singles Top 40": (
            ("2002-12-08", "2011-11-12", "W-SUN"),
            ("2011-11-12", None, "W-SAT"),
        ),
        "Top40-Charts.com Web Top 100": (
            ("2002-10-14", "2005-12-26", "W-MON"),
            ("2009-02-28", None, "W-SAT"),
        ),
        "Airplay World Official Top 100": (
            ("2002-01-07", "2002-01-21", "W-MON"),
            ("2002-02-02", None, "W-SAT"),
        ),
    }

    collected = []
    for first, last, weekly in schedule.get(cname, ()):
        stop = Timestamp.today() if last is None else last
        span = date_range(start=first, end=stop, freq=weekly).strftime("%Y-%m-%d")
        collected = collected + list(span)
    return collected

In [None]:
def showDates(year, day):
    """Return the weekly dates anchored on weekday `day` from `year` to `year`+1.

    Args:
        year: starting year (int, or anything int() accepts).
        day: three-letter weekday code used to build the "W-<day>" frequency.

    Returns:
        The result of pandas.date_range for that span and weekly frequency.
    """
    first = str(year)
    last = str(int(year) + 1)
    weekly = 'W-{0}'.format(day)
    return date_range(start=first, end=last, freq=weekly)

showDates(2011, 'SUN')

In [None]:
from time import sleep
# `import urllib` by itself does not load the `request` submodule; import it
# explicitly so this cell does not depend on another module having done so.
import urllib.request

# Browser-like User-Agent sent with every chart request (same for all charts).
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

# Download one page per (chart, date) for every chart selected in `daytype`,
# skipping dates whose output file already exists.
for cid,cname in charts.items():
    if daytype.get(cname) is None:
        continue
    dates = getDates(daytype, cname)

    # One output directory per chart; "/" in the chart name is replaced by a space.
    savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
    mkDir(savedir)

    for idts,datename in enumerate(dates):

        # Safety cap on the number of dates handled per chart.
        if idts > 2000:
            break

        savename = setFile(savedir, "{0}.p".format(datename))
        if isFile(savename):
            continue

        url      = "https://top40-charts.com/chart.php?cid={0}&date={1}".format(cid, datename)
        request = urllib.request.Request(url, None, headers)
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(request) as response:
            data = response.read()

        print("URL ---> {0}".format(url))
        print(idts,'/',len(dates),"\tSaving {0}".format(savename))
        saveJoblib(data=data, filename=savename, compress=True)
        # Pause between downloads.
        sleep(3)
        

#list(all_sundays(2001))

In [None]:
from time import sleep
# For every selected chart and date, parse the saved page and print how many
# entries were extracted; dates with zero entries get a trailing marker.
for cid,cname in charts.items():
    if daytype.get(cname) is not None:
        dates = getDates(daytype, cname)

        for idts,datename in enumerate(dates):
            savedir  = join(basedir, "data", "top40", cname.replace("/", " "))
            savename = setFile(savedir, "{0}.p".format(datename))

            chartData = getHTML(savename)
            results = getChartData(chartData, debug=False)

            report = [cname,'\t\t',datename,'\t',len(results)]
            if len(results) == 0:
                report.append(" <<-------")
            print(*report)

# Process Chart Data

In [None]:
# Append ".p" to every file found in the per-chart directories (the
# "starters" directory is excluded).
savedir  = join(basedir, "data", "top40")
dirnames = [x for x in findDirs(savedir) if not x.endswith("starters")]
for dirname in dirnames:
    files = findExt(dirname, ext="*")
    for ifile in files:
        # Skip files that already carry the extension so that re-running the
        # cell does not produce "name.p.p".
        if str(ifile).endswith(".p"):
            continue
        src = ifile
        dst = "{0}.p".format(ifile)
        moveFile(src, dst)


In [None]:
# For each per-chart directory (excluding "starters"), load only the first
# "*.p" file returned by findExt. `chartData` is overwritten on every
# directory, so after the cell it holds the page from the last directory
# that had at least one file.
savedir  = join(basedir, "data", "top40")
dirnames = [x for x in findDirs(savedir) if x.endswith("starters") == False]
for dirname in dirnames:
    files = findExt(dirname, ext="*.p")
    for ifile in files:
        chartData = getHTML(ifile)
        # Only the first file of the directory is wanted.
        break

In [None]:
# Display the most recently loaded chart page.
chartData

In [None]:
# Load the local test page. The path is built from `basedir` (the notebook's
# working directory) instead of a hard-coded absolute user path, so the cell
# also works on other machines/checkouts.
chartData = getHTML(join(basedir, "data", "top40", "test.html"))

In [None]:
def getChartData(chartData, debug=False):
    """Extract an artist -> song-title mapping from a parsed chart page.

    Each ``<tr class="latc_song">`` row is inspected once. The title is the
    text of the first "View song details" anchor that does not contain an
    ``<img>``; the artist is the text of the first anchor whose style is
    exactly "text-decoration: none; ".

    Args:
        chartData: parsed page exposing ``findAll(name, attrs)``
            (presumably the object returned by getHTML -- confirm).
        debug: when True, print a line for every row that is skipped because
            it lacks a title or artist anchor. Accepted so that existing
            calls passing ``debug=False`` work.

    Returns:
        dict mapping artist text to title text, in row order. If an artist
        occurs in several rows, the last row's title is kept.
    """
    retval = {}
    rows = chartData.findAll("tr", {"class": "latc_song"})
    for irow, row in enumerate(rows):
        # Anchors that wrap an image are not title text; ignore them.
        titleLinks = [a for a in row.findAll("a", {"title": "View song details"})
                      if a.find("img") is None]
        artistLinks = row.findAll("a", {"style": "text-decoration: none; "})

        # Extract both values from the same row so artists and titles can
        # never get out of step; incomplete rows are skipped instead of
        # raising IndexError.
        if not titleLinks or not artistLinks:
            if debug:
                print("Skipping row {0}: missing title or artist link".format(irow))
            continue

        retval[artistLinks[0].text] = titleLinks[0].text
    return retval

In [None]:

# Number of <table> elements found in the loaded page.
len(chartData.findAll('table'))

In [None]:
from searchUtils import findExt

In [None]:
# List the saved pages for the "World Singles Official Top 100" chart.
# NOTE(review): other cells call findExt with ext="*.p" rather than ".p" --
# confirm which pattern form findExt expects.
files = findExt("data/top40/World Singles Official Top 100/", ext=".p")

In [None]:
# Exploration aid: for the first five files, print the text of every anchor
# inside every "latc_song" row, prefixed by its table / row / cell / anchor
# indices, to see where the chart fields live in the page.
for ifile in files[:5]:
    print(ifile)
    chartData = getHTML(ifile)
    
    for it,x in enumerate(chartData.findAll("table")):
        trs = x.findAll("tr", {"class": "latc_song"})
        for itr,tr in enumerate(trs):
            tds = tr.findAll("td")
            for itd,td in enumerate(tds):
                hrefs = td.findAll("a")
                for ihref,href in enumerate(hrefs):
                    print(it,'\t',itr,'\t',itd,'\t',ihref,'\t',href.text)

In [None]:
# Print each table's index together with len() of the table element.
for it,x in enumerate(chartData.findAll("table")):
    print(it,len(x))

In [None]:
# Display the table at index 8 (raises IndexError if the page has fewer than nine tables).
chartData.findAll("table")[8]

In [None]:
# Re-list the saved pages for the "World Singles Official Top 100" chart
# (same call as in an earlier cell).
files = findExt("data/top40/World Singles Official Top 100/", ext=".p")

In [None]:
# Display the first entry of `files`.
files[0]

In [None]:
# Load the first file with getHTML and display the result.
getHTML(files[0])

In [None]:
def all_sundays(year):
    """Yield every Sunday of `year` as a ``datetime.date``, in calendar order."""
    new_years_day = date(year, 1, 1)
    # date.weekday() is 0 for Monday and 6 for Sunday, so this offset lands on
    # the first Sunday on or after January 1st.
    sunday = new_years_day + timedelta(days=6 - new_years_day.weekday())
    one_week = timedelta(days=7)
    while sunday.year == year:
        yield sunday
        sunday = sunday + one_week
            
# Print every Sunday of 2020 as DD-MM-YYYY together with its Python type.
# m, d and y are assigned but not used anywhere in this cell.
for s in all_sundays(2020):
    m = s.month
    d = s.day
    y = s.year
    print(s.strftime("%d-%m-%Y"))
    print(type(s))

In [None]:
# Sample chart date string in YYYY-MM-DD form.
d = "2002-01-07"

In [None]:
# List the saved pages under the "old" copy of the World Singles chart directory.
files = findExt("data/top40/old/World Singles Official Top 100/", ext=".p")

In [None]:
# Load the first of those files with getHTML.
bs = getHTML(files[0])

In [None]:
# NOTE(review): if `bs` is a BeautifulSoup object, find() treats its first
# argument as a tag name, so this looks for an <Iglesias> element rather than
# for the text "Iglesias" -- confirm what was intended.
bs.find("Iglesias")

In [None]:
# Chart page URL (cid=35, date 2002-01-07) used by the test download below.
url = "https://top40-charts.com/chart.php?cid=35&date=2002-01-07"

In [None]:
# Fetch the single test `url` and save the raw bytes as "mytest.p" under `savedir`.
# `import urllib` by itself does not load the `request` submodule.
import urllib.request

# Browser-like User-Agent sent with the request.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
headers={'User-Agent':user_agent,}

savename = setFile(savedir, "mytest.p")

request = urllib.request.Request(url, None, headers)
# The context manager closes the HTTP response even if read() fails.
with urllib.request.urlopen(request) as response:
    data = response.read()

# The progress counter (idts / len(dates)) belonged to the bulk download loop
# and is meaningless for this one-off request, so it is no longer printed.
print("Saving {0}".format(savename))
saveJoblib(data=data, filename=savename, compress=True)

In [None]:
# Position of "Enya" in the textual repr of the downloaded bytes
# (str(bytes) gives the "b'...'" form); -1 means it does not occur.
str(data).find("Enya")