# Top 40 Charts Functions

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
from glob import glob
from os import getcwd
from os.path import join
from fileUtils import getBasename, getDirname, getBaseFilename
from fsUtils import isFile, isDir, moveFile, removeFile, mkDir, setFile, moveFile
from searchUtils import findDirs, findExt, findNearest

################################################################################
## General Stuff
################################################################################
from timeUtils import clock, elapsed
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from time import sleep

################################################################################
## Database Stuff
################################################################################
from dbBase import dbBase
from mainDB import mainDB
from multiArtist import multiartist
from matchAlbums import matchAlbums
from masterdb import masterdb



################################################################################
## Music Stuff
################################################################################
from myMusicDBMap import myMusicDBMap
from musicBase import myMusicBase
from matchMyMusic import matchMyMusic
from matchMusicName import myMusicName
from mergeDB import searchForMutualDBEntries, searchForMutualArtistDBEntries


################################################################################
## Chart Stuff
################################################################################
from top40charts import top40, top40chart, top40starter
from matchChartMusic import matchChartMusic
from fullCharts import fullCharts

################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
# Widen pandas' display limits so large frames are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Record the interpreter version in the cell output.
print("Python: {0}".format(sys.version))

from pandas import date_range
# The working directory at the time this cell runs is treated as the base directory.
basedir = getcwd()
print("Basedir = {0}".format(basedir))

import datetime as dt
# Timestamp taken when this setup cell is executed.
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))



Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Basedir = /Users/tgadfort/Documents/code/charts
Notebook Last Run Initiated: 2020-10-02 21:37:20.264491


# Global Parameters

In [2]:
download = False

### Find Charts

In [3]:
# Create the top40 helper and the starter object.
# NOTE(review): `t40` is not referenced again in the cells visible here — confirm it is still needed.
t40 = top40()
t40starter = top40starter()
# parse() is called before chartIDs is read; presumably it is what fills chartIDs — confirm.
t40starter.parse()
# Bare last expression: displays the mapping of chart name -> {'ID': ..., 'URL': ...}.
t40starter.chartIDs

{'USA Singles Top 40': {'ID': 27,
  'URL': 'https://top40-charts.com/chart.php?cid=27'},
 'UK Singles Top 40': {'ID': 25,
  'URL': 'https://top40-charts.com/chart.php?cid=25'},
 'Top40-Charts.com Web Top 100': {'ID': 39,
  'URL': 'https://top40-charts.com/chart.php?cid=39'},
 'China Top 20': {'ID': 42,
  'URL': 'https://top40-charts.com/chart.php?cid=42'},
 'German Top 40': {'ID': 12,
  'URL': 'https://top40-charts.com/chart.php?cid=12'},
 'Japan Top 20': {'ID': 16,
  'URL': 'https://top40-charts.com/chart.php?cid=16'},
 'Australia Top 20': {'ID': 4,
  'URL': 'https://top40-charts.com/chart.php?cid=4'},
 'Brazil Top 20': {'ID': 8, 'URL': 'https://top40-charts.com/chart.php?cid=8'},
 'Greece Top 20': {'ID': 2, 'URL': 'https://top40-charts.com/chart.php?cid=2'},
 'New Zealand Top 20': {'ID': 17,
  'URL': 'https://top40-charts.com/chart.php?cid=17'},
 'Bulgaria Top 20': {'ID': 49,
  'URL': 'https://top40-charts.com/chart.php?cid=49'},
 'Portugal Top 20': {'ID': 20,
  'URL': 'https://top40

### Download Chart Starter Files

In [4]:
# Walk every starter chart and call downloadStarterChart() on it.
# Nothing is constructed or downloaded unless the global `download` flag is True.
for chartName, chartData in t40starter.chartIDs.items():
    chartURL, chartID = chartData["URL"], chartData["ID"]
    if not download:
        continue
    t40chart = top40chart(chartID, chartName, chartURL)
    t40chart.downloadStarterChart()

### Download Chart Data

In [5]:
# Walk every starter chart and call downloadChartDates() on it.
# Nothing is constructed or downloaded unless the global `download` flag is True.
for chartName, chartData in t40starter.chartIDs.items():
    chartURL, chartID = chartData["URL"], chartData["ID"]
    if not download:
        continue
    t40chart = top40chart(chartID, chartName, chartURL)
    t40chart.downloadChartDates()

### Get Charts Not Previously Known

In [12]:
# Ask a single starter chart for its chart listing: the loop stops after the
# first entry, so only that chart's getCharts() result is kept.
# NOTE(review): presumably every chart page exposes the same listing — confirm.
newCharts = {}  # default so the report below works when there are no starter charts
for chartName, chartData in t40starter.chartIDs.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    t40chart  = top40chart(chartID, chartName, chartURL)
    newCharts = t40chart.getCharts()
    break
print("Found {0} new charts".format(len(newCharts)))

# A chart object is built for every listed chart; the starter download itself
# only happens when the global `download` flag is True.
for chartName, chartData in newCharts.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    t40chart  = top40chart(chartID, chartName, chartURL)
    if download:
        t40chart.downloadStarterChart()

if download:
    # One pass over the listing. The previous version repeated this identical
    # loop twice back-to-back, fetching every chart's dates a second time.
    for chartName, chartData in newCharts.items():
        chartURL = chartData["URL"]
        chartID  = chartData["ID"]
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.downloadChartDates()

    # Starter charts plus listed charts; entries in newCharts win on duplicate names.
    t40Charts = {**t40starter.chartIDs, **newCharts}

Found 48 new charts


### Combine All Charts

In [15]:
# Report the size of each chart mapping, then of their union.
# On duplicate chart names the entry from newCharts replaces the starter entry.
print("Found {0} old charts".format(len(t40starter.chartIDs)))
print("Found {0} new charts".format(len(newCharts)))
t40Charts = dict(t40starter.chartIDs)
t40Charts.update(newCharts)
print("Found {0} all charts".format(len(t40Charts)))

Found 12 old charts
Found 48 new charts
Found 48 all charts


### Parse Chart Data

In [16]:
# Call parseCharts() on every chart in the combined mapping, announcing each
# chart name first. The whole step is skipped unless `download` is True.
if download:
    for chartName, chartData in t40Charts.items():
        print("==> {0}".format(chartName))
        chartURL, chartID = chartData["URL"], chartData["ID"]
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.parseCharts()

***
****

# Match and Flag Artists

In [19]:
def getArtistRenames():
    """Return whatever ``getFile`` loads from ``masterRename.yaml``.

    The file name is relative, so it is resolved against the current
    working directory of the kernel.
    """
    return getFile("masterRename.yaml")

In [22]:
%load_ext autoreload
%autoreload

artistRenames = getArtistRenames()
from fullCharts import fullCharts
fCharts = fullCharts(t40Charts, ctype=None, country=None, minYear=1, maxYear=20100) 
fCharts.setRenames(artistRenames)
fCharts.setFullChartData()
fCharts.setArtistAlbumData()
artistAlbumData = fCharts.getArtistAlbumData()
saveFile(idata=artistAlbumData, ifile="currentTop40ArtistAlbumData.p", debug=True)
fullChartData = fCharts.getFullChartData()
saveFile(idata=fullChartData, ifile="currentTop40FullChartArtistAlbumData.p", debug=True)

_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
==> USA Singles Top 40                      	1741
==> UK Singles Top 40                       	3523
==> Top40-Charts.com Web Top 100            	5218
==> China Top 20                            	5395
==> German Top 40                           	5940
==> Japan Top 20                            	6690
==> Australia Top 20                        	7110
==> Brazil Top 20                           	7511
==> Greece Top 20                           	8015
==> New Zealand Top 20                      	8294
==> Bulgaria Top 20                         	8660
==> Portugal Top 20                         	8931
==> Airplay World Official Top 100          	9861
==> Argentina Top 20                        	10135
==> Austria Top 20                          	10552
==> Belgium Top 20                          	10939
==> Canada Top 20                           	11192
==> Chile Top 20                            	11487
==> Den

***
***
***