# Top 40 Charts Functions

In [4]:
## Basic stuff
%load_ext autoreload
%autoreload

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))
#IPython.Cell.options_default.cm_config.lineNumbers = true;

################################################################################
## Python Version
################################################################################
import sys


################################################################################
## General Stuff
################################################################################
from multiprocessing import Pool
from tqdm import tqdm


################################################################################
## Util Stuff
################################################################################
from timeUtils import clock, elapsed
from ioUtils import saveFile, getFile


################################################################################
## Music DB
################################################################################
from mainDB import mainDB
from musicDBMap import musicDBMap
from masterDBMatchClass import masterDBMatchClass
from matchDBArtist import matchDBArtist


################################################################################
## Music Names
################################################################################
from masterArtistNameDB import masterArtistNameDB


################################################################################
## Chart Stuff
################################################################################
from artistIgnores import getArtistIgnores
from billboardData import billboardData
from top40Data import top40Data


################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
import datetime as dt

# Raise the pandas display limits: 500 rows, 500 columns, 1000 characters wide.
for optionName, optionValue in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(optionName, optionValue)

# Record the interpreter version and the moment this run was started.
print("Python: {0}".format(sys.version))
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2020-12-12 16:08:27.781433


# Final Aggregation

In [5]:
# Reload local modules, then rebuild and save the aggregated chart data.
# The call order below is kept exactly as written; the methods act on shared
# state inside `td`.
%load_ext autoreload
%autoreload
from top40Data import top40Data
td = top40Data(minYear=1, maxYear=2021)
# NOTE(review): rank is passed as a list here but as a bare int (rank=0) in the
# "Match and Flag Artists" section — confirm which form setChartUsage expects.
td.setChartUsage(rank=[0])
td.setFullChartData()
td.setArtistAlbumData()
td.saveArtistAlbumData()
td.saveFullChartData()

# clock() returns two values; both are discarded. Note that binding `_` also
# shadows IPython's "last output" variable for the rest of the session.
_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  Getting Chart For usa
  Using 2 Charts
  Using 2 Charts
  Using Charts (None): ['USA Singles Top 40', 'USA Albums']
Found 49 files.
==> USA Singles Top 40                      	1755
==> USA Albums                              	2926
Renamed 0 artists
Most Common Artists:
Saving 2926 Artist Album Data to currentTop40ArtistAlbumData.p
Saving data to currentTop40ArtistAlbumData.p
  --> This file is 149.7kB.
Saved data to currentTop40ArtistAlbumData.p
  --> This file is 149.7kB.
Saving 2926 Full Artist Data
Saving data to currentTop40FullChartArtistAlbumData.p
  --> This file is 489.6kB.
Saved data to currentTop40FullChartArtistAlbumData.p
  --> This file is 489.6kB.
Current Time is Sat Dec 12, 2020 16:08:34 for Last Run


# Global Parameters

In [4]:
# Master switch: the download and parse cells below only act when this is True.
download = False

### Find Charts

In [5]:
# NOTE(review): `top40` and `top40starter` are not imported in any visible setup
# cell, and `t40` is not used by any visible cell — confirm both.
t40 = top40()
t40starter = top40starter()
t40starter.parse()
# Displayed as the cell result: maps chart name -> {'ID': ..., 'URL': ...}.
t40starter.chartIDs

{'USA Singles Top 40': {'ID': 27,
  'URL': 'https://top40-charts.com/chart.php?cid=27'},
 'UK Singles Top 40': {'ID': 25,
  'URL': 'https://top40-charts.com/chart.php?cid=25'},
 'Top40-Charts.com Web Top 100': {'ID': 39,
  'URL': 'https://top40-charts.com/chart.php?cid=39'},
 'China Top 20': {'ID': 42,
  'URL': 'https://top40-charts.com/chart.php?cid=42'},
 'German Top 40': {'ID': 12,
  'URL': 'https://top40-charts.com/chart.php?cid=12'},
 'Japan Top 20': {'ID': 16,
  'URL': 'https://top40-charts.com/chart.php?cid=16'},
 'Australia Top 20': {'ID': 4,
  'URL': 'https://top40-charts.com/chart.php?cid=4'},
 'Brazil Top 20': {'ID': 8, 'URL': 'https://top40-charts.com/chart.php?cid=8'},
 'Greece Top 20': {'ID': 2, 'URL': 'https://top40-charts.com/chart.php?cid=2'},
 'New Zealand Top 20': {'ID': 17,
  'URL': 'https://top40-charts.com/chart.php?cid=17'},
 'Bulgaria Top 20': {'ID': 49,
  'URL': 'https://top40-charts.com/chart.php?cid=49'},
 'Portugal Top 20': {'ID': 20,
  'URL': 'https://top40

### Download Chart Starter Files

In [6]:
for chartName, chartData in t40starter.chartIDs.items():
    chartURL, chartID = chartData["URL"], chartData["ID"]

    # Network access is opt-in: nothing is fetched unless `download` is set.
    if not download:
        continue
    t40chart = top40chart(chartID, chartName, chartURL)
    t40chart.downloadStarterChart()

### Download Chart Data

In [7]:
for chartName, chartData in t40starter.chartIDs.items():
    chartURL, chartID = chartData["URL"], chartData["ID"]

    # Chart dates are only fetched when the `download` switch is on.
    if not download:
        continue
    t40chart = top40chart(chartID, chartName, chartURL)
    t40chart.downloadChartDates()

### Get Charts Not Previously Known

In [8]:
# Discover charts via the first known starter chart only (the loop stops after
# one iteration, as before). `newCharts` starts as an empty mapping so it is
# always defined, even when no starter charts are known.
newCharts = {}
for chartName, chartData in t40starter.chartIDs.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    t40chart  = top40chart(chartID, chartName, chartURL)
    newCharts = t40chart.getCharts()
    break
print("Found {0} new charts".format(len(newCharts)))

# Pass 1: starter page for every discovered chart (fetched only if `download`).
for chartName, chartData in newCharts.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    t40chart  = top40chart(chartID, chartName, chartURL)
    if download:
        t40chart.downloadStarterChart()

# Pass 2: chart dates, once per chart. This loop used to be pasted twice, so
# every chart's dates were downloaded twice.
if download:
    for chartName, chartData in newCharts.items():
        chartURL = chartData["URL"]
        chartID  = chartData["ID"]
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.downloadChartDates()

# Merge regardless of `download` so `t40Charts` always exists after this cell;
# on duplicate chart names the entry from `newCharts` wins.
t40Charts = {**t40starter.chartIDs, **newCharts}

Found 48 new charts


### Combine All Charts

In [9]:
knownCharts = t40starter.chartIDs
for template, charts in (
    ("Found {0} old charts", knownCharts),
    ("Found {0} new charts", newCharts),
):
    print(template.format(len(charts)))

# Later entries win on duplicate chart names, so newCharts overrides the
# starter data where both define the same chart.
t40Charts = dict(knownCharts)
t40Charts.update(newCharts)
print("Found {0} all charts".format(len(t40Charts)))

Found 12 old charts
Found 48 new charts
Found 48 all charts


### Parse Chart Data

In [10]:
# Parsing is gated on the same `download` switch as the fetch cells; with the
# switch off, there is nothing to iterate over and `t40Charts` is not touched.
chartsToParse = t40Charts.items() if download else ()
for chartName, chartData in chartsToParse:
    print("==> {0}".format(chartName))
    chartURL, chartID = chartData["URL"], chartData["ID"]
    t40chart = top40chart(chartID, chartName, chartURL)
    t40chart.parseCharts()

In [23]:
# Display the merged chart registry: chart name -> {'ID': ..., 'URL': ...}.
t40Charts

{'USA Singles Top 40': {'ID': '27',
  'URL': 'https://top40-charts.com/chart.php?cid=27'},
 'UK Singles Top 40': {'ID': '25',
  'URL': 'https://top40-charts.com/chart.php?cid=25'},
 'Top40-Charts.com Web Top 100': {'ID': '39',
  'URL': 'https://top40-charts.com/chart.php?cid=39'},
 'China Top 20': {'ID': '42',
  'URL': 'https://top40-charts.com/chart.php?cid=42'},
 'German Top 40': {'ID': '12',
  'URL': 'https://top40-charts.com/chart.php?cid=12'},
 'Japan Top 20': {'ID': '16',
  'URL': 'https://top40-charts.com/chart.php?cid=16'},
 'Australia Top 20': {'ID': '4',
  'URL': 'https://top40-charts.com/chart.php?cid=4'},
 'Brazil Top 20': {'ID': '8',
  'URL': 'https://top40-charts.com/chart.php?cid=8'},
 'Greece Top 20': {'ID': '2',
  'URL': 'https://top40-charts.com/chart.php?cid=2'},
 'New Zealand Top 20': {'ID': '17',
  'URL': 'https://top40-charts.com/chart.php?cid=17'},
 'Bulgaria Top 20': {'ID': '49',
  'URL': 'https://top40-charts.com/chart.php?cid=49'},
 'Portugal Top 20': {'ID': '

***
****

# Match and Flag Artists

In [9]:
def getArtistRenames():
    """Return the contents of ``masterRename.yaml`` as loaded by ``getFile``."""
    return getFile("masterRename.yaml")

def getDBRenames():
    """Return the contents of ``../music/dbRenames.yaml`` as loaded by ``getFile``."""
    return getFile("../music/dbRenames.yaml")

In [34]:
# top40Data is already imported by the setup cell at the top of the notebook.
td = top40Data(minYear=1, maxYear=2021)

In [35]:
# NOTE(review): rank is a bare int here but a list (rank=[0]) in the
# "Final Aggregation" cell — confirm which form setChartUsage expects.
td.setChartUsage(rank=0)
# NOTE(review): this cell's output prints "Found 49 files." twice, while the
# aggregation cell (no explicit findFiles call) prints it once — the explicit
# call looks redundant; confirm before removing.
td.findFiles()
td.setFullChartData()

  Getting Chart For usa
  Using 2 Charts
  Using 2 Charts
  Using Charts (None): ['USA Singles Top 40', 'USA Albums']
Found 49 files.
Found 49 files.
==> USA Singles Top 40                      	1755
==> USA Albums                              	2926
Renamed 0 artists
Most Common Artists:


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  Getting Chart For usa
  Using 2 Charts
  Using 2 Charts
  Getting Chart For uk
  Using 3 Charts
  Using 3 Charts
  Using Charts (None): ['USA Singles Top 40', 'USA Albums', 'UK Singles Top 40', 'UK Top 20 Albums', 'Canada Top 20']
Found 49 files.
==> USA Singles Top 40                      	1755
==> UK Singles Top 40                       	3543
==> Canada Top 20                           	3917
==> UK Top 20 Albums                        	4114
==> USA Albums                              	5035
Renamed 0 artists
Most Common Artists:
Saving 5035 Artist Album Data to currentTop40ArtistAlbumData.p
Saving data to currentTop40ArtistAlbumData.p
  --> This file is 252.1kB.
Saved data to currentTop40ArtistAlbumData.p
  --> This file is 252.1kB.
Saving 5035 Full Artist Data
Saving data to currentTop40FullChartArtistAlbumData.p
  --> This file is 954.6kB.
Saved data to currentTop40FullChartArtistAlbumData.p
  

In [10]:
# Build the artist/album aggregation with fullCharts and save both results.
# The call order is kept exactly as written; the methods act on shared state
# inside `fCharts`.
%load_ext autoreload
%autoreload

# Load both rename tables through the helper functions defined earlier.
artistRenames = getArtistRenames()
dbRenames     = getDBRenames()

from fullCharts import fullCharts
# Alternative, narrower year window kept for reference:
#fCharts = fullCharts(t40Charts, ctype=None, country=None, minYear=2000, maxYear=2005) 
fCharts = fullCharts(t40Charts, ctype=None, country=None, minYear=1, maxYear=2021) 
# NOTE(review): setRenames is called twice; if it replaces rather than merges,
# artistRenames is discarded by the second call — confirm.
fCharts.setRenames(artistRenames)
fCharts.setRenames(dbRenames)
fCharts.setFullChartData()
fCharts.setArtistAlbumData()
# NOTE(review): these are the same file names the top40Data cell reports saving
# to, so whichever cell runs last overwrites the other's output — confirm this
# is intended.
artistAlbumData = fCharts.getArtistAlbumData()
saveFile(idata=artistAlbumData, ifile="currentTop40ArtistAlbumData.p", debug=True)
fullChartData = fCharts.getFullChartData()
saveFile(idata=fullChartData, ifile="currentTop40FullChartArtistAlbumData.p", debug=True)

# clock() returns two values; both are discarded.
_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
==> USA Singles Top 40                      	1754
==> UK Singles Top 40                       	3542
==> Top40-Charts.com Web Top 100            	5239
==> China Top 20                            	5417
==> German Top 40                           	5962
==> Japan Top 20                            	6719
==> Australia Top 20                        	7142
==> Brazil Top 20                           	7547
==> Greece Top 20                           	8053
==> New Zealand Top 20                      	8333
==> Bulgaria Top 20                         	8699
==> Portugal Top 20                         	8970
==> Airplay World Official Top 100          	9901
==> Argentina Top 20                        	10175
==> Austria Top 20                          	10593
==> Belgium Top 20                          	10981
==> Canada Top 20                           	11235
==> Chile Top 20                            	11532
==> Den

***
***
***