# Top 40 Charts Functions

In [18]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## System / filesystem helpers (sys is used below to print the Python version)
import sys
from glob import glob
from os import getcwd
from os.path import join
from fileUtils import getBasename, getDirname, getBaseFilename
from fsUtils import isFile, isDir, moveFile, removeFile, mkDir, setFile, moveFile
from searchUtils import findDirs, findExt, findNearest

################################################################################
## General Stuff
################################################################################
from timeUtils import clock, elapsed
from webUtils import getHTML, getWebData
from timeUtils import getDateTime, isDate
from ioUtils import saveJoblib, loadJoblib, saveFile, getFile
import urllib
from time import sleep

################################################################################
## Database Stuff
################################################################################
from dbBase import dbBase
from mainDB import mainDB
from multiArtist import multiartist
from matchAlbums import matchAlbums
from masterdb import masterdb



################################################################################
## Music Stuff
################################################################################
from myMusicDBMap import myMusicDBMap
from musicBase import myMusicBase
from matchMyMusic import matchMyMusic
from matchMusicName import myMusicName
from mergeDB import searchForMutualDBEntries, searchForMutualArtistDBEntries


################################################################################
## Chart Stuff
################################################################################
from top40charts import top40, top40chart, top40starter
from matchChartMusic import matchChartMusic
from fullCharts import fullCharts

################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
# Raise the pandas display limits (rows, columns, total width) in one pass.
for displayOption, displayLimit in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(displayOption, displayLimit)

print("Python: {0}".format(sys.version))

from pandas import date_range
# Directory the kernel is currently running in.
basedir = getcwd()
print("Basedir = {0}".format(basedir))

import datetime as dt
# Timestamp taken when this setup cell executes.
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.7 (default, Mar 26 2020, 10:32:53) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Basedir = /Users/tgadfort/Documents/code/charts
Notebook Last Run Initiated: 2020-07-31 22:46:02.173496


In [3]:
# Sample inputs for the isIn() check in the following cells.
vals = ["hiY", "yo", "yes"]
tofind = ["thomas", "yesX please", "hXiX thomas"]

In [4]:
from listUtils import isIn

In [5]:
# Evaluate listUtils.isIn on the two sample lists (the recorded run returned True).
# NOTE(review): the matching rule (substring vs. equality, argument roles) lives in listUtils — confirm there.
isIn(vals, tofind)

True

### Find Charts

In [6]:
# NOTE(review): t40 is not referenced again in the visible cells — confirm it is still needed.
t40 = top40()
t40starter = top40starter()
# parse() presumably populates chartIDs (displayed below) — confirm in top40charts.
t40starter.parse()
# Recorded output shape: {chart name: {'ID': <int>, 'URL': <chart page URL>}}.
t40starter.chartIDs

{'USA Singles Top 40': {'ID': 27,
  'URL': 'https://top40-charts.com/chart.php?cid=27'},
 'UK Singles Top 40': {'ID': 25,
  'URL': 'https://top40-charts.com/chart.php?cid=25'},
 'Top40-Charts.com Web Top 100': {'ID': 39,
  'URL': 'https://top40-charts.com/chart.php?cid=39'},
 'China Top 20': {'ID': 42,
  'URL': 'https://top40-charts.com/chart.php?cid=42'},
 'German Top 40': {'ID': 12,
  'URL': 'https://top40-charts.com/chart.php?cid=12'},
 'Japan Top 20': {'ID': 16,
  'URL': 'https://top40-charts.com/chart.php?cid=16'},
 'Australia Top 20': {'ID': 4,
  'URL': 'https://top40-charts.com/chart.php?cid=4'},
 'Brazil Top 20': {'ID': 8, 'URL': 'https://top40-charts.com/chart.php?cid=8'},
 'Greece Top 20': {'ID': 2, 'URL': 'https://top40-charts.com/chart.php?cid=2'},
 'New Zealand Top 20': {'ID': 17,
  'URL': 'https://top40-charts.com/chart.php?cid=17'},
 'Bulgaria Top 20': {'ID': 49,
  'URL': 'https://top40-charts.com/chart.php?cid=49'},
 'Portugal Top 20': {'ID': 20,
  'URL': 'https://top40

### Download Chart Starter Files

In [7]:
# Explicit switch instead of commented-out code: set to True to call
# top40chart.downloadStarterChart() for every starter chart. Off by default,
# so a full re-run of the notebook skips this step.
DOWNLOAD_STARTER_CHARTS = False

for chartName, chartData in t40starter.chartIDs.items():
    chartURL = chartData["URL"]
    chartID  = chartData["ID"]
    if DOWNLOAD_STARTER_CHARTS:
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.downloadStarterChart()

### Download Chart Data

In [8]:
# Explicit switch instead of commented-out code: set to True to call
# top40chart.downloadChartDates() for every starter chart. Off by default,
# so a full re-run of the notebook skips this step.
DOWNLOAD_CHART_DATES = False

for chartName, chartData in t40starter.chartIDs.items():
    chartURL = chartData["URL"]
    chartID  = chartData["ID"]
    if DOWNLOAD_CHART_DATES:
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.downloadChartDates()

### Get Charts Not Previously Known

In [9]:
# Only the first starter chart is queried: the loop exits after one pass.
# NOTE(review): presumably any chart page lists every chart — confirm in top40chart.getCharts.
# Start from an empty mapping so the summary below (and later cells that call
# newCharts.items()) still work when no starter charts were parsed, and so a
# stale value from an earlier run is never reported.
newCharts = {}
for chartName, chartData in t40starter.chartIDs.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    t40chart  = top40chart(chartID, chartName, chartURL)
    newCharts = t40chart.getCharts()
    break
print("Found {0} new charts".format(len(newCharts)))

Found 48 new charts


In [10]:
# Explicit switch instead of commented-out code: set to True to call
# top40chart.downloadStarterChart() for every newly found chart. Off by
# default, so a full re-run of the notebook skips this step.
DOWNLOAD_NEW_STARTER_CHARTS = False

for chartName, chartData in newCharts.items():
    chartURL  = chartData["URL"]
    chartID   = chartData["ID"]
    if DOWNLOAD_NEW_STARTER_CHARTS:
        t40chart  = top40chart(chartID, chartName, chartURL)
        t40chart.downloadStarterChart()

In [11]:
# Explicit switch instead of commented-out code: set to True to call
# top40chart.downloadChartDates() for every newly found chart. Off by
# default, so a full re-run of the notebook skips this step.
DOWNLOAD_NEW_CHART_DATES = False

for chartName, chartData in newCharts.items():
    chartURL = chartData["URL"]
    chartID  = chartData["ID"]
    if DOWNLOAD_NEW_CHART_DATES:
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.downloadChartDates()

### Combine All Charts

In [12]:
# Start from a copy of the starter charts, then layer the discovered charts on
# top; when a chart name appears in both, the newCharts entry wins.
t40Charts = dict(t40starter.chartIDs)
t40Charts.update(newCharts)
len(t40Charts)

48

### Parse Chart Data

In [13]:
# Explicit switch instead of commented-out code: set to True to call
# top40chart.parseCharts() for every chart. Off by default, so a full re-run
# of the notebook only lists the chart names.
PARSE_CHARTS = False

for chartName, chartData in t40Charts.items():
    print("==> {0}".format(chartName))
    chartURL = chartData["URL"]
    chartID  = chartData["ID"]
    if PARSE_CHARTS:
        t40chart = top40chart(chartID, chartName, chartURL)
        t40chart.parseCharts()

==> USA Singles Top 40
==> UK Singles Top 40
==> Top40-Charts.com Web Top 100
==> China Top 20
==> German Top 40
==> Japan Top 20
==> Australia Top 20
==> Brazil Top 20
==> Greece Top 20
==> New Zealand Top 20
==> Bulgaria Top 20
==> Portugal Top 20
==> Airplay World Official Top 100
==> Argentina Top 20
==> Austria Top 20
==> Belgium Top 20
==> Canada Top 20
==> Chile Top 20
==> Denmark Top 20
==> Digital Sales Top 100
==> Europe Official Top 100
==> Finland Top 20
==> France Top 20
==> HeatSeekers Radio Tracks
==> Hispanic America Top 40
==> India Top 20
==> Ireland Top 20
==> Italy Top 20
==> Muchmusic Top 30
==> Netherlands Top 20
==> Norway Top 20
==> Russia Top 20
==> Spain Top 20
==> Sweden Top 20
==> Switzerland Top 20
==> Taiwan Top 10
==> UK Top 20 Albums
==> Ukraine Top 20
==> USA Albums
==> World Adult Top 20 Singles
==> World Country Top 20 Singles
==> World Dance / Trance Top 30 Singles
==> World Jazz Top 20 Singles
==> World Latin Top 30 Singles
==> World Modern Rock Top

***
****

# Match and Flag Artists

In [14]:
def getArtistRenames(ifile="masterRename.yaml"):
    """Load the artist rename table.

    Args:
        ifile: File name handed to ``getFile``. Defaults to
            ``"masterRename.yaml"``, the file this notebook has always read.

    Returns:
        Whatever ``getFile`` returns for ``ifile``.
        NOTE(review): presumably a mapping of artist names — confirm against
        ``fullCharts.setRenames``, which consumes it.
    """
    return getFile(ifile)

artistRenames = getArtistRenames()



In [20]:
%load_ext autoreload
%autoreload

from fullCharts import fullCharts
fCharts = fullCharts(t40Charts, ctype="Singles", country=None, minYear=2020)
fCharts.setRenames(artistRenames)
fCharts.setFullChartData()
fCharts.setArtistAlbumData()
artistAlbumData = fCharts.getArtistAlbumData()
saveFile(idata=artistAlbumData, ifile="currentArtistAlbumData.p", debug=True)
fullChartData = fCharts.getFullChartData()
saveFile(idata=fullChartData, ifile="currentFullChartArtistAlbumData.p", debug=True)

_, _ = clock("Last Run")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
==> USA Singles Top 40                      	71
==> UK Singles Top 40                       	114
==> Top40-Charts.com Web Top 100            	228
==> China Top 20                            	240
==> German Top 40                           	261
==> Japan Top 20                            	279
==> Australia Top 20                        	280
==> Brazil Top 20                           	299
==> Greece Top 20                           	311
==> New Zealand Top 20                      	315
==> Bulgaria Top 20                         	323
==> Portugal Top 20                         	332
==> Airplay World Official Top 100          	357
==> Argentina Top 20                        	375
==> Austria Top 20                          	389
==> Belgium Top 20                          	395
==> Canada Top 20                           	396
==> Chile Top 20                            	410
==> Denmark Top 20             

***
***
***

In [15]:
# Display the mapping returned by fCharts.getArtistAlbumData(); the recorded
# output shows {artist name: [album titles]}. The whole dict is rendered.
artistAlbumData

{'Eminem': ['Revival', 'Kamikaze', 'Music To Be Murdered By'],
 'Taylor Swift': ['Reputation', 'Lover'],
 'G-Eazy': ['The Beautiful & Damned', 'Scary Nights'],
 'Ed Sheeran': ['Divide', 'No.6 Collaborations Project'],
 'Pentatonix': ['A Pentatonix Christmas',
  "That's Christmas To Me",
  'PTX Presents: Top Pop, Vol. I',
  'Christmas Is Here!',
  'The Best Of Pentatonix Christmas'],
 'Jeezy': ['Pressure', 'TM104: The Legend Of The Snowman'],
 'Sam Smith': ['The Thrill Of It All'],
 'Luke Bryan': ['What Makes You Country'],
 'Garth Brooks': ['The Anthology: Part I, The First Five Years'],
 'Chris Stapleton': ['From A Room: Volume 2',
  'Traveller',
  'From A Room: Volume 1'],
 'Michael Bublé': ['Christmas', 'Love'],
 'Post Malone': ['Stoney', 'Beerbongs & Bentleys', "Hollywood's Bleeding"],
 'Brockhampton': ['Saturation III', 'iridescence', 'Ginger'],
 'Kendrick Lamar': ['DAMN.'],
 'Imagine Dragons': ['Evolve', 'Origins'],
 'Lil Uzi Vert': ['Luv Is Rage 2'],
 'Various Artists': ['Qualit