### Reach out to iTunes and pull recent albums from specified artists

In [1]:
!pip install pandas
!pip install jsontable

import io
import json
import time

import jsontable
import pandas as pd
import requests

Collecting pandas
  Downloading pandas-1.2.2-cp37-cp37m-manylinux1_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 4.3 MB/s eta 0:00:01
Collecting numpy>=1.16.5
  Downloading numpy-1.20.1-cp37-cp37m-manylinux2010_x86_64.whl (15.3 MB)
[K     |████████████████████████████████| 15.3 MB 44.6 MB/s eta 0:00:01
[?25hCollecting pytz>=2017.3
  Downloading pytz-2021.1-py2.py3-none-any.whl (510 kB)
[K     |████████████████████████████████| 510 kB 44.7 MB/s eta 0:00:01
Installing collected packages: numpy, pytz, pandas
Successfully installed numpy-1.20.1 pandas-1.2.2 pytz-2021.1
Collecting jsontable
  Downloading jsontable-0.1.1-py3-none-any.whl (7.8 kB)
Installing collected packages: jsontable
Successfully installed jsontable-0.1.1


Read in artists pulled manually from iTunes

In [2]:
# Retrieve the raw CSV of sample artists from GitHub and parse it with pandas.
# The bytes are decoded as UTF-8 and wrapped in an in-memory text buffer so
# read_csv can consume them like a file.
url = "https://raw.githubusercontent.com/patrick-still/music-engine/itunes/sampleArtists.csv"
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error; otherwise the body of an error page
# would be handed to read_csv and parsed as if it were the artist list.
response.raise_for_status()
fetch = response.content
artists = pd.read_csv(io.StringIO(fetch.decode('utf-8')))
artists.style

Unnamed: 0,Date Pulled,Artist
0,9/28/19,Mt. Feral
1,9/28/19,Red River Dialect
2,9/28/19,Chance Pena
3,9/28/19,FUTURE FEATS
4,9/28/19,"nothing, nowhere."
5,9/28/19,American Authors
6,9/28/19,Said The Whale
7,9/28/19,Bishop Briggs
8,9/28/19,pretty havoc.
9,9/28/19,Ackerman


Cycle through the artists, query iTunes for each one, and store that artist's most recent album

In [None]:
# For every artist in `artists`, ask the iTunes Search API for that artist's
# albums, keep the one with the latest release date, and gather the winners
# into the `newAlbums` dataframe.

# Pause before each request to avoid the 20 calls/minute limit on the iTunes API.
ITUNES_DELAY_SECONDS = 5

# One single-row dataframe per artist; concatenated once after the loop
# instead of growing a dataframe row by row.
newestPerArtist = []

for index, row in artists.iterrows():
    time.sleep(ITUNES_DELAY_SECONDS)
    newArtist = row["Artist"]

    # Hand the search term to requests via `params` so that spaces, commas or
    # an "&" inside an artist name are URL-encoded rather than corrupting the query.
    target = "https://itunes.apple.com/search"
    response = requests.get(
        target,
        params={"term": newArtist, "entity": "album"},
        timeout=30,
    )
    # Surface HTTP errors here instead of as a JSON decoding failure below.
    response.raise_for_status()
    request = response.json()

    # An artist with no matches would otherwise crash on df.iloc[[0]].
    if not request.get("results"):
        print("No albums found for {}".format(newArtist))
        continue

    # Map JSONPath expressions in the response to output column names.
    paths = [
        {"$.results.collectionType": "collectionType"},
        {"$.results.collectionName": "collectionName"},
        {"$.results.releaseDate": "releaseDate"},
    ]
    converter = jsontable.converter()
    converter.set_paths(paths)
    resultTable = converter.convert_json(request)

    df = pd.DataFrame(data=resultTable)
    df.columns = ["Album Type", "Album Name", "Release Date"]
    # NOTE(review): the first row is discarded, presumably because jsontable
    # emits the column labels as row 0 - confirm against the jsontable docs.
    df = df.drop(df.index[0])
    # Release dates arrive as ISO-8601 strings, which sort chronologically as text.
    df = df.sort_values(by='Release Date', ascending=False)

    # Double brackets keep the newest album as a one-row DataFrame, not a Series.
    newestPerArtist.append(df.iloc[[0]])

# DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.
newAlbums = pd.concat(newestPerArtist) if newestPerArtist else pd.DataFrame()

newAlbums.style

Query iTunes for metadata about its "Alternative" genre (genre id 20)

In [3]:
# Ask the iTunes genre service about genre id 20 and pretty-print the reply.
target = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/genres?id=20"
genreResponse = requests.get(target)
request = genreResponse.json()
prettyGenre = json.dumps(request, indent=2)
print(prettyGenre)

{
  "20": {
    "name": "Alternative",
    "id": "20",
    "url": "https://music.apple.com/us/genre/music-alternative/id20",
    "rssUrls": {
      "topAlbums": "https://itunes.apple.com/us/rss/topalbums/genre=20/json",
      "topSongs": "https://itunes.apple.com/us/rss/topsongs/genre=20/json"
    },
    "chartUrls": {
      "albums": "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=us&g=20&name=Albums",
      "songs": "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=us&g=20&name=Songs"
    },
    "subgenres": {
      "1230": {
        "name": "Chinese Alt",
        "id": "1230",
        "url": "https://music.apple.com/us/genre/music-alternative-chinese-alt/id1230",
        "rssUrls": {
          "topAlbums": "https://itunes.apple.com/us/rss/topalbums/genre=1230/json",
          "topSongs": "https://itunes.apple.com/us/rss/topsongs/genre=1230/json"
        },
        "chartUrls": {
          "albums": "https://itunes.apple.com/WebObjects/MZStor

Requesting albums for a particular artist looks like this

In [4]:
# Search iTunes for albums matching "bastille" and pretty-print the reply.
# `request` holds the decoded JSON payload and is reused by the cells below.
target = "https://itunes.apple.com/search?term=bastille&entity=album"
albumResponse = requests.get(target)
request = albumResponse.json()
prettyAlbums = json.dumps(request, indent=2)
print(prettyAlbums)

{
  "resultCount": 50,
  "results": [
    {
      "wrapperType": "collection",
      "collectionType": "Album",
      "artistId": 420203509,
      "collectionId": 1440858222,
      "amgArtistId": 2528804,
      "artistName": "Bastille",
      "collectionName": "Bad Blood (Bonus Track Version)",
      "collectionCensoredName": "Bad Blood (Bonus Track Version)",
      "artistViewUrl": "https://music.apple.com/us/artist/bastille/420203509?uo=4",
      "collectionViewUrl": "https://music.apple.com/us/album/bad-blood-bonus-track-version/1440858222?uo=4",
      "artworkUrl60": "https://is3-ssl.mzstatic.com/image/thumb/Music114/v4/d7/e7/1f/d7e71fa0-99b1-f725-4e8b-4ea87808c874/source/60x60bb.jpg",
      "artworkUrl100": "https://is3-ssl.mzstatic.com/image/thumb/Music114/v4/d7/e7/1f/d7e71fa0-99b1-f725-4e8b-4ea87808c874/source/100x100bb.jpg",
      "collectionPrice": 6.99,
      "collectionExplicitness": "notExplicit",
      "trackCount": 15,
      "copyright": "\u2117 2013 Virgin Records Limite

Store pieces of the response in a table

In [5]:
# Pick three fields out of every entry in the search response.
# Each mapping pairs a JSONPath expression with the column name to give it.
paths = [
    {"$.results.collectionType": "collectionType"},
    {"$.results.collectionName": "collectionName"},
    {"$.results.releaseDate": "releaseDate"},
]

# Configure a jsontable converter with those paths and run it over the
# `request` payload fetched in the previous cell.
converter = jsontable.converter()
converter.set_paths(paths)
resultTable = converter.convert_json(request)

Convert the table to a Pandas dataframe

In [24]:
# Turn the converted table into a dataframe, newest release first.
# The release dates are ISO-8601 strings (e.g. 2020-12-11T08:00:00Z), which
# sort chronologically as plain text, so no datetime conversion is needed.
df = (
    pd.DataFrame(data=resultTable)
    .set_axis(["Album Type", "Album Name", "Release Date"], axis=1)
    # Discard the first row of the converted table before sorting.
    .iloc[1:]
    .sort_values(by="Release Date", ascending=False)
)
df.style

Unnamed: 0,Album Type,Album Name,Release Date
43,Album,Merry Xmas Everybody (For Nest Audio Sessions) - Single,2020-12-11T08:00:00Z
23,Album,Goosebumps - EP,2020-12-04T08:00:00Z
17,Album,survivin' - Single,2020-09-22T07:00:00Z
28,Album,WHAT YOU GONNA DO??? (feat. Graham Coxon) - Single,2020-07-30T07:00:00Z
12,Album,Doom Days (This Got Out of Hand Edition),2019-12-06T08:00:00Z
29,Album,Doom Days (This Got Out of Hand Edition),2019-12-06T08:00:00Z
44,Album,Can’t Fight This Feeling (feat. London Contemporary Orchestra) - Single,2019-11-19T08:00:00Z
22,Album,Another Place - Single,2019-10-25T07:00:00Z
45,Album,Million Pieces (M-22 Remix) - Single,2019-09-27T07:00:00Z
7,Album,Doom Days,2019-06-14T07:00:00Z


Pull out most recent album for the artist and add it to a new cumulative dataframe

In [42]:
# Take the top row of the date-sorted table as the artist's newest album.
# Extra brackets necessary or content will be transposed: df.iloc[0] yields a
# Series, whereas df.iloc[[0]] keeps a one-row DataFrame.
newest = df.iloc[[0]]
# Start the cumulative table of newest albums. DataFrame.append was removed
# in pandas 2.0, so pd.concat is used; further one-row frames can be added
# to the list to extend the table.
newAlbums = pd.concat([newest])
newAlbums.style

Unnamed: 0,Album Type,Album Name,Release Date
43,Album,Merry Xmas Everybody (For Nest Audio Sessions) - Single,2020-12-11T08:00:00Z
