<a href="https://colab.research.google.com/github/krishnavarathan/REST-API/blob/main/iTunes__search__API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [212]:
# The documentation for this particular API can be found here:
# https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/iTuneSearchAPI/
import requests
import json
import pandas as pd
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

## Another example: the iTunes search API

### Passing parameters in the request

In [173]:
# define base URL
base_site = "https://itunes.apple.com/search"

In [174]:
# We can manually add parameters to the URL, as seen before
# E.G., searching for 'the beatles'
url = base_site + "?term=the+beatles&country=us"
print(url)
# submit a GET request with parameters needed
param=requests.get(url)
print(param)

https://itunes.apple.com/search?term=the+beatles&country=us
<Response [200]>


In [175]:
# Note, that the space in 'the beatles' was replaced with a '+' in the URL
# Having to worry about special symbols in the URL can make the code harder to write and 'more buggy'

In [176]:
# Another way of expressing the parameters is to pass them to the get() method
# We pass the key/value parameter pairs as a dictionary to 'params'

r = requests.get(base_site, params = {"term": "the beatles", "country": "us"})
print(type(r))
print(r)
r.status_code

<class 'requests.models.Response'>
<Response [200]>


200

In [177]:
# The request package incorporates those paramaters into the URL automatically
# check the URL we submitted the request to
r.url

'https://itunes.apple.com/search?term=the+beatles&country=us'

In [178]:
# Inspecting the response's JSON
import json
info = r.json()
# print(info)
# print(json.dumps(info, indent=4))

In [179]:
# This way of stating parameters is the preffered one

### Investigating the output and parameters

In [180]:
# The request went through OK
r.status_code

200

In [181]:
# This seems to contain a lot of data
# Let's check if there are some keys we don't see at first glance in the outermost dictionary
info.keys()

dict_keys(['resultCount', 'results'])

In [182]:
# There are, indeed, only these two keys

In [183]:
# The second one contains a list of all the results
# Let's look at one such result

print(json.dumps(info['results'][0], indent=3))

# It's a simple dictionary with a lot of data



{
   "wrapperType": "track",
   "kind": "feature-movie",
   "trackId": 498304439,
   "artistName": "Gillian Bartlett",
   "trackName": "The Beatles: Parting Ways",
   "trackCensoredName": "The Beatles: Parting Ways",
   "trackViewUrl": "https://itunes.apple.com/us/movie/the-beatles-parting-ways/id498304439?uo=4",
   "previewUrl": "https://video-ssl.itunes.apple.com/itunes-assets/Video128/v4/08/d1/6b/08d16b45-ff55-b4bb-8c25-d483d3193bbf/mzvf_5312432095614841236.640x480.h264lc.U.p.m4v",
   "artworkUrl30": "https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/30x30bb.jpg",
   "artworkUrl60": "https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/60x60bb.jpg",
   "artworkUrl100": "https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/100x100bb.jpg",
   "collectionPrice": 3.99,
   "trackPrice": 3.99,
   "trackRentalPrice": 1.99,
   "releaseDate": "2010-07-27T07:00:00Z",
   "collectionExplicitness": "notExplicit",
   "trackExplicitnes

In [184]:
# Dictionary keys of 'info'
info.keys()

dict_keys(['resultCount', 'results'])

In [185]:
# Finally, let's check the response to an invalid input
check_resp = requests.get(base_site, params = {"term": "alternative", "country": "us", "media": "hahaha"})
check_resp.ok

False

In [186]:
# Status code is 400 - meaning 'Bad request'
check_resp.status_code

400

In [187]:
# Error message
check_resp.json()

{'errorMessage': 'Invalid value(s) for key(s): [mediaType]',
 'queryParameters': {'output': 'json',
  'callback': 'A javascript function to handle your search results',
  'country': 'ISO-2A country code',
  'limit': 'The number of search results to return',
  'term': 'A search string',
  'lang': 'ISO-2A language code'}}

### Structuring and exporting the data

In [188]:
# It may be useful to store the data in a structured form
# The pandas package is great for that, as we can use its dataframe (basically a table)
# Since the results is a list of 'shallow' dictionaries, it neatly fits into a table
# A more complicated, nested dictionary may not be easily transformable into a table

In [189]:
import pandas as pd

In [190]:
# Creating the dataframe and populating it with the results of our search
print(info['results'])
songs_df = pd.DataFrame(info["results"])
print(songs_df.columns.isna())
print(songs_df.index)

[{'wrapperType': 'track', 'kind': 'feature-movie', 'trackId': 498304439, 'artistName': 'Gillian Bartlett', 'trackName': 'The Beatles: Parting Ways', 'trackCensoredName': 'The Beatles: Parting Ways', 'trackViewUrl': 'https://itunes.apple.com/us/movie/the-beatles-parting-ways/id498304439?uo=4', 'previewUrl': 'https://video-ssl.itunes.apple.com/itunes-assets/Video128/v4/08/d1/6b/08d16b45-ff55-b4bb-8c25-d483d3193bbf/mzvf_5312432095614841236.640x480.h264lc.U.p.m4v', 'artworkUrl30': 'https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/30x30bb.jpg', 'artworkUrl60': 'https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/60x60bb.jpg', 'artworkUrl100': 'https://is1-ssl.mzstatic.com/image/thumb/Video/82/77/b7/mzl.cdosabnq.jpg/100x100bb.jpg', 'collectionPrice': 3.99, 'trackPrice': 3.99, 'trackRentalPrice': 1.99, 'releaseDate': '2010-07-27T07:00:00Z', 'collectionExplicitness': 'notExplicit', 'trackExplicitness': 'notExplicit', 'trackTimeMillis': 3073040, 'cou

In [191]:
# songs_df.to_excel("songs_info.xlsx")
# #print(songs_df)
# with open("songs_info.xlsx", 'r') as xl:
#   xls=xl.read()
#   print(xls)

In [287]:
# Exporting the data to a CSV (Comma Separated Values) file
x=songs_df.to_csv("songs_info.csv")
# with open("songs_info.csv", 'r') as f:
#   co=f.read()
#   #print(co)
# # with open("note.txt","r") as f:
# #   con = f.read()
# #   print(con)
# import pandas as pd
# dc=pd.read_csv("songs_info.csv")
# dc.head()
# #print(dc)

# fitting all simliar look-up  requests in one session with error handling


In [271]:
# Look up base url
look_base_url='https://itunes.apple.com/lookup'

# Look up Jack Johnson by iTunes artist ID
url_id = base_site + "?term=jack+jhnoson&limit=50"

# Look up Yelp Software application by iTunes ID
url__yelp_id=look_base_url+'?id=284910350'


# Look up multiple artists by their AMG artist IDs: https://itunes.apple.com/lookup?amgArtistId=468749,5723.
url_AMG=look_base_url+'?amgArtistId=468749,5723'

# Look up all albums for Jack Johnson: https://itunes.apple.com/lookup?id=909253&entity=album
url_albums=look_base_url+'?id=909253&entity=album'

# Look up an album by its AMG Album ID: https://itunes.apple.com/lookup?amgAlbumId=15175,15176,15177,15178,15183,15184,15187,1519,15191,15195,15197,15198.
url_AMG_ID=look_base_url+'?amgAlbumId=15175,15176,15177,15178,15183,15184,15187,1519,15191,15195,15197,15198'

# Look up a book by its 13 digit ISBN: https://itunes.apple.com/lookup?isbn=9780316069359.
url_ISBN=look_base_url+'?isbn=9780316069359'

all_url=[look_base_url, url__yelp_id, url_id, url_AMG, url_albums,url_AMG_ID,url_ISBN]
all_url

['https://itunes.apple.com/lookup',
 'https://itunes.apple.com/lookup?id=284910350',
 'https://itunes.apple.com/search?term=jack+jhnoson&limit=50',
 'https://itunes.apple.com/lookup?amgArtistId=468749,5723',
 'https://itunes.apple.com/lookup?id=909253&entity=album',
 'https://itunes.apple.com/lookup?amgAlbumId=15175,15176,15177,15178,15183,15184,15187,1519,15191,15195,15197,15198',
 'https://itunes.apple.com/lookup?isbn=9780316069359']

In [300]:
with requests.Session() as s:
  for index, url in enumerate(all_url):
    try:
      res = s.get(url, timeout=5)
      res.raise_for_status()

      res_js=res.json()
      df=pd.DataFrame(res_js['results'])

      filename = f"songs_info_{index}.csv"
      x=df.to_csv(f"songs_info_{index}.csv")

      print(f"{filename} file saved!")
    except requests.exceptions.TimeOut as e:
      print("Request is is faild: {}".format(e))
    except requests.exceptions.RequestException as e:
      print("Request is faild: {}".format(e))
    except requests.exceptions.ConnectionError as e:
      print("Request is faild: {}".format(e))
    except requests.exceptions.HTTPError as e:
      print("Request is faild: {}".format(e))

songs_info_0.csv file saved!
songs_info_1.csv file saved!
songs_info_2.csv file saved!
songs_info_3.csv file saved!
songs_info_4.csv file saved!
songs_info_5.csv file saved!
songs_info_6.csv file saved!


In [None]:
import os
import pandas as pd
import requests
import json # Import json for JSONDecodeError

# Define output directory for CSV files
output_dir = "/content/itunes_api_data"
os.makedirs(output_dir, exist_ok=True) # Create directory if it doesn't exist

session = requests.Session()
with session as s:
  for idx, url in enumerate(all_url): # Use enumerate to get an index for filenames
    print(f"\nAttempting request to: {url}")
    try:
      res = s.get(url)
      res.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
      res_js = res.json()

      # Check if 'results' key exists and is not empty before creating DataFrame
      if 'results' in res_js and res_js['results']:
        df = pd.DataFrame(res_js['results'])
        # Create a unique and valid filename for each output
        filename = os.path.join(output_dir, f"itunes_data_{idx}.csv")
        df.to_csv(filename, index=False) # index=False to avoid writing the DataFrame index as a column
        print(f"Successfully fetched data from {url} and saved to {filename}")
      else:
        print(f"No results found for {url} or 'results' key is missing/empty. Skipping CSV creation.")

    except requests.exceptions.HTTPError as e:
      print(f"Request to {url} failed with HTTP Error: {e}")
    except requests.exceptions.ConnectionError as e:
      print(f"Request to {url} failed with Connection Error: {e}")
    except requests.exceptions.Timeout as e: # Corrected 'TimeOut' to 'Timeout'
      print(f"Request to {url} failed with Timeout Error: {e}")
    except requests.exceptions.RequestException as e:
      print(f"Request to {url} failed with an unexpected Request Error: {e}")
    except json.decoder.JSONDecodeError as e:
      print(f"Failed to decode JSON response from {url}: {e}. Response content (first 200 chars): {res.text[:200]}...")
    except Exception as e:
      print(f"An unhandled error occurred for {url}: {e}")