In [8]:
import requests

# Your podcast ID
podcast_id = '1462776122'

# iTunes Search API endpoint with the podcast ID and entity type set to podcast episode.
# The request asks for at most 200 results (limit=200).
api_url = f'https://itunes.apple.com/lookup?id={podcast_id}&entity=podcastEpisode&limit=200'

# Make a request to the iTunes Search API. The timeout keeps the cell from
# hanging indefinitely if the server never answers.
response = requests.get(api_url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response as JSON
    data = response.json()

    # Keep only entries marked as episodes, and skip any episode that has no
    # 'trackViewUrl' instead of failing with a KeyError. A response without a
    # 'results' key is treated as an empty result list.
    episode_urls = [
        item['trackViewUrl']
        for item in data.get('results', [])
        if item.get('kind') == 'podcast-episode' and item.get('trackViewUrl')
    ]

    # Output the URLs to the console
    for url in episode_urls:
        print(url)

    # Save the URLs one per line; the explicit encoding makes the file
    # independent of the platform's default locale.
    with open('episode_urls.txt', 'w', encoding='utf-8') as file:
        for url in episode_urls:
            file.write(url + '\n')

    print(f"{len(episode_urls)} episode URLs have been extracted and saved to episode_urls.txt")

else:
    print('Failed to retrieve data:', response.status_code)


"""
However, the code above only retrieves 200 items, and we have 450+ episodes to get...
"""


https://podcasts.apple.com/us/podcast/tackling-cold-chain-emissions-with-artyc/id1462776122?i=1000634243100&uo=4
https://podcasts.apple.com/us/podcast/capital-series-jeff-johnson-temasek/id1462776122?i=1000634116457&uo=4
https://podcasts.apple.com/us/podcast/creative-climate-campaigns-with-stand-earth/id1462776122?i=1000633829370&uo=4
https://podcasts.apple.com/us/podcast/capital-series-mark-berryman-and-nick-flores-caprock/id1462776122?i=1000633331566&uo=4
https://podcasts.apple.com/us/podcast/mobilizing-gen-z-for-climate-action-with-elise-joshi/id1462776122?i=1000633062944&uo=4
https://podcasts.apple.com/us/podcast/decarbonizing-commercial-real-estate-with-lumen-energy/id1462776122?i=1000632677888&uo=4
https://podcasts.apple.com/us/podcast/ubers-road-to-sustainability/id1462776122?i=1000632256223&uo=4
https://podcasts.apple.com/us/podcast/illuminating-indias-energy-future-with-residential-solar/id1462776122?i=1000631843465&uo=4
https://podcasts.apple.com/us/podcast/capital-series-reb

In [11]:
"""
To handle pagination when a podcast has more episodes than the maximum limit,
you would normally use an offset parameter to fetch subsequent pages.
Unfortunately, the iTunes API doesn't officially support an offset parameter for pagination.
Therefore, if you have more episodes than the maximum limit,
the recommended approach is to parse the podcast's RSS feed directly, as the feed typically contains all episodes.
"""

import requests
import xml.etree.ElementTree as ET

def parse_rss_feed(feed_url):
    """Download a podcast RSS feed and return one URL per episode.

    For every ``channel/item`` element the first usable candidate is taken,
    in this order: the ``<link>`` text, the ``url`` attribute of
    ``<enclosure>``, then the ``<guid>`` text. A text candidate only counts
    when it contains ``'http'``; items with no usable candidate are skipped.

    Args:
        feed_url: Address of the RSS feed to download.

    Returns:
        A list of URL strings in feed order, or an empty list when the
        server does not answer with HTTP 200.
    """
    # The timeout stops a stalled server from blocking the notebook forever.
    response = requests.get(feed_url, timeout=30)
    if response.status_code != 200:
        print('Failed to retrieve feed data:', response.status_code)
        return []

    # Parse the XML response
    root = ET.fromstring(response.content)
    episode_urls = []

    # 'channel', 'item', 'link', 'enclosure' and 'guid' are looked up without
    # a prefix, so no namespace map is needed for these searches.
    for item in root.findall('channel/item'):
        # An empty tag such as <link/> has no text, so fall back to '' and
        # strip surrounding whitespace before testing for 'http'.
        link_text = (item.findtext('link') or '').strip()
        guid_text = (item.findtext('guid') or '').strip()
        enclosure = item.find('enclosure')

        # Prefer 'link' if it looks like a URL, otherwise 'enclosure', then 'guid'
        if 'http' in link_text:
            episode_urls.append(link_text)
        elif enclosure is not None and 'url' in enclosure.attrib:
            episode_urls.append(enclosure.attrib['url'])
        elif 'http' in guid_text:
            episode_urls.append(guid_text)

    return episode_urls

# Your podcast ID
podcast_id = '1462776122'

# iTunes Search API endpoint with the podcast ID
api_url = f'https://itunes.apple.com/lookup?id={podcast_id}'

# Make a request to the iTunes Search API. The timeout keeps the cell from
# hanging indefinitely if the server never answers.
response = requests.get(api_url, timeout=30)

if response.status_code == 200:
    data = response.json()

    # The lookup can succeed with an empty result list, and a result may lack
    # 'feedUrl'; handle both instead of raising IndexError/KeyError.
    results = data.get('results') or []
    feed_url = results[0].get('feedUrl') if results else None

    if feed_url:
        episode_urls = parse_rss_feed(feed_url)

        for url in episode_urls:
            print(url)

        # Explicit encoding makes the output file independent of the locale.
        with open('episode_urls.txt', 'w', encoding='utf-8') as file:
            for url in episode_urls:
                file.write(url + '\n')

        print(f"{len(episode_urls)} episode URLs have been extracted and saved to episode_urls.txt")
    else:
        print('No RSS feed URL found for podcast ID:', podcast_id)
else:
    print('Failed to retrieve data:', response.status_code)



https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/media/podcast
https://www.mcjcollective.com/me

In [2]:
import requests

# Your podcast ID
podcast_id = '1462776122'

# Function to fetch episodes
def fetch_episodes(podcast_id, limit, offset):
    """Request one batch of episode records from the iTunes lookup endpoint.

    Args:
        podcast_id: iTunes identifier of the podcast.
        limit: Value sent as the ``limit`` query parameter.
        offset: Value sent as the ``offset`` query parameter.
            NOTE(review): the lookup endpoint may not honour ``offset`` --
            confirm before relying on it for pagination.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (after printing the status code).
    """
    api_url = f'https://itunes.apple.com/lookup?id={podcast_id}&entity=podcastEpisode&limit={limit}&offset={offset}'
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(api_url, timeout=30)
    if response.status_code != 200:
        print('Failed to retrieve data:', response.status_code)
        return None
    return response.json()

# This will hold all the episode URLs, in the order they are first seen
episode_urls = []
seen_urls = set()

# Constants
limit = 200  # Max limit per request
offset = 0

# Fetch page after page until a request fails or a page contributes nothing
# new. 'resultCount' is not used as the loop bound: a limit=1 probe cannot
# report more than the handful of results it returned.
# NOTE(review): assumes 'resultCount' counts results in the response, not the
# podcast's total episodes -- confirm against the API documentation.
# Stopping on "no new URLs" also guarantees termination, without duplicates,
# if the server ignores 'offset' and keeps returning the same page.
while True:
    data = fetch_episodes(podcast_id, limit, offset)
    if not data:
        # fetch_episodes has already printed the failing status code.
        break

    new_urls = []
    for item in data.get('results', []):
        url = item.get('trackViewUrl')
        if item.get('kind') == 'podcast-episode' and url and url not in seen_urls:
            seen_urls.add(url)
            new_urls.append(url)

    if not new_urls:
        break

    episode_urls.extend(new_urls)
    offset += limit

# Kept as a notebook-level name; it now reflects what was actually collected.
total_episodes = len(episode_urls)

# Output the URLs to the console and save to a file
with open('episode_urls.txt', 'w', encoding='utf-8') as file:
    for url in episode_urls:
        print(url)
        file.write(url + '\n')

print(f"{len(episode_urls)} episode URLs have been extracted and saved to episode_urls.txt")


https://podcasts.apple.com/us/podcast/mining-the-future-with-glencore/id1462776122?i=1000634608473&uo=4
https://podcasts.apple.com/us/podcast/tackling-cold-chain-emissions-with-artyc/id1462776122?i=1000634243100&uo=4
https://podcasts.apple.com/us/podcast/capital-series-jeff-johnson-temasek/id1462776122?i=1000634116457&uo=4
https://podcasts.apple.com/us/podcast/creative-climate-campaigns-with-stand-earth/id1462776122?i=1000633829370&uo=4
https://podcasts.apple.com/us/podcast/capital-series-mark-berryman-and-nick-flores-caprock/id1462776122?i=1000633331566&uo=4
https://podcasts.apple.com/us/podcast/mobilizing-gen-z-for-climate-action-with-elise-joshi/id1462776122?i=1000633062944&uo=4
https://podcasts.apple.com/us/podcast/decarbonizing-commercial-real-estate-with-lumen-energy/id1462776122?i=1000632677888&uo=4
https://podcasts.apple.com/us/podcast/ubers-road-to-sustainability/id1462776122?i=1000632256223&uo=4
https://podcasts.apple.com/us/podcast/illuminating-indias-energy-future-with-resi