In [None]:
!pip install beautifulsoup4 feedparser

Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m744.5 kB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6047 sha256=e00286b4a65267988f4b7154c8a76455a9404017aa5dcc52e89a2552cb63fe5b
  Stored in directory: /root/.cache/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.11 sgmllib3k-1.0.0


In [None]:
# This cell checks how many MP3 files are available for download from the RSS feed. It is only a sanity check.
import feedparser

# RSS feed to check
feed_url = "https://feeds.megaphone.fm/BLU6112880430"
feed = feedparser.parse(feed_url)

# feedparser does not raise on fetch/parse problems; it flags them via `bozo`.
# Surface that here so a failed fetch is not mistaken for "0 MP3s available".
if feed.bozo:
    print(f"Warning: feed was not parsed cleanly: {feed.get('bozo_exception')}")

# Count entries whose first enclosure is declared as MP3 audio.
# `.get(...)` is used instead of attribute access so an entry without
# enclosures, or an enclosure without a `type`, is skipped rather than raising.
mp3_count = sum(
    1
    for entry in feed.entries
    if entry.get('enclosures') and entry.enclosures[0].get('type') == 'audio/mpeg'
)

print(f"Actual MP3 files available: {mp3_count}")

In [None]:
import requests
from bs4 import BeautifulSoup
import os
import time
from datetime import datetime
import pandas as pd

def _download_audio(audio_url, filepath, timeout=60):
    """Stream one audio file from ``audio_url`` to ``filepath``.

    The body is written to ``<filepath>.part`` and renamed into place only
    after the whole response has been received. A failed or interrupted
    request therefore never leaves a truncated/garbage file at ``filepath``
    that a later run would treat as an already-downloaded episode.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeouts.
        OSError: if the file cannot be written.
    """
    partial_path = f"{filepath}.part"
    try:
        with requests.get(audio_url, stream=True, timeout=timeout) as audio_response:
            # Without this check an HTML error page would be saved as ".mp3".
            audio_response.raise_for_status()
            with open(partial_path, 'wb') as f:
                # Stream in 1 MiB chunks instead of holding the episode in memory.
                for chunk in audio_response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, filepath)
    finally:
        # Only still present if something above failed before the rename.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_nba_podcast(podcast_name, url):
    """Download the 2023-24 season episodes of a podcast from its RSS feed.

    Every ``<item>`` in the feed whose publication date falls between
    2023-10-18 and 2024-04-14 (both days inclusive) is saved as
    ``<podcast_name>/<cleaned title>.mp3`` unless that file already exists.
    A summary of the selected episodes is written to
    ``<podcast_name>_2023_24_season.csv``.

    Args:
        podcast_name: Used as the download directory name and as the prefix
            of the CSV file name.
        url: URL of the podcast's RSS feed.

    Returns:
        A pandas DataFrame with columns ``title``, ``publication_date``,
        ``file_path``, ``duration`` and ``file_size`` (in MB, rounded to two
        decimals), sorted newest first; or ``None`` if no episode matched or
        the feed itself could not be fetched/processed.

    Notes:
        An episode that cannot be parsed or downloaded is reported and
        skipped; it does not abort the remaining episodes. Failures affecting
        the whole run are printed and result in ``None``; nothing is raised.
    """
    try:
        # Get the RSS feed; fail early on HTTP errors instead of parsing an error page
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'xml')

        # Create directory for downloads if it doesn't exist
        download_dir = podcast_name
        os.makedirs(download_dir, exist_ok=True)

        # Define season date range
        season_start = datetime(2023, 10, 18)
        season_end = datetime(2024, 4, 14)

        # Create a list to store episode information
        episode_data = []

        for episode in soup.find_all('item'):
            try:
                # Parse publication date
                pub_date = datetime.strptime(episode.pubDate.text, '%a, %d %b %Y %H:%M:%S %z')

                # Compare calendar dates (in the feed's own timezone) so that
                # episodes published at any time on the last day are included;
                # comparing against midnight of season_end would drop them.
                if not (season_start.date() <= pub_date.date() <= season_end.date()):
                    continue

                title = episode.title.text
                audio_url = episode.enclosure['url']

                # Clean filename
                clean_title = "".join(x for x in title if x.isalnum() or x in (' ', '-', '_'))
                filename = f"{clean_title}.mp3"
                filepath = os.path.join(download_dir, filename)
                file_size = episode.enclosure.get('length', 0)
                duration_tag = episode.find('itunes:duration')
                duration = duration_tag.text if duration_tag is not None else None

                # Download if file doesn't exist
                if not os.path.exists(filepath):
                    print(f"Downloading: {title}")
                    _download_audio(audio_url, filepath)

                    # Delay for server
                    time.sleep(1)
            except Exception as e:
                # Best effort: one malformed item or failed download must not
                # discard the rest of the season.
                print(f"Skipping episode ({type(e).__name__}: {e})")
                continue

            # Store episode information
            episode_data.append({
                'title': title,
                'publication_date': pub_date,
                'file_path': filepath,
                'duration': duration,
                'file_size': file_size
            })

        if episode_data:
            # Create a DataFrame with episode information
            df = pd.DataFrame(episode_data)

            # Convert file size to MB and round; a missing or non-numeric
            # enclosure length becomes NaN instead of raising.
            df['file_size'] = pd.to_numeric(df['file_size'], errors='coerce') / (1024 * 1024)
            df['file_size'] = df['file_size'].round(2)

            # Sort by publication date
            df = df.sort_values('publication_date', ascending=False)

            # Save to CSV
            df.to_csv(f'{podcast_name}_2023_24_season.csv', index=False)

            print(f"Downloaded {len(episode_data)} episodes from 2023-24 season to {download_dir}")
            return df
        else:
            print("No episodes found for the 2023-24 season")
            return None

    except Exception as e:
        # Deliberate catch-all: callers get None rather than an exception.
        print(f"An error occurred: {str(e)}")
        return None

# RSS feeds collected for this project, keyed by the podcast name that is also
# used as the download folder and CSV prefix. One podcast is run at a time:
# change `podcast_name` below to another key instead of editing URLs by hand.
PODCAST_FEEDS = {
    "Locked On NBA": "https://feeds.simplecast.com/vaGXFJKO",
    "Fantasy NBA Today": "https://feeds.simplecast.com/LDTaSfes",
    "The Ringer NBA Show": "https://feeds.megaphone.fm/the-ringer-nba-show",
    "The Dunker Spot": "https://rss.art19.com/the-dunker-spot",
    "RotoWire Fantasy Basketball": "https://feeds.megaphone.fm/BLU6112880430",
}

if __name__ == "__main__":
    podcast_name = "RotoWire Fantasy Basketball"
    # Historical variable name: it holds the feed URL of whichever podcast is
    # selected above, not only the Locked On feed.
    locked_on_url = PODCAST_FEEDS[podcast_name]
    download_nba_podcast(podcast_name, locked_on_url)


Downloading: Fantasy Parting Thoughts, Shutdown Parade + Can the Red-Hot Mavs Win the Title?
Downloading: Recapping a Big NBA Tuesday: Giannis Goes Down, Warriors Go Off, Orlando Falls to Houston, What to Make of the Suns + More
Downloading: Trae Young, KAT Set to Return, Playoff Forecast + Full Tuesday Night Preview
Downloading: Final Week Adds/Drops, Playoff Races + Where to Take Zion, Chet, Wembanyama, Reaves & Others Next Season
Downloading: Friday Episode Finale: Six Waiver Suggestions, Reviewing Sleepers & Busts
Downloading: Bucks Fall to Washington, Wembanyama vs. Jokic + Most Disappointing Fantasy Players in 2023-24
Downloading: Late-Season Shutdowns, Pacers' Upside, Booker Goes Off + Should We Trust the Pels?
Downloading: Wembanyama's Outrageous Rookie Season, Kings Lose Malik Monk, Draft Decisions for 2024-25 + Live Q&A
Downloading: Seven Quick Waiver Wire Suggestions for Fantasy Basketball Playoffs
Downloading: Weekend Pickups to Bring Home Your Fantasy Championship
Download

In [None]:
# Compress to tar folder for ease of transferring multiple mp3 files
!tar -czvf /content/'Rotowire Fantasy Basketball'.tar.gz /content/'RotoWire Fantasy Basketball'

tar: Removing leading `/' from member names
/content/RotoWire Fantasy Basketball/
/content/RotoWire Fantasy Basketball/Haliburtons Rise Must-Add Players Desmond Bane  More.mp3
/content/RotoWire Fantasy Basketball/Luka Coby White Injuries Gafford Chasing Wilt Nuggets vs Celtics Bad-Team Concerns  More.mp3
/content/RotoWire Fantasy Basketball/Post-Ja Grizzlies Siakam and Bridges Trades Stash Targets  More with Brandon Kravitz.mp3
/content/RotoWire Fantasy Basketball/Weekend Recap  Buy LowSell High Targets  James Harden Expectations with Adam King.mp3
/content/RotoWire Fantasy Basketball/Weekend Recap Tales from the Waiver Wire  Who to Add Drop and Trade.mp3
/content/RotoWire Fantasy Basketball/Opening Week Fantasy Takeaways  30-Team Draft Review.mp3
/content/RotoWire Fantasy Basketball/NBA Fantasy Waiver Wire for Week 5.mp3
/content/RotoWire Fantasy Basketball/Embiid Goes Down All-Star Reserve Picks  Fantasy Hoops QA.mp3
/content/RotoWire Fantasy Basketball/Fantasy Basketball Waiver Wire