## Import Statements

In [1]:
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from pytubefix import YouTube, Channel
import os

In [2]:
def get_videos_df_from_channel(channel_url, keep_missing=False):
    """Collect metadata and plain-text transcripts for the videos of a channel.

    Parameters
    ----------
    channel_url : str
        URL handed unchanged to ``Channel``.
    keep_missing : bool, default False
        When False (the historical behaviour) a video whose transcript cannot
        be fetched or formatted is left out of the result. When True the video
        is kept and its ``transcript`` value is the placeholder ``'NA'``.

    Returns
    -------
    pandas.DataFrame
        One row per retained video with the columns ``video_id``, ``title``,
        ``publish_date``, ``length``, ``thumbnail``, ``vid_info`` and
        ``transcript``. An empty frame without columns when nothing is retained.
    """
    channel = Channel(channel_url)

    # Read the video collection once and reuse it; the original re-read
    # `channel.videos` (and took its length) for every progress message.
    # NOTE(review): presumably this attribute is lazily evaluated in
    # pytubefix, which would make repeated access expensive -- confirm.
    videos = channel.videos
    total = len(videos)
    print(f'There are {total} videos in {channel.channel_name}')

    formatter = TextFormatter()
    records = []

    for idx, video in enumerate(videos):
        if idx % 10 == 0:
            print(f'processing {idx}/{total} video')

        record = {
            'video_id': video.video_id,
            'title': video.title,
            'publish_date': video.publish_date,
            'length': video.length,
            'thumbnail': video.thumbnail_url,
            'vid_info': video.vid_info,
        }

        try:
            transcript = YouTubeTranscriptApi.get_transcript(video.video_id)
            record['transcript'] = formatter.format_transcript(transcript)
        except Exception:
            # `Exception` rather than a bare `except` so that Ctrl-C
            # (KeyboardInterrupt) still stops a long-running scrape.
            print('No transcripts available for ', video.video_id)
            if not keep_missing:
                continue
            record['transcript'] = 'NA'

        records.append(record)

    # Build the frame once from all records instead of concatenating a
    # one-row frame per video, which copies the whole frame each iteration.
    return pd.DataFrame(records)

In [3]:
# Build the per-video metadata/transcript DataFrame for the Huberman Lab channel.
hub_df = get_videos_df_from_channel('https://www.youtube.com/@hubermanlab')

There are 217 videos in Andrew Huberman
processing 0/217 video
processing 10/217 video
processing 20/217 video
processing 30/217 video
processing 40/217 video
processing 50/217 video
processing 60/217 video
No transcripts available for  doupx8SAs5Y
processing 70/217 video
processing 80/217 video
No transcripts available for  ufsIA5NARIo
processing 90/217 video
processing 100/217 video
processing 110/217 video
processing 120/217 video
processing 130/217 video
processing 140/217 video
processing 150/217 video
processing 160/217 video
processing 170/217 video
processing 180/217 video
processing 190/217 video
No transcripts available for  JPX8g8ibKFc
No transcripts available for  x7qbJeRxWGw
No transcripts available for  17O5mgXZ9ZU
No transcripts available for  qJXKhu5UZwk
processing 200/217 video
No transcripts available for  rBdhqBGqiMc
No transcripts available for  J7SrAEacyf8
No transcripts available for  hcuMLQVAgEg
No transcripts available for  PZ-GvIOhcf8
No transcripts available f

In [4]:
# Persist the collected rows to CSV, leaving the DataFrame index out of the file.
hub_df.to_csv('huberman_transcripts_050124.csv', index=False)

In [8]:
# Write every transcript in hub_df to its own markdown file named after the
# video title.
output_dir = 'transcripts'

# Create the target directory up front; open() does not create missing
# directories and would otherwise raise FileNotFoundError on a fresh run.
os.makedirs(output_dir, exist_ok=True)

# Iterate over each row in the DataFrame
for index, row in hub_df.iterrows():
    # Get the title and transcript
    title = row['title']
    transcript = row['transcript']

    # A path separator inside a title would be read as a directory boundary,
    # so replace it; titles without separators keep their exact filename.
    safe_title = title
    for separator in (os.sep, os.altsep):
        if separator:
            safe_title = safe_title.replace(separator, '_')

    # Define the filename for the markdown file
    filename = os.path.join(output_dir, f"{safe_title}.md")

    # Write transcript to markdown file; the explicit encoding keeps the
    # output independent of the platform default and safe for non-ASCII text.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(transcript)

transcripts/Dr. Matt Walker: Improve Sleep to Boost Mood & Emotional Regulation | Huberman Lab Guest Series.md
transcripts/AMA #17: Making Time for Fitness, Top Sleep Tools & Best Learning Strategies.md
transcripts/Protocols to Strengthen & Pain Proof Your Back.md
transcripts/Dr. Matt Walker: Using Sleep to Improve Learning, Creativity & Memory | Huberman Lab Guest Series.md
transcripts/Dr. Bonnie Halpern-Felsher: Vaping, Alcohol Use & Other Risky Youth Behaviors.md
transcripts/LIVE EVENT Q&A: Dr. Andrew Huberman at the Sydney Opera House.md
transcripts/Dr. Matt Walker: How to Structure Your Sleep, Use Naps & Time Caffeine | Huberman Lab Guest Series.md
transcripts/Dr. David Yeager: How to Master Growth Mindset to Improve Performance.md
transcripts/Dr. Matt Walker: Protocols to Improve Your Sleep | Huberman Lab Guest Series.md
transcripts/Coleman Ruiz: Overcoming Physical & Emotional Challenges.md
transcripts/Dr. Matt Walker: The Biology of Sleep & Your Unique Sleep Needs | Huberman La