In [24]:
import pandas as pd
import json
import re
import isodate
import requests
from time import sleep
from dateutil import parser
from email import utils
from datetime import datetime
from rfeed import Item, Feed, iTunesItem, Guid, Enclosure
from collections import defaultdict
from more_itertools import unique_everseen

In [25]:
# Load the cached YouTube API listing. JSON text is UTF-8, so the encoding is
# pinned explicitly rather than relying on the platform's default locale.
with open('../../metadata/youtube-api/videos_list_yutt.json', encoding='utf-8') as f:
    videos_list_yutt = json.load(f)

In [26]:
# Keep only the uploads whose title mentions the Dhammapada, then collect
# their titles in the same order.
videos = list(filter(lambda entry: 'Dhammapada' in entry['snippet']['title'], videos_list_yutt))
titles = [entry['snippet']['title'] for entry in videos]

In [27]:
# `numbers` accumulates every verse number found in a title; `mapping` points
# each verse number at the video record whose title mentions it.
numbers = []
mapping = {}

# Early titles that spell the verse number out (or omit the word "Verse") are
# rewritten to the numeric form before numbers are extracted.
# NOTE(review): the first replacement value has no space after the colon,
# unlike the prefix it replaces - confirm this is intended.
replacements = {
    'Dhammapada 1: ': 'Dhammapada Verse 1:',
    'Dhammapada Verse Two': 'Dhammapada Verse 2',
    'Dhammapada Verses Three and Four': 'Dhammapada Verses 3 & 4',
    'Dhammapada Verse Five': 'Dhammapada Verse 5',
    'Dhammapada Verse Six': 'Dhammapada Verse 6',
    'Dhammapada Verses Seven & Eight': 'Dhammapada Verses 7 & 8',
    'Dhammapada Verses Nine and Ten': 'Dhammapada Verses 9 & 10'
}

for idx, title in enumerate(titles):
    # Apply at most one replacement and write the result back to the video.
    for prefix, normalized in replacements.items():
        if not title.startswith(prefix):
            continue
        title = title.replace(prefix, normalized)
        videos[idx]['snippet']['title'] = title
        break

    found = re.findall(r'([0-9]+)', title)

    if len(found) == 3:
        # Three numbers are taken as an explicit list of verses.
        verse_numbers = [int(x) for x in found]
    elif len(found) == 2:
        # Two numbers are taken as an inclusive range.
        verse_numbers = list(range(int(found[0]), int(found[1]) + 1))
    elif len(found) == 1:
        verse_numbers = [int(found[0])]
    else:
        verse_numbers = []

    numbers.extend(verse_numbers)

    for n in verse_numbers:
        mapping[n] = videos[idx]

In [37]:
sorted(set(range(1, 235)) - set(numbers)) # written numbers missing

[]

In [38]:
# The verses contain Pali diacritics, so the file is decoded explicitly as
# UTF-8 instead of the platform default. Entries are separated by a line
# holding only '---'.
with open('verses.txt', encoding='utf-8') as f:
    verses = f.read().split('\n---\n')

In [44]:
# Verse numbers that are exempt from the title checks below.
exceptions = [207, 210, 198, 189, 190, 191, 184, 138, 139, 88, 232, 233, 228, 229]

# Check 1: every number in a verse entry appears in the title of the video
# that `mapping` associates with that number.
for verse in verses:
    for token in re.findall(r'[0-9]+', verse):
        num = int(token)
        assert (num in exceptions or str(num) in mapping[num]['snippet']['title'])

# Check 2: the i-th verse entry lines up with the i-th video, and the smallest
# number of each entry is lower than that of the previous entry.
last_nums = [235] # greatest number + 1
for i, verse in enumerate(verses):
    nums = [int(token) for token in re.findall(r'[0-9]+', verse)]
    nums.sort()
    for num in nums:
        assert (num in exceptions or str(num) in videos[i]['snippet']['title'])

    if not set(nums).issubset(exceptions):
        # check order
        assert nums[0] < last_nums[0]

    last_nums = nums

In [45]:
# Lookup table from the `link` column to the `keywords` column.
df = pd.read_json('../../metadata/subtitles/top_keywords_from_subtitles.json')
keywords = dict(zip(df['link'], df['keywords']))

In [46]:
def create_desc(verse, verse_english, sc_link, video, chapter):
    """Build the HTML long description for one episode.

    Args:
        verse: Pali verse text; newlines are turned into ``<br>`` tags.
        verse_english: English translation, inserted as-is.
        sc_link: URL of the translation source, used as both href and text.
        video: URL of the original video, used as both href and text.
        chapter: Text appended to the end of the "Keywords" line.

    Returns:
        The filled-in HTML template as a string.
    """
    verse_html = verse.replace('\n', '<br>')
    # The "How To Meditate Booklet" href previously began with a stray space.
    template = """<p><b>Dhammapada verse(s) in Pali:</b></p>
<br><p>%s</p><br>
<br><p><b>English translation of verse(s):</b></p>
<br><p>%s</p>
<br><p>(Translated by Acharya Buddharakkhita)</p>
<br><hr>
<br><p>A series on the Dhammapada, a set of 423 verse teachings given by the Buddha, including explanation of the Pali verse, a synopsis of the background story and application of the teaching to our practice by Yuttadhammo Bhikkhu.</p>
<br><p>This Dhammapada Part I podcast includes Chapters 1-17 (Verses 1-234). Please subscribe to Dhammapada Part II for the verses in the rest of the chapters. New episodes are published each week in the Dhammapada Part II podcast.</p>
<br><p>Yuttadhammo Bhikkhu is a Canadian-born Theravada Buddhist monk, ordained in 2001 under the guidance of Venerable Ajaan Tong Sirimangalo. He has taught meditation courses in the Mahasi Sayadaw tradition since 2003 and gives online teachings via YouTube, as well as Dhamma talks to both intensive meditators and by invitation to the general public.</p>
<br><hr>
<br><p>Original Youtube Video: <a href="%s" target="_blank">%s</a></p>
<p>Podcast Website: <a href="https://podcast.sirimangalo.org/dhammapada/" target="_blank">https://podcast.sirimangalo.org/dhammapada/</a></p>
<p>How To Meditate Booklet: <a href="https://htm.sirimangalo.org/" target="_blank"> https://htm.sirimangalo.org/</a></p>
<p>Our Meditation Community and At-Home Meditation Course signup page: <a href="https://meditation.sirimangalo.org/" target="_blank">https://meditation.sirimangalo.org/</a></p>
<p>Our Website: <a href="https://www.sirimangalo.org/" target="_blank">https://www.sirimangalo.org/</a></p>
<p>Supporting This Work: <a href="https://www.sirimangalo.org/support" target="_blank">https://www.sirimangalo.org/support</a></p>
<p>Translations from: <a href="%s" target="_blank">%s</a></p>
<br><p>Contact: podcast@sirimangalo.org</p>
<br><p>Keywords: Buddhism, Dhamma, Buddha, Monk, Bhikkhu, Sirimangalo, Yuttadhammo, Tipitaka, Pali, %s</p>
"""
    return template % (verse_html, verse_english, video, video, sc_link, sc_link, chapter)

In [47]:
def create_summary(translated_verse):
    """Return the translated verse followed by the translator attribution."""
    attribution = '(Translated by Acharya Buddharakkhita)'
    return '%s %s' % (translated_verse, attribution)

In [52]:
# Cached audio file sizes, read from disk.
files_sizes = {}
with open('file-sizes.json') as f:
    files_sizes = json.load(f)
    
#for v in videos:
#    audioUrl = 'https://almedia.blob.core.windows.net/audio/%s.mp3' % v['id']
#    
#    try:
#        files_sizes[v['id']] = int(requests.head(audioUrl).headers['Content-Length'])
#    except:
#        print('Error', v['id'])
#    
#    sleep(0.5)
#
#with open('file-sizes.json', 'w') as f:
#    f.write(json.dumps(files_sizes))

In [51]:
# for v in videos:
#    audioUrl = 'https://almedia.blob.core.windows.net/audio/%s.mp3' % v['id']
   
#    try:
#        files_sizes[v['id']] = int(requests.head(audioUrl).headers['Content-Length'])
#    except:
#        print('Error', v['id'])
   
#    sleep(0.5)

# with open('file-sizes.json', 'w') as f:
#    f.write(json.dumps(files_sizes))

In [53]:
# Both files are JSON text; decode explicitly as UTF-8 so that any non-ASCII
# characters in the translations load the same way on every platform.
with open('verses_english.json', encoding='utf-8') as f:
    dhp_english = json.load(f)

with open('verses_english_links.json', encoding='utf-8') as f:
    verses_english_links = json.load(f)

In [54]:
# For every verse entry, build [html_translation, source_link].
verses_english = []

all_nums = []
for v in verses:
    # A line holding only digits (after dropping dots and surrounding
    # whitespace) is treated as a verse number.
    candidates = [line.strip().replace('.', '') for line in v.split('\n')]
    nums = [int(text) for text in candidates if re.match(r'^[0-9]+$', text)]

    all_nums.extend(nums)

    pieces = []
    for i in nums:
        # Strip a leading "SC <number>" marker from the stored translation.
        body = re.sub(r'^(SC [0-9]+)+', '', dhp_english[str(i)])
        pieces.append('Verse %i ' % i + '<br>' + body + '<br><br>')
    translation = ''.join(pieces)

    # The link is looked up by the first verse number of the entry.
    verses_english.append([translation, verses_english_links[str(nums[0])]])

In [55]:
# Read the non-empty lines of chapters.txt and store them in reverse order.
with open('chapters.txt') as f:
    chapter_lines = f.read().split('\n')

chapters = [name for name in reversed(chapter_lines) if name]

In [56]:
# Copy of `verses` in which every bare verse-number line is replaced by
# "Verse <number>"; all other lines are kept unchanged.
verses_updated = []

for verse in verses:
    relabelled = []

    for line in verse.split('\n'):
        bare = line.strip().replace('.', '')
        is_number_line = re.match(r'^[0-9]+$', bare)
        relabelled.append('Verse ' + bare if is_number_line else line)

    verses_updated.append('\n'.join(relabelled))

In [70]:
# Build one episode record per video (`items`) plus a per-audio-file metadata
# table (`audio_metadata`) keyed by the audio URL.
items = []
audio_metadata = {}

# Only needed by the commented-out season numbering further down.
unique_chapters = list(unique_everseen(chapters))
current_episode = defaultdict(int)

# Reverse once up front instead of re-slicing all three lists per iteration.
videos_reversed = videos[::-1]
verses_reversed = verses_updated[::-1]
verses_english_reversed = verses_english[::-1]

for i in range(len(videos)):
    video = videos_reversed[i]
    verse = verses_reversed[i]
    verse_english, translation_link = verses_english_reversed[i]
    chapter = chapters[i]

    # episode = len(videos) - i
    title = video['snippet']['title'].replace('&', '&amp;')
    link = 'https://youtu.be/' + video['id']
    # NOTE(review): `tags` is computed but never written into the item below.
    tags = keywords[link] if link in keywords else 'None'
    tags = tags.split(',')[:5]
    # total_seconds() keeps whole days; `.seconds` alone would drop them for
    # durations of 24 hours or more.
    duration = int(isodate.parse_duration(video['contentDetails']['duration']).total_seconds())
    # Parse the publication timestamp once and derive every field from it.
    published = parser.parse(video['snippet']['publishedAt'])
    pubDate = utils.format_datetime(published)
    guid = video['id']
    audioUrl = 'https://almedia.blob.core.windows.net/audio/%s.mp3' % video['id']
    audioType = 'audio/mpeg'
    audioFileSize = files_sizes[guid] if guid in files_sizes else 0
    subtitle = '%s from the Chapter "%s"' % (title.split(':')[0], chapter)
    content_encoded = create_desc(verse, verse_english, translation_link, link, chapter)
    summary = create_summary(verse_english)
    categories = ["Buddhism", "Dhamma", "Buddha", "Monk", "Bhikkhu", "Sirimangalo", "Yuttadhammo", "Tipitaka", "Pali", chapter]
    #season = unique_chapters.index(chapter) + 1
    #current_episode[season] += 1
    episode = i + 1

    # Surface videos that have no known audio file size.
    if audioFileSize <= 0:
        print(link)

    pubDay = str(published)[:10]
    pubHour = published.hour
    pubMin = published.minute

    items.append({
        'pubDay': pubDay,
        'pubHour': pubHour,
        'pubMin': pubMin,
        'guid': guid,
        'title': title,
        'subtitle': subtitle,
        'contentEncoded': content_encoded,
        'description': summary,
        'pubDate': pubDate,
        'duration': duration,
        'audioUrl': audioUrl,
        'audioType': audioType,
        'audioSize': audioFileSize,
        'categories': categories,
        'categoriesList': ','.join(categories),
        'episode': episode,
        #'season': season
    })

    audio_metadata[audioUrl] = {
        'guid': guid,
        'duration': duration,
        'type': audioType,
        'size': audioFileSize
    }


#items = items[::-1]

In [69]:
# Spot-check a single entry of `audio_metadata`.
# NOTE(review): the recorded output below shows a guid that differs from the
# id in this URL, although the loop that fills `audio_metadata` derives both
# from the same video id - the stored output looks stale; re-run to confirm.
audio_metadata['https://almedia.blob.core.windows.net/audio/teUXh0wcU64.mp3']

{'guid': 'IWGVfVN7rKA',
 'duration': 981,
 'type': 'audio/mpeg',
 'size': 15689703}

In [64]:
# Persist the episode records as indented JSON.
with open('dhammapada-items.json', 'w') as f:
    json.dump(items, f, indent=2)

In [71]:
# Persist the per-audio-file metadata as indented JSON.
with open('audio_metadata.json', 'w') as f:
    json.dump(audio_metadata, f, indent=2)

In [69]:
# Channel-level HTML description, wrapped in CDATA.
description = """
<![CDATA[
<p>A series on the Dhammapada, a set of 423 verse teachings given by the Buddha, including explanation of the Pali verse, a synopsis of the background story and application of the teaching to our practice by Yuttadhammo Bhikkhu.</p>
<br>
<p>This Dhammapada Part I podcast includes Chapters 1-17 (Verses 1-234). Please subscribe to Dhammapada Part II for the verses in the rest of the chapters. New episodes are published each week in the Dhammapada Part II podcast.</p>
<p>Yuttadhammo Bhikkhu is a Canadian-born Theravada Buddhist monk, ordained in 2001 under the guidance of Venerable Ajaan Tong Sirimangalo. He has taught meditation courses in the Mahasi Sayadaw tradition since 2003 and gives online teachings via YouTube, as well as Dhamma talks to both intensive meditators and by invitation to the general public.</p>
<br>
<p>How To Meditate Booklet: <a href="https://htm.sirimangalo.org/" target="_blank"> https://htm.sirimangalo.org/</a></p>
<p>To join our meditation community and sign up for an at-home meditation course, please visit <a href="https://meditation.sirimangalo.org/" target="_blank">https://meditation.sirimangalo.org/</a></p>
<p>Our Website: <a href="https://www.sirimangalo.org/" target="_blank">https://www.sirimangalo.org/</a></p>
<p>Supporting This Work: <a href="https://www.sirimangalo.org/support" target="_blank">https://www.sirimangalo.org/support</a></p>
<p>YouTube: <a href="https://www.youtube.com/yuttadhammo" target="_blank">https://www.youtube.com/yuttadhammo</a></p>
<p>Keywords: Buddhism, Dhamma, Buddha, Monk, Bhikkhu, Sirimangalo, Yuttadhammo, Tipitaka, Pali</p>
]]>
"""
# Channel record: feed-level fields plus the list of episode items.
channel = {
    # utils.localtime() is timezone-aware, so the formatted date carries the
    # real UTC offset. A naive datetime.now() would be rendered with "-0000".
    "lastBuildDate": utils.format_datetime(utils.localtime()),
    "ownerName": "Sirimangalo International",
    "ownerEmail": "podcast@sirimangalo.org",
    "link": "https://podcast.sirimangalo.org/dhammapada/",
    "atomLink": "https://podcast.sirimangalo.org/dhammapada/rss.xml",
    "title": "Dhammapada",
    "description": description,
    "subtitle": "A series on the Dhammapada",
    # NOTE(review): "fd" does not look like a URL - presumably a placeholder; confirm.
    "imageUrl": "fd",
    "category": "Religion &amp; Spirituality",
    "subcategory": "Buddhism",
    "author": "Yuttadhammo Bhikkhu",
    "language": "en",
    "type": "serial",
    "complete": "No",
    "episodes": items
}

In [70]:
# Persist the channel record (including its episodes) as indented JSON.
with open('dhammapada-channel.json', 'w') as f:
    json.dump(channel, f, indent=2)

In [71]:
# Display the full list of episode records as the cell output.
items

[{'pubDay': '2011-08-06',
  'pubHour': 13,
  'pubMin': 18,
  'guid': 'lZZiE-EofAE',
  'title': 'Dhammapada Verse 1: Suffering is Mind-made',
  'subtitle': 'Dhammapada Verse 1 from the Chapter "The Pairs"',
  'contentEncoded': '<p>Dhammapada verse(s) in Pali:</p>\n<p>Verse 1<br>manopubbaṅgamā dhammā, manoseṭṭhā manomayā.<br>manasā ce paduṭṭhena, bhāsati vā karoti vā.<br>tato naṃ dukkhamanveti, cakkaṃva vahato padaṃ.<br></p>\n<p>English translation of verse(s):</p>\n<p>Verse 1<br>Mind precedes all mental states. Mind is their chief; they are all mind-wrought. If with an impure mind a person speaks or acts suffering follows him like the wheel that follows the foot of the ox.<br><br></p>\n<p>(Translated by Acharya Buddharakkhita)</p>\n<br><hr><br>\n<p>A series on the Dhammapada, a set of 423 verse teachings given by the Buddha, including explanation of the Pali verse, a synopsis of the background story and application of the teaching to our practice by Yuttadhammo Bhikkhu.</p>\n<p>This Dha

In [67]:
# Export a fixed selection of item fields, in this column order, to a spreadsheet.
sheet_columns = [
    'title',
    'subtitle',
    'description',
    'contentEncoded',
    'audioUrl',
    'pubDay',
    'pubHour',
    'pubMin',
    'categoriesList',
    'episode',
]
episodes_frame = pd.DataFrame(items)
episodes_frame[sheet_columns].to_excel('episodes-sheet.xlsx')

In [25]:
#feed = Feed(
#    title = "Sample RSS Feed",
#    link = "http://www.example.com/rss",
#    description = "This is an example of how to use rfeed to generate an RSS 2.0 feed",
#    language = "en-US",
#    lastBuildDate = datetime.now(),
#    items = items,
#)
#with open('test-feed.xml', 'w') as f:
#    f.write(feed.rss())