In [63]:
import pandas as pd
import json
import re
import isodate
import requests
from time import sleep
from dateutil import parser
from email import utils
from datetime import datetime
from rfeed import Item, Feed, iTunesItem, Guid, Enclosure

In [64]:
# Read the saved video metadata (a JSON list of video records) from disk.
with open('../../metadata/youtube-api/videos_list_yutt.json') as f:
    videos_list_yutt = json.load(f)

In [65]:
# Keep only the records whose title mentions "Dhammapada"; `titles` holds the
# matching titles in the same order, so videos[k] and titles[k] belong together.
videos = []
titles = []
for video in videos_list_yutt:
    video_title = video['snippet']['title']
    if 'Dhammapada' in video_title:
        videos.append(video)
        titles.append(video_title)

In [66]:
# numbers: every verse number recovered from the titles (in title order).
# mapping: verse number -> the video record whose title contained it.
numbers = []
mapping = {}

for idx, title in enumerate(titles):
    found = [int(tok) for tok in re.findall(r'([0-9]+)', title)]

    if len(found) == 2:
        # Two numbers are read as an inclusive range "first..last".
        first, last = found
        verse_numbers = list(range(first, last + 1))
    elif len(found) in (1, 3):
        # One or three numbers are taken as individual verse numbers.
        verse_numbers = found
    else:
        # Titles with no digits (or more than three numbers) contribute nothing.
        verse_numbers = []

    numbers.extend(verse_numbers)
    mapping.update((n, videos[idx]) for n in verse_numbers)

In [67]:
# Numbers in 1..223 that were not extracted from any title (original note: "written numbers missing").
sorted(set(range(1, 224)) - set(numbers))

[2, 3, 4, 5, 6, 7, 8, 9, 10]

In [68]:
# verses.txt holds one entry per block, with blocks separated by a line containing only '---'.
with open('verses.txt') as f:
    raw_verses = f.read()

verses = raw_verses.split('\n---\n')

In [69]:
# Numbers that are exempt from the "number must appear in the video title" checks below.
exceptions = [207, 210, 198, 189, 190, 191, 184, 138, 139, 88, 10, 9, 8, 7, 6, 5, 4, 3, 2]

number_pattern = re.compile(r'[0-9]+')

# Check 1: every non-exempt number found in a verse text must occur in the
# title of the video that `mapping` associates with that number.
for verse_text in verses:
    for token in number_pattern.findall(verse_text):
        num = int(token)
        if num in exceptions:
            continue
        assert str(num) in mapping[num]['snippet']['title']

# Check 2: verses[k] must line up positionally with videos[k], and the lowest
# number of each entry must be below the lowest number of the previous entry
# (i.e. the entries run in descending order, starting below 224).
previous_nums = [224]
for idx, verse_text in enumerate(verses):
    current_nums = sorted(int(token) for token in number_pattern.findall(verse_text))

    for num in current_nums:
        if num not in exceptions:
            assert str(num) in videos[idx]['snippet']['title']

    # The ordering check is skipped when every number of this entry is exempt.
    if set(current_nums) - set(exceptions):
        assert current_nums[0] < previous_nums[0]

    previous_nums = current_nums

In [70]:
# Build a lookup from each row's `link` value to its `keywords` value.
df = pd.read_json('../../metadata/subtitles/top_keywords_from_subtitles.json')

keywords = {}
for row in df.itertuples():
    keywords[row.link] = row.keywords

In [71]:
def create_desc(verse, video, tags):
    """Return the episode description text for one video.

    Args:
        verse: Value placed at the very top of the description.
        video: Value inserted twice into the "Original YouTube-Video" anchor,
            once as the href and once as the visible link text.
        tags: Value placed after "Automatic Keywords: ".

    Returns:
        The filled-in description as a string, ending with a newline.
    """
    description_format = """%s

Part of a series on the Dhammapada, a set of 423 verse teachings given by the Buddha, including explanation of the Pali verse, a synopsis of the background story and application of the teaching to our practice.

Original YouTube-Video: <![CDATA[<a href="%s" target="_blank">%s</a>]]>
For more information on the Dhammapada: <![CDATA[<a href="http://www.buddhanet.net/e-learning/buddhism/dhammapada.htm" target="_blank">http://www.buddhanet.net/e-learning/buddhism/dhammapada.htm</a>]]>
Automatic Keywords: %s
"""
    substitutions = (verse, video, video, tags)
    return description_format % substitutions

In [72]:
# One-off helper, kept commented out: asks the server for the Content-Length of
# every MP3 and stores the result (video id -> size) in file-sizes.json.
#fileSizes = {}
    
#for v in videos:
#    audioUrl = 'https://almedia.blob.core.windows.net/audio/%s.mp3' % v['id']
#    
#    try:
#        fileSizes[v['id']] = int(requests.head(audioUrl).headers['Content-Length'])
#    except:
#        print('Error', v['id'])
#    
#    sleep(0.5)

#with open('file-sizes.json', 'w') as f:
#    f.write(json.dumps(fileSizes))

In [73]:
# Build one plain-dict feed item per Dhammapada video. videos[k] is paired with
# verses[k]; episode numbers count down from len(videos) to 1.

# `fileSizes` (video id -> MP3 size) is not defined by any live cell, so on a
# fresh kernel the loop below used to die with NameError. Reuse it when it is
# already in the kernel, otherwise load the file-sizes.json cache if present,
# and fall back to an empty mapping (every size then defaults to 0).
if 'fileSizes' not in globals():
    try:
        with open('file-sizes.json') as f:
            fileSizes = json.load(f)
    except FileNotFoundError:
        fileSizes = {}

items = []

for i, video in enumerate(videos):
    guid = video['id']
    link = 'https://youtu.be/' + guid

    # At most the first five comma-separated keywords; the literal 'None' when
    # no keywords are known for this link.
    tags = keywords.get(link, 'None')
    tags = ', '.join(tags.split(',')[:5])

    # total_seconds() instead of .seconds: .seconds drops the days component,
    # which would under-report any duration of 24 hours or more.
    duration = int(isodate.parse_duration(video['contentDetails']['duration']).total_seconds())

    items.append({
        'episode': len(videos) - i,
        'guid': guid,
        'title': video['snippet']['title'],
        'description': create_desc(verses[i], link, tags),
        # RFC 2822 date string as produced by email.utils.format_datetime.
        'pubDate': utils.format_datetime(parser.parse(video['snippet']['publishedAt'])),
        'duration': duration,
        'audioUrl': 'https://almedia.blob.core.windows.net/audio/%s.mp3' % guid,
        'audioType': 'audio/mpeg',
        'audioFileSize': fileSizes.get(guid, 0)
    })

In [74]:
# Persist the item list as JSON indented by two spaces.
with open('dhammapada-items.json', 'w') as f:
    json.dump(items, f, indent=2)

In [49]:
#feed = Feed(
#    title = "Sample RSS Feed",
#    link = "http://www.example.com/rss",
#    description = "This is an example of how to use rfeed to generate an RSS 2.0 feed",
#    language = "en-US",
#    lastBuildDate = datetime.now(),
#    items = items,
#)
#with open('test-feed.xml', 'w') as f:
#    f.write(feed.rss())