In [20]:
from glob import glob
from os import path
from mutagen.id3 import ID3, ID3NoHeaderError
from datetime import datetime
import re
import json

In [21]:
# Sub-folders of the audio share that are scanned for MP3 files.
AUDIO_DIRS = [
    "2010",
    "Blessings",
    "Chanting",
    "Daily",
    "Dhammapada",
    "Internet",
    "Live",
    "Los Angeles Course",
    "Old",
    "Pali",
    "Stories",
    "Study",
    "Talks",
    "Television",
    "Thai",
    "Visuddhimagga",
    "ask-a-monk",
]

In [41]:
# Years read from the TDRC frame above this value are discarded.
# NOTE(review): presumably a guard against bogus tag values -- confirm the cutoff.
MAX_TAG_YEAR = 2020

# COMM frames consulted for the description. The primary keys are preferred;
# the ID3v1-derived keys are only used when both primary frames are empty.
# Within each group the longest text wins and ties go to the first key listed.
PRIMARY_COMMENT_KEYS = ('COMM::\x00\x00\x00', 'COMM::XXX')
FALLBACK_COMMENT_KEYS = ('COMM:ID3v1 Comment:eng', 'COMM:ID3v1 Comment:XXX')


def date_from_filename(file_base):
    """Return the ISO date encoded in a ``YYMMDD_`` / ``YYYYMMDD_`` filename prefix.

    Returns '' when the name contains no underscore, when the part before the
    first underscore is not exactly 6 or 8 ASCII digits, or when those digits
    do not form a valid calendar date.
    """
    parts = file_base.split('_')
    if len(parts) < 2:
        return ''

    prefix = parts[0]
    if re.fullmatch('[0-9]{6}', prefix):
        date_format = "%y%m%d"
    elif re.fullmatch('[0-9]{8}', prefix):
        date_format = "%Y%m%d"
    else:
        return ''

    try:
        return datetime.strptime(prefix, date_format).date().isoformat()
    except ValueError:
        # Digits that are not a real date (e.g. month 13) are ignored.
        return ''


def first_text(tags, key):
    """Return the first text entry of frame ``key``; '' if absent or empty."""
    if key not in tags:
        return ''
    texts = tags[key].text
    return texts[0] if texts else ''


def tag_year(tags):
    """Return the TDRC year as a string, or '' if missing, unparsable or too large.

    TDRC may carry a full timestamp such as ``2010-02-02``; only the leading
    run of digits (the year) is used, so such values no longer abort the scan.
    """
    leading_digits = re.match('[0-9]+', str(first_text(tags, 'TDRC')))
    if leading_digits is None:
        return ''
    year = int(leading_digits.group(0))
    return str(year) if year <= MAX_TAG_YEAR else ''


def pick_comment(tags):
    """Choose the description text from the known COMM frame variants."""
    # max() returns the first maximal item, which keeps the tie-breaking order.
    primary = max((first_text(tags, key) for key in PRIMARY_COMMENT_KEYS), key=len)
    if primary:
        return primary
    return max((first_text(tags, key) for key in FALLBACK_COMMENT_KEYS), key=len)


res = []
all_tags = set()

for audio_dir in AUDIO_DIRS:
    pattern = path.join('./share.sirimangalo.org/', audio_dir, '*.mp3')

    # glob() yields matches in arbitrary order; sort so reruns produce the same list.
    for mp3_path in sorted(glob(pattern)):
        file_base = path.basename(mp3_path)

        try:
            # get data from id3 tags
            # ref: https://static.sirimangalo.org/diraudio/Yuttadhammo/rss.php
            i3tags = ID3(mp3_path)
        except ID3NoHeaderError:
            # Files without an ID3 header are left out of the result entirely.
            continue

        # Collect every frame key seen, for inspecting which tags exist.
        all_tags.update(i3tags.keys())

        res.append({
            'file': path.join(audio_dir, file_base),
            'folder': audio_dir,
            'title': first_text(i3tags, 'TIT2'),
            'album': first_text(i3tags, 'TALB'),
            'year': tag_year(i3tags),
            'comment': pick_comment(i3tags),
            'date_from_filename': date_from_filename(file_base),
        })

In [44]:
# Display the collected metadata records (one dict per MP3 file) for inspection.
res

[{'file': '2010/100202_WhatIsMeditation.mp3',
  'folder': '2010',
  'title': 'What Is Meditation?',
  'album': '2010',
  'year': '',
  'comment': 'Discussion different types of meditation',
  'date_from_filename': '2010-02-02'},
 {'file': '2010/100204_IntroBuddhism.mp3',
  'folder': '2010',
  'title': 'Introduction To Buddhism',
  'album': '2010',
  'year': '',
  'comment': 'An introduction to the three most important things in the Buddhist religion.',
  'date_from_filename': '2010-02-04'},
 {'file': '2010/100218_Walking5.mp3',
  'folder': '2010',
  'title': 'Benefits of Walking Meditation',
  'album': '2010',
  'year': '',
  'comment': 'The five canonical benefits of walking meditation.',
  'date_from_filename': '2010-02-18'},
 {'file': '2010/100318_Visuddhi7.mp3',
  'folder': '2010',
  'title': 'Seven Stage in the Path to Purity',
  'album': '2010',
  'year': '',
  'comment': 'Outline of the step-by-step path to purification outlined in the TIpitaka and expanded upon in the treatise 

In [45]:
# Persist the collected records as a single JSON document.
with open('metadata_old_feed.json', 'w') as out_file:
    json.dump(res, out_file)