In [61]:
import json

import dateutil
import dateutil.parser

In [62]:
# Create a mapping of videos between channels

In [63]:
# Load the siri video list. The titles contain non-ASCII characters, so the
# encoding is given explicitly (JSON is UTF-8) instead of relying on the
# platform's default locale encoding.
with open('videos_list_siri.json', encoding='utf-8') as f:
    videos_list_siri = json.load(f)

In [64]:
# Load the yutt video list. The encoding is given explicitly (JSON is UTF-8)
# instead of relying on the platform's default locale encoding.
with open('videos_list_yutt.json', encoding='utf-8') as f:
    videos_list_yutt = json.load(f)

In [65]:
# Hand-curated pairs; siri videos listed here are skipped by the automatic
# matching. Key: siri video id, value: yutt video id.
mapping_manual = {
    'e6108Dz6vHw': 'YATsIGQcfp8',
    'e4Ua2eDR9k8': 'R-S5w13OjRY',
    'RhjR1Fj5HiE': '3xwrFVO7d-I',
    'R4yTIFg62Wo': '8XZENpuvGc8',
    'W-vbP3RWlFA': 'GmCCqdp1ae4',
    'eW8FfJUj824': 'g8j_iIgP6mE',
    # video length shorter than original
    '87xmlIP9zpg': 'F6VlYzdrS_U',
    '4P3hspZ6LXM': 'aODw-l_KpW4',
    # video length shorter than original
    '345xt2D7gXw': 'n7arvhZHXtg',
    'YUhCwgq8mOg': 'buRcDhcn8Ko',
}

In [66]:
# Pair every siri video with its yutt counterpart.
#
# A yutt video is a candidate when it was published no later than the siri
# video and has exactly the same duration string. Result per siri video:
#   no candidate                          -> unknown_videos
#   one candidate, same title             -> mapping_on_title
#   one candidate, different title        -> mapping_duration
#   several candidates, one same title    -> mapping_on_title
#   several candidates, none same title   -> multiple_matches
#   several candidates with the same title -> ValueError (ambiguous)
mapping_duration = []
mapping_on_title = []
multiple_matches = []
unknown_videos = []

# Parse each yutt publish date once, not once per siri video.
yutt_index = [
    (
        dateutil.parser.isoparse(item_yutt['snippet']['publishedAt']),
        item_yutt['contentDetails']['duration'],
        item_yutt,
    )
    for item_yutt in videos_list_yutt
]

for item_siri in videos_list_siri:
    if item_siri['id'] == 'IvERrjyIMb4':
        continue  # skip welcome video

    if item_siri['id'] in mapping_manual:
        continue  # already paired by hand

    dt_siri = dateutil.parser.isoparse(item_siri['snippet']['publishedAt'])
    dur_siri = item_siri['contentDetails']['duration']
    title_siri = item_siri['snippet']['title']

    candidates = [
        item_yutt
        for dt_yutt, dur_yutt, item_yutt in yutt_index
        if dt_yutt <= dt_siri and dur_yutt == dur_siri
    ]

    if not candidates:
        unknown_videos.append(item_siri)
        continue

    title_matches = [
        c for c in candidates if c['snippet']['title'] == title_siri
    ]

    if len(title_matches) > 1:
        # Same date constraint, duration and title: cannot pick one safely.
        raise ValueError(
            'Ambiguous match for video {}: candidates {} share its title'.format(
                item_siri['id'], [c['id'] for c in title_matches]
            )
        )

    if title_matches:
        mapping_on_title.append([item_siri, title_matches[0]])
    elif len(candidates) == 1:
        mapping_duration.append([item_siri, candidates[0]])
    else:
        multiple_matches.append([item_siri, candidates])

In [67]:
# Number of pairs matched by publish date and duration only (titles differ).
len(mapping_duration)

17

In [68]:
# Print each duration-only pair as two short links followed by both titles,
# so the pairing can be checked by hand.
for idx, (siri_video, yutt_video) in enumerate(mapping_duration):
    siri_url = 'https://youtu.be/' + siri_video['id']
    yutt_url = 'https://youtu.be/' + yutt_video['id']
    print(f'{idx} \t {siri_url} {yutt_url}')
    print(f"\t {siri_video['snippet']['title']} {yutt_video['snippet']['title']}")

0 	 https://youtu.be/RrsTPykeHGw https://youtu.be/VluD7eAbtLM
	 Q&A:  Anxiety and Depression Q&A: Anxiety and Depression
1 	 https://youtu.be/jyVPJ19lVmw https://youtu.be/KTl2ctFHTQE
	 Q&A: Avoidance of Someone Who Betrayed You Q&A: Avoidance
2 	 https://youtu.be/FY8f-XDh7ZM https://youtu.be/N3x9BQi9G7E
	 Dhammapada 2: Happiness is Mind-made Dhammapada Verse Two: Happiness Follows a Pure Mind
3 	 https://youtu.be/QypozUHuBq0 https://youtu.be/i4OWeSHv3NM
	 Weekly Q&A | 2018-08-08 Dhamma Q&A, Aug 8, 2018
4 	 https://youtu.be/bBn9QRBKxf0 https://youtu.be/Qqkd5Tt7q9g
	 Weekly Q&A | 2018-08-01 Questions and Answer, August 1, 2018
5 	 https://youtu.be/eQK2z57IVIE https://youtu.be/LnB93-gFnmw
	 Weekly Q&A | 2018-07-25 Q&A July 25, 2018
6 	 https://youtu.be/4bcB5X_hdmk https://youtu.be/HWkx-wUQuTE
	 Weekly Q&A | 2018-07-18 Weekly Q&A, July 18, 2018
7 	 https://youtu.be/KTTKAO9FGk0 https://youtu.be/YF3VEqtwmGQ
	 Weekly Q&A | 2018-07-11 Q&A July 11, 2018
8 	 https://youtu.be/0BpY84HcmMQ https://

In [69]:
# siri videos for which no yutt video with an equal duration and an
# earlier-or-equal publish date was found.
unknown_videos

[{'kind': 'youtube#video',
  'etag': 'HGRsIrwRyTLqB5wUq_341YdBIT0',
  'id': '24TeXXsi75Q',
  'snippet': {'publishedAt': '2018-04-14T13:46:29Z',
   'channelId': 'UCFVuMJgj2QWlg-6JZ1ye9qA',
   'title': 'Full Moon Q&A | Mahāpuṇṇama Sutta (MN 109)',
   'description': 'Mahāpuṇṇama Sutta (MN 109) - The Great Full-moon Night Discourse can be found here: http://obo.genaud.net/dhamma-vinaya/ati/mn/mn.109.than.ati.htm\n \nTo ask questions and join our community, please visit http://meditation.sirimangalo.org/\nTo support our work, please visit http://www.sirimangalo.org/support/\n\nRe-upload of dhammatalk from March 31, 2018.\n\n.',
   'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/24TeXXsi75Q/default.jpg',
     'width': 120,
     'height': 90},
    'medium': {'url': 'https://i.ytimg.com/vi/24TeXXsi75Q/mqdefault.jpg',
     'width': 320,
     'height': 180},
    'high': {'url': 'https://i.ytimg.com/vi/24TeXXsi75Q/hqdefault.jpg',
     'width': 480,
     'height': 360}},
   'channelTitle

In [70]:
# Start from the hand-made pairs, then add the automatic ones
# (siri video id -> yutt video id).
mapping = dict(mapping_manual)

# Duration matches are inserted first, title matches second. The assert
# guards against the same siri id being mapped twice.
for matched_pairs in (mapping_duration, mapping_on_title):
    for siri_video, yutt_video in matched_pairs:
        siri_id = siri_video['id']
        assert siri_id not in mapping
        mapping[siri_id] = yutt_video['id']

In [71]:
# Total number of mapped videos: manual, duration-based and title-based pairs.
len(mapping)

111

In [72]:
# Persist the combined id mapping as JSON.
with open('mapping_videos.json', 'w') as out_file:
    json.dump(mapping, out_file)