In [1]:
import json
import random
import numpy as np
from firebase import Firebase
from tqdm import tqdm

In [2]:
with open('src/Data/newtest_trope10.json', 'r') as f:
    test_data = json.load(f)
    f.close()

In [3]:
print ('#data in test:', len(test_data))

#data in test: 243


In [4]:
# example of data format
# {
#     "data-video-sub": "",

#     "data-video-tropename": "Bittersweet Ending",

#     "data-video-descrip": "After the day and Peach are saved and all is well, Mario is about to pop the question to Peach when Bowser intervenes. When the two argue for quite some time, Peach shuts them up and rejects both of them, but does offer a ride back home which Mario happily accepts, but Bowser isn't lucky.",

#     "data-video-name": "5375.mp4"
# }

In [5]:
test_data[0]

{'data-video-sub': '',
 'data-video-tropename': 'Bittersweet Ending',
 'data-video-descrip': "After the day and Peach are saved and all is well, Mario is about to pop the question to Peach when Bowser intervenes. When the two argue for quite some time, Peach shuts them up and rejects both of them, but does offer a ride back home which Mario happily accepts, but Bowser isn't lucky.",
 'data-video-name': '5375.mp4'}

## Create trope2category mapper

In [6]:
with open('src/Data/category.json', 'r') as f:
    tropes = json.load(f)
    f.close()

In [7]:
tropes

{'Character trait': ['Establishing Character Moment',
  'The Hero',
  'Big Bad',
  'One-Winged Angel',
  'Villain Song',
  'Face\x96Heel Turn',
  'Big Damn Heroes',
  'An Ice Person',
  'Animal-Themed Superbeing',
  'Big Eater',
  'Satanic Archetype',
  'Hoist by His Own Petard',
  'Eldritch Abomination',
  'Arc Villain',
  'Bad Boss',
  'Transformation Sequence',
  'Lethal Chef'],
 'Role Interaction': ['Abusive Parents',
  '"The Reason You Suck" Speech',
  'Curb-Stomp Battle',
  'Fusion Dance',
  'Showdown at High Noon',
  'Bad Boss',
  'Even Evil Has Standards',
  'Totem Pole Trench',
  'Right Behind Me',
  'Catch Phrase'],
 'Specific scene or object': ['Nightmare Face',
  'Playing with Fire',
  'An Ice Person',
  'Eldritch Location',
  'Latex Perfection',
  'Vomit Indiscretion Shot',
  "Suck E. Cheese's",
  'Shock and Awe',
  'Making a Splash',
  'Christmas Songs',
  'Logo Joke',
  'Slippy-Slidey Ice World',
  'Botanical Abomination',
  'Summon Magic',
  'Totem Pole Trench',
  'Pape

In [8]:
categories = list(tropes.keys())

In [9]:
total_num_tropes = 0
trope2category = {}
category_statistics = {}
for category in categories:
    print ('[%s] - %d tropes' % (category, len(tropes[category])))
    category_statistics[category] = len(tropes[category])
    for trope in tropes[category]:
        trope2category[trope] = category
        # print('- %s' % (trope))
    total_num_tropes += len(tropes[category])

[Character trait] - 17 tropes
[Role Interaction] - 10 tropes
[Specific scene or object] - 18 tropes
[Story understanding] - 37 tropes
[Situation understanding] - 51 tropes
[Sentiment analysis] - 14 tropes
[Audio] - 22 tropes
[Definition understanding] - 8 tropes


In [10]:
print ('Total #category:', len(categories))
print ('Total #trope:', total_num_tropes)

Total #category: 8
Total #trope: 177


In [11]:
test_videos = list(set([ x['data-video-name'] for x in test_data ]))
test_tropes = list(set([ x['data-video-tropename'] for x in test_data ]))
test_categories = list(set([ trope2category[trope] for trope in test_tropes ]))

In [12]:
print ('#video in test:', len(test_videos))
print ('#trope in test:', len(test_tropes))
print ('#category in test:', len(test_categories))

#video in test: 243
#trope in test: 109
#category in test: 8


In [13]:
test_category_statistics = {}
for trope in test_tropes:
    category = trope2category[trope]
    if category not in test_category_statistics:
        test_category_statistics[category] = 1
    else:
        test_category_statistics[category] += 1

for category in test_category_statistics:
    print ('[%s] - %d/%d (%.1f %%)' % (category, 
        test_category_statistics[category], category_statistics[category],
        test_category_statistics[category] / category_statistics[category] * 100))

[Audio] - 18/22 (81.8 %)
[Situation understanding] - 30/51 (58.8 %)
[Specific scene or object] - 14/18 (77.8 %)
[Story understanding] - 23/37 (62.2 %)
[Character trait] - 8/17 (47.1 %)
[Sentiment analysis] - 6/14 (42.9 %)
[Definition understanding] - 7/8 (87.5 %)
[Role Interaction] - 3/10 (30.0 %)


In [14]:
test_videos

['16720.mp4',
 '24773.mp4',
 '317.mp4',
 '18749.mp4',
 '41910.mp4',
 '24843.mp4',
 '23274.mp4',
 '10852.mp4',
 '24769.mp4',
 '2676.mp4',
 '9847.mp4',
 '6011.mp4',
 '33302.mp4',
 '5036.mp4',
 '40496.mp4',
 '1675.mp4',
 '15086.mp4',
 '12146.mp4',
 '13289.mp4',
 '6800.mp4',
 '15223.mp4',
 '11306.mp4',
 '34076.mp4',
 '11845.mp4',
 '40596.mp4',
 '1830.mp4',
 '27223.mp4',
 '5038.mp4',
 '6170.mp4',
 '9851.mp4',
 '4490.mp4',
 '10777.mp4',
 '25415.mp4',
 '6538.mp4',
 '3418.mp4',
 '30816.mp4',
 '25908.mp4',
 '13452.mp4',
 '9342.mp4',
 '13450.mp4',
 '28062.mp4',
 '15021.mp4',
 '18902.mp4',
 '10791.mp4',
 '5189.mp4',
 '6182.mp4',
 '25303.mp4',
 '4143.mp4',
 '3307.mp4',
 '40608.mp4',
 '13291.mp4',
 '39904.mp4',
 '19356.mp4',
 '13662.mp4',
 '4882.mp4',
 '3846.mp4',
 '5288.mp4',
 '11726.mp4',
 '9024.mp4',
 '6188.mp4',
 '7731.mp4',
 '25133.mp4',
 '6192.mp4',
 '6041.mp4',
 '17270.mp4',
 '732.mp4',
 '6844.mp4',
 '27122.mp4',
 '9008.mp4',
 '28941.mp4',
 '9048.mp4',
 '14341.mp4',
 '25904.mp4',
 '1685.mp4'

In [15]:
with open('test_videos.txt', 'w') as f:
    for video in test_videos:
        f.write('%s\n' % (video.strip()))
    f.close()

In [16]:
with open('trope2category.json', 'w') as f:
    json.dump(trope2category, f)
    f.close()

## Create options for each video

In [26]:
for idx, data in tqdm(enumerate(test_data), total=len(test_data), desc='create options'):
    target_trope = data['data-video-tropename']
    target_category = trope2category[target_trope];
    options = [ target_trope ]
    
    tropes_with_same_category = list(filter(lambda tr: tr != target_trope, tropes[target_category]))
    max_num_sample_from_same_category = min(2, len(tropes_with_same_category))
    same_catrgory_options = np.random.choice(tropes_with_same_category, max_num_sample_from_same_category, replace=False)
    options = options + same_catrgory_options.tolist()
    
    other_categories = list(filter(lambda c: c != target_category, list(tropes.keys())))
    while len(options) < 5:
        sampled_category = other_categories[random.randrange(len(other_categories))]
        sampled_trope = tropes[sampled_category][random.randrange(len(tropes[sampled_category]))]
        if sampled_trope not in options:
            options.append(sampled_trope)
    options = list(set(options))
    
    try:
        assert len(options) == 5, '#option should be 5!'
    except:
        print ('Error')
        print (options)
    
    data['options'] = options
    

create options: 100%|██████████| 243/243 [00:00<00:00, 4609.00it/s]


In [27]:
with open('src/Data/newtest_trope10_with_options.json', 'w') as f:
    json.dump(test_data, f)
    f.close()

## Create video2url mapper

In [15]:
config = {
    "apiKey": "AIzaSyCL5liImL_Nl2QIOi568pw9KxMEERVpT_c",
    "authDomain": "trope-annotation.firebaseapp.com",
    "storageBucket": "trope-annotation.appspot.com",
    "databaseURL": "https://trope-annotation-default-rtdb.firebaseio.com",
    "serviceAccount": "trope-annotation-firebase-adminsdk-rdbls-1b8ab39545.json"
}

In [16]:
firebase = Firebase(config)

In [17]:
storage = firebase.storage()

In [33]:
video2url = {}
for file in storage.list_files():
    video2url[file.name] = storage.child(file.name).get_url(None)

In [34]:
with open('video2url.json', 'w') as f:
    json.dump(video2url, f)
    f.close()