In [21]:
# 导入模块
import json
import os
import pandas as pd
from tqdm import tqdm
from munch import Munch

# MovieNet

## 制作scene_annotation.pkl

In [7]:
# Directory scanned below for the per-movie MovieNet annotation JSON files.
anno_root = '/data/shared_dataset/MovieNet/annotations/annotation'

In [13]:
# Names of all JSON annotation files located directly under anno_root.
json_files = [f for f in os.listdir(anno_root) if f.endswith('.json')]
# Collect one record per file and build the DataFrame once at the end.
# Growing a DataFrame row by row is quadratic, and DataFrame.append was
# removed in pandas 2.0.
records = []
for file in tqdm(json_files):
    # Parse the annotation file into a dict.
    with open(os.path.join(anno_root, file), 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Keep only the files whose 'scene' field is non-empty.
    if data['scene']:
        records.append({'file': file, 'scene': data['scene']})

# Explicit columns keep the frame well-formed even when no file qualified.
df = pd.DataFrame(records, columns=['file', 'scene'])


100%|██████████| 8918/8918 [00:16<00:00, 555.19it/s]


In [15]:
# Show the collected frame: one row per annotation file with a non-empty scene list.
df

Unnamed: 0,file,scene
0,tt0079417.json,"[{'id': 'tt0079417_0000', 'shot': [0, 1], 'fra..."
1,tt1068680.json,"[{'id': 'tt1068680_0000', 'shot': [0, 1], 'fra..."
2,tt1409024.json,"[{'id': 'tt1409024_0000', 'shot': [0, 1], 'fra..."
3,tt0082971.json,"[{'id': 'tt0082971_0000', 'shot': [0, 1], 'fra..."
4,tt1411238.json,"[{'id': 'tt1411238_0000', 'shot': [0, 1], 'fra..."
...,...,...
343,tt0123755.json,"[{'id': 'tt0123755_0000', 'shot': [0, 1], 'fra..."
344,tt0209144.json,"[{'id': 'tt0209144_0000', 'shot': [0, 1], 'fra..."
345,tt0167260.json,"[{'id': 'tt0167260_0000', 'shot': [0, 1], 'fra..."
346,tt0049730.json,"[{'id': 'tt0049730_0000', 'shot': [0, 1], 'fra..."


In [22]:
# Map each movie id to its list of scenes (as Munch objects), dropping the
# fields that are not wanted in the pickle.
scene_pickle = {}
dropped_keys = ('id', 'action_tag')

for row in df.itertuples():
    # File names end in '.json' (5 characters); strip it to get the movie id.
    vid = row.file[:-5]
    # Build filtered copies instead of deleting keys in place: the scene dicts
    # are shared with `df`, so in-place deletion would corrupt `df` and make
    # this cell raise KeyError when it is run a second time.
    scene_pickle[vid] = [
        Munch({key: value for key, value in scene.items() if key not in dropped_keys})
        for scene in row.scene
    ]

In [None]:
# Persist the {movie id -> list of scene objects} mapping as a pickle file.
# NOTE(review): the path is relative to the working directory; presumably
# 'data/MovieNet' must already exist — confirm before running.
pd.to_pickle(scene_pickle, 'data/MovieNet/scene_annotation.pkl')

## 制作shot_annotation.pkl

In [54]:
# Directory scanned below for the per-movie shot annotation .txt files.
shot_root = '/data/shared_dataset/MovieNet/annotations/shot'

In [56]:
# Map each movie id (file name without '.txt') to a list holding the first two
# integers of every line in its shot file.
shot_pickle = {}

for file in tqdm(os.listdir(shot_root)):
    # Only .txt files are shot annotation files.
    if not file.endswith(".txt"):
        continue
    # Use the file name without its extension as the key.
    key = file[:-4]
    with open(os.path.join(shot_root, file), "r") as f:
        # Skip blank lines so the number of entries agrees with the shot count
        # computed for label_dict (which also ignores blank lines); a blank
        # line would otherwise contribute an empty list.
        shot_pickle[key] = [
            [int(x) for x in line.split()[:2]]
            for line in f
            if line.strip()
        ]

100%|██████████| 1100/1100 [00:02<00:00, 382.32it/s]


In [None]:
# Persist the {movie id -> list of two-integer shot entries} mapping as a pickle file.
# NOTE(review): the path is relative to the working directory; presumably
# 'data/MovieNet' must already exist — confirm before running.
pd.to_pickle(shot_pickle, 'data/MovieNet/shot_annotation.pkl')

## 制作label_dict.pkl

In [42]:
# Directory of per-movie shot .txt files, used below to count shots.
# NOTE(review): same value as the shot_root assigned in the shot_annotation
# section; presumably repeated so this section can be run on its own.
shot_root = '/data/shared_dataset/MovieNet/annotations/shot'

In [44]:
import glob
import os

# Shot count per movie: the number of non-blank lines in each shot file,
# keyed by the file's base name up to its first dot.
shot_num = {}

for txt_path in glob.glob(os.path.join(shot_root, '*.txt')):
    movie_id = os.path.basename(txt_path).split('.')[0]
    with open(txt_path) as handle:
        non_blank_rows = [row for row in handle if row.strip()]
    shot_num[movie_id] = len(non_blank_rows)

print(shot_num)

{'tt0086250': 1560, 'tt0217869': 389, 'tt0265666': 833, 'tt5580036': 1452, 'tt1392190': 2585, 'tt4176826': 1635, 'tt0363589': 116, 'tt0209958': 1595, 'tt1292566': 1851, 'tt3960412': 2015, 'tt0350258': 1935, 'tt0068935': 936, 'tt0079945': 1445, 'tt0142342': 1566, 'tt2409818': 635, 'tt1371111': 3109, 'tt0250797': 1655, 'tt2053463': 934, 'tt0316654': 2087, 'tt0469494': 660, 'tt2980516': 1372, 'tt0944835': 1964, 'tt0115956': 1684, 'tt1255953': 478, 'tt0272152': 1437, 'tt3766394': 1288, 'tt0082971': 1489, 'tt3319920': 835, 'tt2884018': 897, 'tt2800240': 1772, 'tt0053221': 969, 'tt4781612': 1165, 'tt0117509': 1923, 'tt0116922': 1212, 'tt0119174': 1819, 'tt0103064': 2778, 'tt0373469': 1660, 'tt3553442': 1461, 'tt0098258': 876, 'tt1188729': 2439, 'tt1971325': 1425, 'tt1731141': 2007, 'tt0103855': 2151, 'tt2132285': 598, 'tt0103776': 1779, 'tt0120912': 1442, 'tt1220634': 1413, 'tt0083866': 1192, 'tt1131729': 2540, 'tt0294870': 1405, 'tt1324999': 2010, 'tt0116209': 1759, 'tt0383574': 2600, 'tt01

In [48]:
# Per-movie binary labels, one per shot index: 1 when the index appears as
# either endpoint of some scene's `shot` pair, 0 otherwise.
# NOTE(review): both shot[0] and shot[1] of every scene are marked — confirm
# whether shot[1] is meant to be included (inclusive vs. exclusive end).
label_dict = {}
for vid, scenes in scene_pickle.items():
    # Movies without a shot file have no known shot count; leave them out.
    if vid in shot_num:
        endpoint_shots = {scene.shot[0] for scene in scenes} | {scene.shot[1] for scene in scenes}
        label_dict[vid] = [int(idx in endpoint_shots) for idx in range(shot_num[vid])]

In [49]:
# Number of movies that received a label list (had both scenes and a shot file).
len(label_dict)

318

In [None]:
# Persist the {movie id -> list of 0/1 shot labels} mapping as a pickle file.
# NOTE(review): the path is relative to the working directory; presumably
# 'data/MovieNet' must already exist — confirm before running.
pd.to_pickle(label_dict, 'data/MovieNet/label_dict.pkl')

# OVSD

## 制作label_dict.pkl

In [None]:
from tqdm import tqdm
import pandas as pd, os, numpy as np


def join_path(*p):
    """Join the given path components onto the OVSD dataset root directory."""
    return os.path.join('/data/shared_dataset/OVSD', *p)


# NOTE(review): unpickling can execute arbitrary code; only load these files
# from a trusted location.
anno = pd.read_pickle(join_path('scene_annotation.pkl'))
shot_bound = pd.read_pickle(join_path('shot_annotation.pkl'))


# Nearest neighbor matching: a shot is labelled 1 when it is the shot closest
# to the scene boundary that is itself closest to that shot, otherwise 0.
# assumes anno[vid] and shot_bound[vid] are 2-D arrays whose first column holds
# the boundary positions — TODO confirm against the pickles.
label_dict = {}
for vid in tqdm(anno.keys()):
    # Unique, sorted first-column values of the scene annotations.
    scene_boundaries = np.array(sorted(set(anno[vid][:, 0])))
    shot_boundaries = np.array(shot_bound[vid][:, 0])
    labels = []
    for shid, shb in enumerate(shot_boundaries):
        # Pair by distance: the scene boundary closest to this shot ...
        sc = np.argmin(np.abs(scene_boundaries - shb))
        # ... and the shot closest to that scene boundary.
        sh = np.argmin(np.abs(shot_boundaries - scene_boundaries[sc]))
        labels.append(int(shid == sh))
    label_dict[vid] = labels

pd.to_pickle(label_dict, 'data/OVSD/label_dict.pkl')