In [1]:
# Load MVBench dataset

import argparse
import itertools
import json
import os
import random
import time
from functools import partial

import cv2
import imageio
import torchvision.transforms as T
import numpy as np
import torch
from decord import VideoReader, cpu
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms.functional import InterpolationMode
from tqdm import tqdm



#shares/SR004.nfs2/lipengyi/1_Checked_idea

data_list = {
    'Action Sequence': ('action_sequence.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/', 'video', True),
    # has start & end
    'Action Prediction': ('action_prediction.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/', 'video', True),
    'Action Antonym': ('action_antonym.json', '/home/jovyanshares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/ssv2_video/ssv2_video/', 'video', False),
    'Fine-grained Action': ('fine_grained_action.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/Moments_in_Time_Raw/Moments_in_Time_Raw/videos/', 'video', False),
    'Unexpected Action': ('unexpected_action.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/FunQA_test/FunQA_test/test/', 'video', False),
    'Object Existence': ('object_existence.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/clevrer/clevrer/video_validation/', 'video', False),
    'Object Interaction': ('object_interaction.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/', 'video', True),
    # has start & end
    'Object Shuffle': ('object_shuffle.json', '/home/jovyanshares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/perception/perception/videos/', 'video', False),
    'Moving Direction': ('moving_direction.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/clevrer/clevrer/video_validation/', 'video', False),
    'Action Localization': ('action_localization.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/sta/sta/sta_video/', 'video', True),
    # has start & end
    'Scene Transition': ('scene_transition.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/scene_qa/scene_qa/video/', 'video', False),
    'Action Count': ('action_count.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/perception/perception/videos/', 'video', False),
    'Moving Count': ('moving_count.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/clevrer/clevrer/video_validation/', 'video', False),
    'Moving Attribute': ('moving_attribute.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/clevrer/clevrer/video_validation/', 'video', False),
    'State Change': ('state_change.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/perception/perception/videos/', 'video', False),
    'Fine-grained Pose': ('fine_grained_pose.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/nturgbd/nturgbd/', 'video', False),
    'Character Order': ('character_order.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/perception/perception/videos/', 'video', False),
    'Egocentric Navigation': ('egocentric_navigation.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/vlnqa/vlnqa/', 'video', False),
    'Episodic Reasoning': ('episodic_reasoning.json', '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/tvqa/tvqa/frames_fps3_hq/', 'frame', True),
    # has start & end, read frame
    'Counterfactual Inference': (
    'counterfactual_inference.json', '1_Checked_idea/MVBench/video/clevrer/clevrer/video_validation/', 'video', False),
    
}

data_dir = '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/json'

class MVBenchDataset(torch.utils.data.Dataset):

    def __init__(self, data_dir, data_list, prompt, question_prompt, num_segments=16, input_size=224,
                 dynamic_image_size=False, use_thumbnail=False, max_num=6):
        
        self.data_list = []

        self.prompt = prompt
        self.question_prompt = question_prompt
        self.input_size = input_size
        self.num_segments = num_segments
        self.dynamic_image_size = dynamic_image_size
        self.use_thumbnail = use_thumbnail
        self.max_num = max_num
        #self.transform = build_transform(is_train=False, input_size=input_size)
        
        for k, v in data_list.items():
            with open(os.path.join(data_dir, v[0]), 'r') as f:
                json_data = json.load(f)
            for data in json_data:
                video_path = os.path.join(v[1], data['video'])
                if os.path.exists(video_path):  # 只添加存在的视频文件对应的数据
                    self.data_list.append({
                        'task_type': k,
                        'prefix': v[1],
                        'data_type': v[2],
                        'bound': v[3],
                        'data': data
                    })

        self.decord_method = {
            'video': self.read_video,
            'gif': self.read_gif,
            'frame': self.read_frame,
        }

    def __len__(self):
        return len(self.data_list)

    def __str__(self):
        len_list = {}
        option_list = {}
        for data in self.data_list:
            if data['task_type'] not in len_list:
                len_list[data['task_type']] = 0
            len_list[data['task_type']] += 1
            if data['task_type'] not in option_list:
                option_list[data['task_type']] = 0
            option_list[data['task_type']] += len(data['data']['candidates'])

        correct = 0
        total = 0
        res = f'There are {len(self.data_list)} videos as follow:\n'
        for k, v in len_list.items():
            correct += len_list[k]
            total += option_list[k]
            res += f'{v} for {k} ({option_list[k]} options => {len_list[k] / option_list[k] * 100:.2f}%)\n'
            correct = correct + 1 / option_list[k]
        res += f'Total random accuracy: {correct / total * 100:.2f}%'
        return res.rstrip()

    def get_index(self, bound, fps, max_frame, first_idx=0):
        if bound:
            start, end = bound[0], bound[1]
        else:
            start, end = -100000, 100000
        start_idx = max(first_idx, round(start * fps))
        end_idx = min(round(end * fps), max_frame)
        seg_size = float(end_idx - start_idx) / self.num_segments
        frame_indices = np.array([
            int(start_idx + (seg_size / 2) + np.round(seg_size * idx))
            for idx in range(self.num_segments)
        ])
        return frame_indices

    def read_video(self, video_path, bound=None):
        vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
        max_frame = len(vr) - 1
        fps = float(vr.get_avg_fps())

        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=0)
        for frame_index in frame_indices:
            img = Image.fromarray(vr[frame_index].asnumpy())
            images_group.append(img)

        return images_group

    def read_gif(self, video_path, bound=None, fps=25):
        gif = imageio.get_reader(video_path)
        max_frame = len(gif) - 1

        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=0)
        for index, frame in enumerate(gif):
            if index in frame_indices:
                img = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
                img = Image.fromarray(img)
                images_group.append(img)

        return images_group

    def read_frame(self, video_path, bound=None, fps=3):
        max_frame = len(os.listdir(video_path))
        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=1)  # frame_idx starts from 1
        for frame_index in frame_indices:
            img = Image.open(os.path.join(video_path, f'{frame_index:05d}.jpg'))
            images_group.append(img)

        return images_group

    def qa_template(self, data):
        question = f"Question: {data['question']}\n"
        question += 'Options:\n'
        answer = data['answer']
        answer_idx = -1
        for idx, c in enumerate(data['candidates']):
            question += f"({chr(ord('A') + idx)}) {c}\n"
            if c == answer:
                answer_idx = idx
        question = question.rstrip()
        answer = f"({chr(ord('A') + answer_idx)}) {answer}"
        return question, answer

    def __getitem__(self, idx):
        data_item = self.data_list[idx]
        video_path = os.path.join(data_item['prefix'], data_item['data']['video'])
        
        if not os.path.exists(video_path):
            print(f"Warning: Video file not found at {video_path}, skipping this item.")
            return None  # 或者返回一个特定的占位符数据结构
    
        decord_method = self.decord_method[data_item['data_type']]
        bound = None
        if data_item['bound']:
            bound = (data_item['data']['start'], data_item['data']['end'])
    
        image_list = decord_method(video_path, bound)
        if not image_list:  # 检查返回的列表是否为空
            print(f"Warning: No images were extracted from video at {video_path}, skipping this item.")
            return None  # 或者返回一个特定的占位符数据结构
    
        # 处理图像和问题-答案数据
        #special_tokens = '\n'.join(['Frame{}: <image>'.format(i + 1) for i in range(len(image_list))])
        question, answer = self.qa_template(data_item['data'])
        question = self.prompt + '\n' + question + self.question_prompt
        #question = question + self.question_prompt
    
        #raw_images = []
        #num_patches_list = []
        #pixel_values = []
        #for image in image_list:
            #raw_images.append(image)
            #if self.dynamic_image_size:
                #patches = dynamic_preprocess(image, image_size=self.input_size,
                                             #use_thumbnail=self.use_thumbnail,
                                             #max_num=self.max_num)
            #else:
                #patches = [image]
            #num_patches_list.append(len(patches))
            #pixel_values.extend([self.transform(patch) for patch in patches])
    
        #if not pixel_values:  # 检查是否有有效的像素值数据
            #return None  # 返回占位符或None
    
        #pixel_values = torch.stack(pixel_values)
    
        return {
            'question': question,
            'video_path': video_path,
            #'pixel_values': pixel_values,
            'answer': answer,
            'task_type': data_item['task_type']
        }

In [2]:
prompt = 'Carefully watch the video and pay attention to the cause and sequence of events, the detail and movement of objects, and the action and pose of persons. Based on your observations, select the best option that accurately addresses the question.\n'
question_prompt = '\nOnly give the best option.'

dataset = MVBenchDataset(
    data_dir=data_dir,          
    data_list=data_list, 
    prompt = prompt,
    question_prompt=question_prompt,
    num_segments=16,              
    input_size=448,              
    dynamic_image_size=True,     
    use_thumbnail=False,          
    max_num=6                     
)

In [3]:
print(dataset)

There are 3186 videos as follow:
188 for Action Sequence (752 options => 25.00%)
200 for Action Prediction (800 options => 25.00%)
200 for Fine-grained Action (800 options => 25.00%)
200 for Unexpected Action (800 options => 25.00%)
198 for Object Existence (594 options => 33.33%)
200 for Object Interaction (800 options => 25.00%)
200 for Moving Direction (800 options => 25.00%)
200 for Action Localization (800 options => 25.00%)
200 for Scene Transition (800 options => 25.00%)
200 for Action Count (600 options => 33.33%)
200 for Moving Count (800 options => 25.00%)
200 for Moving Attribute (601 options => 33.28%)
200 for State Change (600 options => 33.33%)
200 for Character Order (600 options => 33.33%)
200 for Egocentric Navigation (800 options => 25.00%)
200 for Episodic Reasoning (1000 options => 20.00%)
Total random accuracy: 26.67%


In [4]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from models import build_model

Please 'pip install apex'
Please 'pip install apex'
Please 'pip install apex'
Please 'pip install apex'


In [5]:
model_path = "/home/jovyan/shares/SR004.nfs2/chekalina/LaVIT/VideoLaVIT/models/language_model_sft"#"/home/jinyang06/models/VideoLaVIT-v1/language_model_sft"
model_dtype='bf16'

max_video_clips = 16
device_id = 0
torch.cuda.set_device(device_id)
device = torch.device('cuda')

seed = 42
#torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# For Multi-Modal Understanding
#runner = build_model(model_path=model_path, model_dtype=model_dtype, understanding=True, 
#        device_id=device_id, use_xformers=False, max_video_clips=max_video_clips,)

In [7]:
dict_ = torch.load("/home/jovyan/shares/SR004.nfs2/chekalina/LaVIT/VideoLaVIT/custom_ckpts/visual_tokenizer/model.pth")
#runner.model.model.visual_tokenizer.load_state_dict(dict_)

In [8]:
import transformers

tokenizer = transformers.LlamaTokenizer.from_pretrained(model_path, use_fast=False, padding_side='left')
tokenizer.pad_token = tokenizer.unk_token

In [9]:
import torch
from torch.utils.data import DataLoader
from functools import partial


def collate_fn(batches, tokenizer):
    
    questions = [_['question'] for _ in batches]
    video_path = [_['video_path'] for _ in batches]
    answer = [_['answer'] for _ in batches]
    task_types = [_['task_type'] for _ in batches]
    
    return questions, video_path, answer, task_types

dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=1,
        num_workers=1,
        pin_memory=True,
        drop_last=False,
        collate_fn=partial(collate_fn, tokenizer=tokenizer)
    )

first_batch = next(iter(dataloader))


question = first_batch[0]
video_path = first_batch[1]
answer = first_batch[2]
task_type = first_batch[3]

print(question)
print(answer)
print(video_path)
print(task_type)

['Carefully watch the video and pay attention to the cause and sequence of events, the detail and movement of objects, and the action and pose of persons. Based on your observations, select the best option that accurately addresses the question.\n\nQuestion: What happened after the person took the food?\nOptions:\n(A) Ate the medicine.\n(B) Tidied up the blanket.\n(C) Put down the cup/glass/bottle.\n(D) Took the box.\nOnly give the best option.']
['(A) Ate the medicine.']
['/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/ZS9XR.mp4']
['Action Sequence']


In [10]:
from transformers import AutoModel, AutoTokenizer, AutoProcessor

video_path = '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/ZS9XR.mp4'
prompt = "What happened after the person took the food?\nOptions:\n(A) Ate the medicine.\n(B) Tidied up the blanket.\n(C) Put down the cup/glass/bottle.\n(D) Took the box.\nOnly give the best option."
answer = "(A) Ate the medicine."
output = runner({"video": video_path, "text_input": question[0].split('Question:')[1]}, length_penalty=1, \
        use_nucleus_sampling=True, num_beams=1, max_length=512, temperature=1.0)[0]
print(output)



::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::


In [10]:
output.strip()

'C. Put down the cup/glass/bottle.'

In [None]:
generation_config = dict(max_new_tokens=1024, do_sample=False)
epoch = 1

y_pred = []
y_real = []

progress_bar = tqdm(
        dataloader, total=len(dataloader), desc=f"Epoch {epoch}/1"
    )

for step, batch in enumerate(progress_bar, start=1):
    print (step)
    question = batch[0]
    video_path = batch[1]
    answer = batch[2]
    task_type = batch[3]

    # print(pixel_values.size())
    # print(question)
    # print(answer)
    # print(num_patches_list)
    # print(task_type)
    
    # with autocast():
        # 执行推理
    outputs = runner({"video": video_path, "text_input": question[0].split('Question: ')[1]}, length_penalty=1, \
        use_nucleus_sampling=True, num_beams=1, max_length=512, temperature=1.0)[0]

    y_pred.append(outputs.strip())
    y_real.append(answer[0].strip())
    if ((step-1)%100 == 0):
        print("="*20 + "output" + "="*20)
        print(outputs)
        print("="*20 + "real answers" + "="*20)
        print(answer[0], flush = True)

Epoch 1/1:   0%|          | 0/3186 [00:00<?, ?it/s]

1
(C) Put down the cup/glass/bottle.
(A) Ate the medicine.


Epoch 1/1:   0%|          | 1/3186 [00:07<7:04:13,  7.99s/it]

2


Epoch 1/1:   0%|          | 2/3186 [00:11<4:51:35,  5.49s/it]

3


Epoch 1/1:   0%|          | 3/3186 [00:16<4:27:28,  5.04s/it]

4


Epoch 1/1:   0%|          | 4/3186 [00:18<3:28:27,  3.93s/it]

5


Epoch 1/1:   0%|          | 5/3186 [00:22<3:38:45,  4.13s/it]

6


Epoch 1/1:   0%|          | 6/3186 [00:27<3:48:50,  4.32s/it]

7


Epoch 1/1:   0%|          | 7/3186 [00:35<4:42:29,  5.33s/it]

8


Epoch 1/1:   0%|          | 8/3186 [00:43<5:29:16,  6.22s/it]

9


Epoch 1/1:   0%|          | 9/3186 [00:45<4:30:42,  5.11s/it]

10


Epoch 1/1:   0%|          | 10/3186 [00:51<4:31:56,  5.14s/it]

11


Epoch 1/1:   0%|          | 11/3186 [00:52<3:37:52,  4.12s/it]

12


Epoch 1/1:   0%|          | 12/3186 [00:54<2:54:57,  3.31s/it]

13


Epoch 1/1:   0%|          | 13/3186 [00:55<2:20:49,  2.66s/it]

14


Epoch 1/1:   0%|          | 14/3186 [00:58<2:25:44,  2.76s/it]

15


Epoch 1/1:   0%|          | 15/3186 [01:01<2:32:46,  2.89s/it]

16


Epoch 1/1:   1%|          | 16/3186 [01:04<2:30:35,  2.85s/it]

17


Epoch 1/1:   1%|          | 17/3186 [01:09<3:00:08,  3.41s/it]

18


Epoch 1/1:   1%|          | 18/3186 [01:11<2:36:01,  2.95s/it]

19


Epoch 1/1:   1%|          | 19/3186 [01:13<2:28:01,  2.80s/it]

20


Epoch 1/1:   1%|          | 20/3186 [01:16<2:37:46,  2.99s/it]

21


Epoch 1/1:   1%|          | 21/3186 [01:19<2:37:21,  2.98s/it]

22


Epoch 1/1:   1%|          | 22/3186 [01:21<2:08:51,  2.44s/it]

23


Epoch 1/1:   1%|          | 23/3186 [01:23<2:10:50,  2.48s/it]

24


Epoch 1/1:   1%|          | 24/3186 [01:26<2:16:16,  2.59s/it]

25


Epoch 1/1:   1%|          | 25/3186 [01:29<2:18:51,  2.64s/it]

26


Epoch 1/1:   1%|          | 26/3186 [01:30<1:57:07,  2.22s/it]

27


Epoch 1/1:   1%|          | 27/3186 [01:33<2:12:30,  2.52s/it]

28


Epoch 1/1:   1%|          | 28/3186 [01:35<1:58:46,  2.26s/it]

29


Epoch 1/1:   1%|          | 29/3186 [01:38<2:07:19,  2.42s/it]

30


Epoch 1/1:   1%|          | 30/3186 [01:40<2:06:55,  2.41s/it]

31


Epoch 1/1:   1%|          | 31/3186 [01:43<2:11:38,  2.50s/it]

32


Epoch 1/1:   1%|          | 32/3186 [01:47<2:42:50,  3.10s/it]

33


Epoch 1/1:   1%|          | 33/3186 [01:49<2:24:20,  2.75s/it]

34


Epoch 1/1:   1%|          | 34/3186 [01:52<2:22:18,  2.71s/it]

35


Epoch 1/1:   1%|          | 35/3186 [01:54<2:22:34,  2.71s/it]

36


Epoch 1/1:   1%|          | 36/3186 [01:58<2:31:36,  2.89s/it]

37


Epoch 1/1:   1%|          | 37/3186 [02:00<2:27:35,  2.81s/it]

38


Epoch 1/1:   1%|          | 38/3186 [02:04<2:37:19,  3.00s/it]

39


Epoch 1/1:   1%|          | 39/3186 [02:07<2:42:02,  3.09s/it]

40


Epoch 1/1:   1%|▏         | 40/3186 [02:10<2:39:33,  3.04s/it]

41


Epoch 1/1:   1%|▏         | 41/3186 [02:12<2:26:36,  2.80s/it]

42


Epoch 1/1:   1%|▏         | 42/3186 [02:14<2:04:52,  2.38s/it]

43


Epoch 1/1:   1%|▏         | 43/3186 [02:18<2:30:47,  2.88s/it]

44


Epoch 1/1:   1%|▏         | 44/3186 [02:22<2:54:04,  3.32s/it]

45


Epoch 1/1:   1%|▏         | 45/3186 [02:25<2:50:18,  3.25s/it]

46


Epoch 1/1:   1%|▏         | 46/3186 [02:28<2:50:33,  3.26s/it]

47


Epoch 1/1:   1%|▏         | 47/3186 [02:32<2:54:53,  3.34s/it]

48


Epoch 1/1:   2%|▏         | 48/3186 [02:34<2:37:59,  3.02s/it]

49


Epoch 1/1:   2%|▏         | 49/3186 [02:38<2:48:09,  3.22s/it]

50


Epoch 1/1:   2%|▏         | 50/3186 [02:41<2:46:40,  3.19s/it]

51


Epoch 1/1:   2%|▏         | 51/3186 [02:45<3:04:19,  3.53s/it]

52


Epoch 1/1:   2%|▏         | 52/3186 [02:48<2:53:23,  3.32s/it]

53


Epoch 1/1:   2%|▏         | 53/3186 [02:51<2:45:16,  3.17s/it]

54


Epoch 1/1:   2%|▏         | 54/3186 [02:57<3:23:24,  3.90s/it]

55


Epoch 1/1:   2%|▏         | 55/3186 [03:01<3:26:08,  3.95s/it]

56


Epoch 1/1:   2%|▏         | 56/3186 [03:04<3:15:58,  3.76s/it]

57


Epoch 1/1:   2%|▏         | 57/3186 [03:10<3:49:27,  4.40s/it]

58


Epoch 1/1:   2%|▏         | 58/3186 [03:13<3:30:25,  4.04s/it]

59


Epoch 1/1:   2%|▏         | 59/3186 [03:15<2:59:38,  3.45s/it]

60


Epoch 1/1:   2%|▏         | 60/3186 [03:18<2:52:25,  3.31s/it]

61


Epoch 1/1:   2%|▏         | 61/3186 [03:20<2:35:12,  2.98s/it]

62


Epoch 1/1:   2%|▏         | 62/3186 [03:24<2:37:53,  3.03s/it]

63


Epoch 1/1:   2%|▏         | 63/3186 [03:27<2:47:40,  3.22s/it]

64


Epoch 1/1:   2%|▏         | 64/3186 [03:32<3:11:09,  3.67s/it]

65


Epoch 1/1:   2%|▏         | 65/3186 [03:36<3:16:53,  3.79s/it]

66


Epoch 1/1:   2%|▏         | 66/3186 [03:39<3:04:10,  3.54s/it]

67


Epoch 1/1:   2%|▏         | 67/3186 [03:41<2:46:20,  3.20s/it]

68


Epoch 1/1:   2%|▏         | 68/3186 [03:43<2:21:49,  2.73s/it]

69


Epoch 1/1:   2%|▏         | 69/3186 [03:46<2:30:15,  2.89s/it]

70


Epoch 1/1:   2%|▏         | 70/3186 [03:49<2:32:18,  2.93s/it]

71


Epoch 1/1:   2%|▏         | 71/3186 [03:52<2:24:11,  2.78s/it]

72


Epoch 1/1:   2%|▏         | 72/3186 [03:55<2:26:10,  2.82s/it]

73


Epoch 1/1:   2%|▏         | 73/3186 [03:58<2:27:52,  2.85s/it]

74


Epoch 1/1:   2%|▏         | 74/3186 [04:01<2:43:11,  3.15s/it]

75


Epoch 1/1:   2%|▏         | 75/3186 [04:04<2:28:58,  2.87s/it]

76


Epoch 1/1:   2%|▏         | 76/3186 [04:11<3:34:44,  4.14s/it]

77


Epoch 1/1:   2%|▏         | 77/3186 [04:15<3:29:08,  4.04s/it]

78


Epoch 1/1:   2%|▏         | 78/3186 [04:17<3:09:52,  3.67s/it]

79


Epoch 1/1:   2%|▏         | 79/3186 [04:18<2:30:32,  2.91s/it]

80


Epoch 1/1:   3%|▎         | 80/3186 [04:21<2:32:10,  2.94s/it]

81


Epoch 1/1:   3%|▎         | 81/3186 [04:25<2:35:51,  3.01s/it]

82


Epoch 1/1:   3%|▎         | 82/3186 [04:27<2:24:08,  2.79s/it]

83


Epoch 1/1:   3%|▎         | 83/3186 [04:30<2:30:32,  2.91s/it]

84


Epoch 1/1:   3%|▎         | 84/3186 [04:34<2:45:51,  3.21s/it]

85


Epoch 1/1:   3%|▎         | 85/3186 [04:37<2:47:52,  3.25s/it]

86


Epoch 1/1:   3%|▎         | 86/3186 [04:40<2:33:14,  2.97s/it]

87


Epoch 1/1:   3%|▎         | 87/3186 [04:43<2:37:17,  3.05s/it]

88


Epoch 1/1:   3%|▎         | 88/3186 [04:45<2:28:00,  2.87s/it]

89


Epoch 1/1:   3%|▎         | 89/3186 [04:47<2:10:59,  2.54s/it]

90


Epoch 1/1:   3%|▎         | 90/3186 [04:50<2:10:42,  2.53s/it]

91


Epoch 1/1:   3%|▎         | 91/3186 [04:53<2:21:06,  2.74s/it]

92


Epoch 1/1:   3%|▎         | 92/3186 [04:55<2:08:59,  2.50s/it]

93


Epoch 1/1:   3%|▎         | 93/3186 [05:01<3:02:56,  3.55s/it]

94


Epoch 1/1:   3%|▎         | 94/3186 [05:03<2:46:02,  3.22s/it]

95


Epoch 1/1:   3%|▎         | 95/3186 [05:05<2:27:22,  2.86s/it]

96


Epoch 1/1:   3%|▎         | 96/3186 [05:08<2:23:07,  2.78s/it]

97


Epoch 1/1:   3%|▎         | 97/3186 [05:11<2:36:18,  3.04s/it]

98


Epoch 1/1:   3%|▎         | 98/3186 [05:15<2:46:11,  3.23s/it]

99


Epoch 1/1:   3%|▎         | 99/3186 [05:17<2:21:12,  2.74s/it]

100


Epoch 1/1:   3%|▎         | 100/3186 [05:19<2:13:58,  2.60s/it]

101
Only give the best option.
(A) Put down the towel.


Epoch 1/1:   3%|▎         | 101/3186 [05:22<2:17:17,  2.67s/it]

102


Epoch 1/1:   3%|▎         | 102/3186 [05:25<2:18:01,  2.69s/it]

103


Epoch 1/1:   3%|▎         | 103/3186 [05:27<2:20:22,  2.73s/it]

104


Epoch 1/1:   3%|▎         | 104/3186 [05:30<2:15:14,  2.63s/it]

105


Epoch 1/1:   3%|▎         | 105/3186 [05:32<2:09:01,  2.51s/it]

106


Epoch 1/1:   3%|▎         | 106/3186 [05:34<2:00:48,  2.35s/it]

107


Epoch 1/1:   3%|▎         | 107/3186 [05:38<2:19:39,  2.72s/it]

108


Epoch 1/1:   3%|▎         | 108/3186 [05:43<2:55:50,  3.43s/it]

109


Epoch 1/1:   3%|▎         | 109/3186 [05:45<2:42:51,  3.18s/it]

110


Epoch 1/1:   3%|▎         | 110/3186 [05:48<2:35:39,  3.04s/it]

111


Epoch 1/1:   3%|▎         | 111/3186 [05:50<2:22:34,  2.78s/it]

112


Epoch 1/1:   4%|▎         | 112/3186 [05:53<2:19:52,  2.73s/it]

113


Epoch 1/1:   4%|▎         | 113/3186 [05:56<2:29:25,  2.92s/it]

114


Epoch 1/1:   4%|▎         | 114/3186 [06:00<2:43:16,  3.19s/it]

115


Epoch 1/1:   4%|▎         | 115/3186 [06:02<2:22:34,  2.79s/it]

116


Epoch 1/1:   4%|▎         | 116/3186 [06:05<2:33:17,  3.00s/it]

117


Epoch 1/1:   4%|▎         | 117/3186 [06:09<2:47:54,  3.28s/it]

118


Epoch 1/1:   4%|▎         | 118/3186 [06:12<2:36:37,  3.06s/it]

119


Epoch 1/1:   4%|▎         | 119/3186 [06:15<2:42:52,  3.19s/it]

120


Epoch 1/1:   4%|▍         | 120/3186 [06:19<2:57:56,  3.48s/it]

121


Epoch 1/1:   4%|▍         | 121/3186 [06:22<2:51:01,  3.35s/it]

122


Epoch 1/1:   4%|▍         | 122/3186 [06:25<2:44:40,  3.22s/it]

123


Epoch 1/1:   4%|▍         | 123/3186 [06:27<2:19:05,  2.72s/it]

124


Epoch 1/1:   4%|▍         | 124/3186 [06:30<2:19:25,  2.73s/it]

125


Epoch 1/1:   4%|▍         | 125/3186 [06:32<2:18:48,  2.72s/it]

126


Epoch 1/1:   4%|▍         | 126/3186 [06:37<2:40:18,  3.14s/it]

127


Epoch 1/1:   4%|▍         | 127/3186 [06:40<2:45:16,  3.24s/it]

128


Epoch 1/1:   4%|▍         | 128/3186 [06:42<2:33:06,  3.00s/it]

129


Epoch 1/1:   4%|▍         | 129/3186 [06:45<2:23:56,  2.83s/it]

130


Epoch 1/1:   4%|▍         | 130/3186 [06:47<2:17:26,  2.70s/it]

131


Epoch 1/1:   4%|▍         | 131/3186 [06:49<2:00:09,  2.36s/it]

132


Epoch 1/1:   4%|▍         | 132/3186 [06:53<2:23:41,  2.82s/it]

133


Epoch 1/1:   4%|▍         | 133/3186 [06:56<2:24:01,  2.83s/it]

134


Epoch 1/1:   4%|▍         | 134/3186 [06:58<2:18:17,  2.72s/it]

135


Epoch 1/1:   4%|▍         | 135/3186 [07:00<2:02:16,  2.40s/it]

136


Epoch 1/1:   4%|▍         | 136/3186 [07:02<1:55:56,  2.28s/it]

137


Epoch 1/1:   4%|▍         | 137/3186 [07:06<2:28:18,  2.92s/it]

138


Epoch 1/1:   4%|▍         | 138/3186 [07:09<2:19:44,  2.75s/it]

139


Epoch 1/1:   4%|▍         | 139/3186 [07:10<2:05:06,  2.46s/it]

140


Epoch 1/1:   4%|▍         | 140/3186 [07:12<1:55:56,  2.28s/it]

141


Epoch 1/1:   4%|▍         | 141/3186 [07:15<2:00:04,  2.37s/it]

142


Epoch 1/1:   4%|▍         | 142/3186 [07:17<1:56:48,  2.30s/it]

143


Epoch 1/1:   4%|▍         | 143/3186 [07:19<2:01:27,  2.39s/it]

144


Epoch 1/1:   5%|▍         | 144/3186 [07:23<2:21:38,  2.79s/it]

145


Epoch 1/1:   5%|▍         | 145/3186 [07:26<2:17:51,  2.72s/it]

146


Epoch 1/1:   5%|▍         | 146/3186 [07:31<2:54:31,  3.44s/it]

147


Epoch 1/1:   5%|▍         | 147/3186 [07:34<2:50:45,  3.37s/it]

148


Epoch 1/1:   5%|▍         | 148/3186 [07:36<2:34:07,  3.04s/it]

149


Epoch 1/1:   5%|▍         | 149/3186 [07:40<2:41:40,  3.19s/it]

150


Epoch 1/1:   5%|▍         | 150/3186 [07:43<2:44:15,  3.25s/it]

151


Epoch 1/1:   5%|▍         | 151/3186 [07:45<2:25:01,  2.87s/it]

152


Epoch 1/1:   5%|▍         | 152/3186 [07:47<2:07:27,  2.52s/it]

153


Epoch 1/1:   5%|▍         | 153/3186 [07:50<2:09:10,  2.56s/it]

154


Epoch 1/1:   5%|▍         | 154/3186 [07:52<2:12:14,  2.62s/it]

155


Epoch 1/1:   5%|▍         | 155/3186 [07:55<2:05:51,  2.49s/it]

156


Epoch 1/1:   5%|▍         | 156/3186 [07:57<2:11:13,  2.60s/it]

157


Epoch 1/1:   5%|▍         | 157/3186 [08:02<2:34:15,  3.06s/it]

158


Epoch 1/1:   5%|▍         | 158/3186 [08:03<2:10:59,  2.60s/it]

159


Epoch 1/1:   5%|▍         | 159/3186 [08:07<2:33:03,  3.03s/it]

160


Epoch 1/1:   5%|▌         | 160/3186 [08:11<2:40:05,  3.17s/it]

161


Epoch 1/1:   5%|▌         | 161/3186 [08:14<2:44:12,  3.26s/it]

162


Epoch 1/1:   5%|▌         | 162/3186 [08:17<2:35:47,  3.09s/it]

163


Epoch 1/1:   5%|▌         | 163/3186 [08:19<2:28:42,  2.95s/it]

164


Epoch 1/1:   5%|▌         | 164/3186 [08:21<2:11:12,  2.61s/it]

165


Epoch 1/1:   5%|▌         | 165/3186 [08:25<2:32:00,  3.02s/it]

166


Epoch 1/1:   5%|▌         | 166/3186 [08:29<2:48:39,  3.35s/it]

167


Epoch 1/1:   5%|▌         | 167/3186 [08:33<2:48:32,  3.35s/it]

168


Epoch 1/1:   5%|▌         | 168/3186 [08:35<2:31:12,  3.01s/it]

169


Epoch 1/1:   5%|▌         | 169/3186 [08:39<2:52:17,  3.43s/it]

170


Epoch 1/1:   5%|▌         | 170/3186 [08:42<2:39:15,  3.17s/it]

171


Epoch 1/1:   5%|▌         | 171/3186 [08:46<2:47:49,  3.34s/it]

172


Epoch 1/1:   5%|▌         | 172/3186 [08:48<2:33:44,  3.06s/it]

173


Epoch 1/1:   5%|▌         | 173/3186 [08:50<2:21:02,  2.81s/it]

174


Epoch 1/1:   5%|▌         | 174/3186 [08:53<2:27:08,  2.93s/it]

175


Epoch 1/1:   5%|▌         | 175/3186 [08:56<2:22:44,  2.84s/it]

176


Epoch 1/1:   6%|▌         | 176/3186 [08:57<1:58:33,  2.36s/it]

177


Epoch 1/1:   6%|▌         | 177/3186 [09:01<2:14:00,  2.67s/it]

178


Epoch 1/1:   6%|▌         | 178/3186 [09:03<2:08:40,  2.57s/it]

179


Epoch 1/1:   6%|▌         | 179/3186 [09:06<2:09:55,  2.59s/it]

180


Epoch 1/1:   6%|▌         | 180/3186 [09:09<2:16:18,  2.72s/it]

181


Epoch 1/1:   6%|▌         | 181/3186 [09:12<2:23:13,  2.86s/it]

182


Epoch 1/1:   6%|▌         | 182/3186 [09:14<2:13:03,  2.66s/it]

183


Epoch 1/1:   6%|▌         | 183/3186 [09:18<2:28:28,  2.97s/it]

184


Epoch 1/1:   6%|▌         | 184/3186 [09:21<2:25:50,  2.91s/it]

185


Epoch 1/1:   6%|▌         | 185/3186 [09:24<2:27:26,  2.95s/it]

186


Epoch 1/1:   6%|▌         | 186/3186 [09:25<2:11:57,  2.64s/it]

187


Epoch 1/1:   6%|▌         | 187/3186 [09:28<2:11:16,  2.63s/it]

188


Epoch 1/1:   6%|▌         | 188/3186 [09:31<2:22:37,  2.85s/it]

189


Epoch 1/1:   6%|▌         | 189/3186 [09:35<2:27:05,  2.94s/it]

190


Epoch 1/1:   6%|▌         | 190/3186 [09:37<2:13:34,  2.67s/it]

191


Epoch 1/1:   6%|▌         | 191/3186 [09:39<2:01:48,  2.44s/it]

192


Epoch 1/1:   6%|▌         | 192/3186 [09:42<2:12:52,  2.66s/it]

193


Epoch 1/1:   6%|▌         | 193/3186 [09:45<2:16:13,  2.73s/it]

194


Epoch 1/1:   6%|▌         | 194/3186 [09:48<2:23:44,  2.88s/it]

195


Epoch 1/1:   6%|▌         | 195/3186 [09:51<2:25:09,  2.91s/it]

196


Epoch 1/1:   6%|▌         | 196/3186 [09:54<2:30:43,  3.02s/it]

197


Epoch 1/1:   6%|▌         | 197/3186 [09:56<2:08:34,  2.58s/it]

198


Epoch 1/1:   6%|▌         | 198/3186 [10:01<2:55:03,  3.52s/it]

199


Epoch 1/1:   6%|▌         | 199/3186 [10:03<2:33:03,  3.07s/it]

200


Epoch 1/1:   6%|▋         | 200/3186 [10:06<2:25:50,  2.93s/it]

201
The boy will take the broom next to him to clean the room.
(D) Put down the paper/notebook.


Epoch 1/1:   6%|▋         | 201/3186 [10:08<2:16:31,  2.74s/it]

202


Epoch 1/1:   6%|▋         | 202/3186 [10:10<2:06:50,  2.55s/it]

203


Epoch 1/1:   6%|▋         | 203/3186 [10:13<2:08:22,  2.58s/it]

204


Epoch 1/1:   6%|▋         | 204/3186 [10:16<2:07:46,  2.57s/it]

205


Epoch 1/1:   6%|▋         | 205/3186 [10:18<2:07:36,  2.57s/it]

206


Epoch 1/1:   6%|▋         | 206/3186 [10:22<2:19:16,  2.80s/it]

207


Epoch 1/1:   6%|▋         | 207/3186 [10:24<2:12:35,  2.67s/it]

208


Epoch 1/1:   7%|▋         | 208/3186 [10:26<2:07:02,  2.56s/it]

209


Epoch 1/1:   7%|▋         | 209/3186 [10:28<1:57:02,  2.36s/it]

210


Epoch 1/1:   7%|▋         | 210/3186 [10:31<2:06:12,  2.54s/it]

211


Epoch 1/1:   7%|▋         | 211/3186 [10:34<2:09:55,  2.62s/it]

212


Epoch 1/1:   7%|▋         | 212/3186 [10:37<2:11:44,  2.66s/it]

213


Epoch 1/1:   7%|▋         | 213/3186 [10:39<2:10:40,  2.64s/it]

214


Epoch 1/1:   7%|▋         | 214/3186 [10:42<2:10:33,  2.64s/it]

215


Epoch 1/1:   7%|▋         | 215/3186 [10:44<2:05:33,  2.54s/it]

216


Epoch 1/1:   7%|▋         | 216/3186 [10:48<2:22:57,  2.89s/it]

217


Epoch 1/1:   7%|▋         | 217/3186 [10:50<2:11:29,  2.66s/it]

218


Epoch 1/1:   7%|▋         | 218/3186 [10:53<2:14:58,  2.73s/it]

219


Epoch 1/1:   7%|▋         | 219/3186 [10:58<2:44:46,  3.33s/it]

220


Epoch 1/1:   7%|▋         | 220/3186 [11:01<2:46:53,  3.38s/it]

221


Epoch 1/1:   7%|▋         | 221/3186 [11:06<3:10:47,  3.86s/it]

222


Epoch 1/1:   7%|▋         | 222/3186 [11:09<2:57:12,  3.59s/it]

223


Epoch 1/1:   7%|▋         | 223/3186 [11:12<2:42:32,  3.29s/it]

224


Epoch 1/1:   7%|▋         | 224/3186 [11:15<2:49:03,  3.42s/it]

225


Epoch 1/1:   7%|▋         | 225/3186 [11:18<2:39:37,  3.23s/it]

226


Epoch 1/1:   7%|▋         | 226/3186 [11:22<2:44:24,  3.33s/it]

227


Epoch 1/1:   7%|▋         | 227/3186 [11:25<2:46:28,  3.38s/it]

228


Epoch 1/1:   7%|▋         | 228/3186 [11:28<2:31:10,  3.07s/it]

229


Epoch 1/1:   7%|▋         | 229/3186 [11:30<2:24:22,  2.93s/it]

230


Epoch 1/1:   7%|▋         | 230/3186 [11:32<2:07:16,  2.58s/it]

231


Epoch 1/1:   7%|▋         | 231/3186 [11:35<2:15:19,  2.75s/it]

232


Epoch 1/1:   7%|▋         | 232/3186 [11:39<2:35:57,  3.17s/it]

233


Epoch 1/1:   7%|▋         | 233/3186 [11:43<2:38:20,  3.22s/it]

234


Epoch 1/1:   7%|▋         | 234/3186 [11:45<2:26:58,  2.99s/it]

235


Epoch 1/1:   7%|▋         | 235/3186 [11:47<2:13:34,  2.72s/it]

236


Epoch 1/1:   7%|▋         | 236/3186 [11:49<1:59:48,  2.44s/it]

237


Epoch 1/1:   7%|▋         | 237/3186 [11:53<2:30:01,  3.05s/it]

238


Epoch 1/1:   7%|▋         | 238/3186 [11:55<2:13:52,  2.72s/it]

239


Epoch 1/1:   8%|▊         | 239/3186 [11:58<2:13:16,  2.71s/it]

240


Epoch 1/1:   8%|▊         | 240/3186 [12:00<2:03:06,  2.51s/it]

241


Epoch 1/1:   8%|▊         | 241/3186 [12:04<2:21:00,  2.87s/it]

242


Epoch 1/1:   8%|▊         | 242/3186 [12:05<2:02:49,  2.50s/it]

243


Epoch 1/1:   8%|▊         | 243/3186 [12:08<2:10:49,  2.67s/it]

244


Epoch 1/1:   8%|▊         | 244/3186 [12:11<2:13:01,  2.71s/it]

245


Epoch 1/1:   8%|▊         | 245/3186 [12:15<2:34:59,  3.16s/it]

246


Epoch 1/1:   8%|▊         | 246/3186 [12:18<2:23:27,  2.93s/it]

247


Epoch 1/1:   8%|▊         | 247/3186 [12:21<2:27:58,  3.02s/it]

248


Epoch 1/1:   8%|▊         | 248/3186 [12:25<2:44:25,  3.36s/it]

249


Epoch 1/1:   8%|▊         | 249/3186 [12:28<2:36:23,  3.20s/it]

250


Epoch 1/1:   8%|▊         | 250/3186 [12:31<2:35:02,  3.17s/it]

251


Epoch 1/1:   8%|▊         | 251/3186 [12:34<2:29:41,  3.06s/it]

252


Epoch 1/1:   8%|▊         | 252/3186 [12:37<2:25:48,  2.98s/it]

253


Epoch 1/1:   8%|▊         | 253/3186 [12:39<2:14:02,  2.74s/it]

254


Epoch 1/1:   8%|▊         | 254/3186 [12:42<2:16:08,  2.79s/it]

255


Epoch 1/1:   8%|▊         | 255/3186 [12:44<2:03:02,  2.52s/it]

256


Epoch 1/1:   8%|▊         | 256/3186 [12:46<1:59:36,  2.45s/it]

257


Epoch 1/1:   8%|▊         | 257/3186 [12:49<2:03:52,  2.54s/it]

258


Epoch 1/1:   8%|▊         | 258/3186 [12:51<1:59:50,  2.46s/it]

259


Epoch 1/1:   8%|▊         | 259/3186 [12:54<2:00:42,  2.47s/it]

260


Epoch 1/1:   8%|▊         | 260/3186 [12:55<1:41:57,  2.09s/it]

261


Epoch 1/1:   8%|▊         | 261/3186 [12:58<2:04:01,  2.54s/it]

262


Epoch 1/1:   8%|▊         | 262/3186 [13:00<1:49:34,  2.25s/it]

263


Epoch 1/1:   8%|▊         | 263/3186 [13:03<1:57:40,  2.42s/it]

264


Epoch 1/1:   8%|▊         | 264/3186 [13:05<1:54:54,  2.36s/it]

265


Epoch 1/1:   8%|▊         | 265/3186 [13:09<2:12:57,  2.73s/it]

266


Epoch 1/1:   8%|▊         | 266/3186 [13:11<2:07:48,  2.63s/it]

267


Epoch 1/1:   8%|▊         | 267/3186 [13:13<2:02:06,  2.51s/it]

268


Epoch 1/1:   8%|▊         | 268/3186 [13:16<2:02:37,  2.52s/it]

269


Epoch 1/1:   8%|▊         | 269/3186 [13:18<2:05:07,  2.57s/it]

270


Epoch 1/1:   8%|▊         | 270/3186 [13:22<2:15:41,  2.79s/it]

271


Epoch 1/1:   9%|▊         | 271/3186 [13:24<2:04:18,  2.56s/it]

272


Epoch 1/1:   9%|▊         | 272/3186 [13:26<2:05:03,  2.57s/it]

273


Epoch 1/1:   9%|▊         | 273/3186 [13:29<2:10:54,  2.70s/it]

274


Epoch 1/1:   9%|▊         | 274/3186 [13:33<2:26:12,  3.01s/it]

275


Epoch 1/1:   9%|▊         | 275/3186 [13:38<2:54:31,  3.60s/it]

276


Epoch 1/1:   9%|▊         | 276/3186 [13:41<2:41:07,  3.32s/it]

277


Epoch 1/1:   9%|▊         | 277/3186 [13:43<2:32:49,  3.15s/it]

278


Epoch 1/1:   9%|▊         | 278/3186 [13:49<3:12:05,  3.96s/it]

279


Epoch 1/1:   9%|▉         | 279/3186 [13:52<2:54:23,  3.60s/it]

280


Epoch 1/1:   9%|▉         | 280/3186 [13:54<2:36:58,  3.24s/it]

281


Epoch 1/1:   9%|▉         | 281/3186 [13:57<2:22:51,  2.95s/it]

282


Epoch 1/1:   9%|▉         | 282/3186 [14:00<2:32:17,  3.15s/it]

283


Epoch 1/1:   9%|▉         | 283/3186 [14:03<2:18:50,  2.87s/it]

284


Epoch 1/1:   9%|▉         | 284/3186 [14:05<2:09:47,  2.68s/it]

285


Epoch 1/1:   9%|▉         | 285/3186 [14:07<2:06:04,  2.61s/it]

286


Epoch 1/1:   9%|▉         | 286/3186 [14:10<2:01:10,  2.51s/it]

287


Epoch 1/1:   9%|▉         | 287/3186 [14:12<1:55:13,  2.38s/it]

288


Epoch 1/1:   9%|▉         | 288/3186 [14:14<1:48:33,  2.25s/it]

289


Epoch 1/1:   9%|▉         | 289/3186 [14:16<1:56:59,  2.42s/it]

290


Epoch 1/1:   9%|▉         | 290/3186 [14:19<1:58:37,  2.46s/it]

291


Epoch 1/1:   9%|▉         | 291/3186 [14:21<1:58:25,  2.45s/it]

292


Epoch 1/1:   9%|▉         | 292/3186 [14:24<1:58:43,  2.46s/it]

293


Epoch 1/1:   9%|▉         | 293/3186 [14:27<2:07:57,  2.65s/it]

294


Epoch 1/1:   9%|▉         | 294/3186 [14:29<1:55:06,  2.39s/it]

295


Epoch 1/1:   9%|▉         | 295/3186 [14:31<1:53:54,  2.36s/it]

296


Epoch 1/1:   9%|▉         | 296/3186 [14:33<1:54:13,  2.37s/it]

297


Epoch 1/1:   9%|▉         | 297/3186 [14:35<1:49:32,  2.27s/it]

298


Epoch 1/1:   9%|▉         | 298/3186 [14:37<1:45:03,  2.18s/it]

299


Epoch 1/1:   9%|▉         | 299/3186 [14:40<1:48:24,  2.25s/it]

300


Epoch 1/1:   9%|▉         | 300/3186 [14:43<2:05:56,  2.62s/it]

301
The person throws the clothes on the floor and leaves the room next to the television.
(C) Throw the clothes.


Epoch 1/1:   9%|▉         | 301/3186 [14:46<2:04:36,  2.59s/it]

302


Epoch 1/1:   9%|▉         | 302/3186 [14:49<2:15:46,  2.82s/it]

303


Epoch 1/1:  10%|▉         | 303/3186 [14:51<2:05:18,  2.61s/it]

304


Epoch 1/1:  10%|▉         | 304/3186 [14:55<2:15:20,  2.82s/it]

305


Epoch 1/1:  10%|▉         | 305/3186 [14:57<2:11:15,  2.73s/it]

306


Epoch 1/1:  10%|▉         | 306/3186 [14:59<1:54:13,  2.38s/it]

307


Epoch 1/1:  10%|▉         | 307/3186 [15:01<1:48:41,  2.27s/it]

308


Epoch 1/1:  10%|▉         | 308/3186 [15:03<1:49:28,  2.28s/it]

309


Epoch 1/1:  10%|▉         | 309/3186 [15:04<1:33:55,  1.96s/it]

310


Epoch 1/1:  10%|▉         | 310/3186 [15:07<1:45:09,  2.19s/it]

311


Epoch 1/1:  10%|▉         | 311/3186 [15:10<1:51:34,  2.33s/it]

312


Epoch 1/1:  10%|▉         | 312/3186 [15:12<1:50:03,  2.30s/it]

313


Epoch 1/1:  10%|▉         | 313/3186 [15:15<1:55:27,  2.41s/it]

314


Epoch 1/1:  10%|▉         | 314/3186 [15:17<1:52:16,  2.35s/it]

315


Epoch 1/1:  10%|▉         | 315/3186 [15:19<1:46:44,  2.23s/it]

316


Epoch 1/1:  10%|▉         | 316/3186 [15:21<1:54:42,  2.40s/it]

317


Epoch 1/1:  10%|▉         | 317/3186 [15:24<1:54:52,  2.40s/it]

318


Epoch 1/1:  10%|▉         | 318/3186 [15:27<2:09:14,  2.70s/it]

319


Epoch 1/1:  10%|█         | 319/3186 [15:30<2:04:37,  2.61s/it]

320


Epoch 1/1:  65%|██████▍   | 2056/3186 [1:46:18<15:40,  1.20it/s]

2057


Epoch 1/1:  65%|██████▍   | 2057/3186 [1:46:19<15:22,  1.22it/s]

2058


Epoch 1/1:  65%|██████▍   | 2058/3186 [1:46:20<13:27,  1.40it/s]

2059


Epoch 1/1:  65%|██████▍   | 2059/3186 [1:46:21<16:25,  1.14it/s]

2060


Epoch 1/1:  65%|██████▍   | 2060/3186 [1:46:22<15:45,  1.19it/s]

2061


Epoch 1/1:  65%|██████▍   | 2061/3186 [1:46:23<19:00,  1.01s/it]

2062


Epoch 1/1:  65%|██████▍   | 2062/3186 [1:46:25<21:53,  1.17s/it]

2063


Epoch 1/1:  65%|██████▍   | 2063/3186 [1:46:25<18:11,  1.03it/s]

2064


Epoch 1/1:  65%|██████▍   | 2064/3186 [1:46:26<16:39,  1.12it/s]

2065


Epoch 1/1:  65%|██████▍   | 2065/3186 [1:46:27<15:54,  1.17it/s]

2066


Epoch 1/1:  65%|██████▍   | 2066/3186 [1:46:27<14:16,  1.31it/s]

2067


Epoch 1/1:  65%|██████▍   | 2067/3186 [1:46:29<18:50,  1.01s/it]

2068


Epoch 1/1:  65%|██████▍   | 2068/3186 [1:46:29<16:37,  1.12it/s]

2069


Epoch 1/1:  65%|██████▍   | 2069/3186 [1:46:31<17:34,  1.06it/s]

2070


Epoch 1/1:  65%|██████▍   | 2070/3186 [1:46:31<16:22,  1.14it/s]

2071


Epoch 1/1:  65%|██████▌   | 2071/3186 [1:46:32<16:40,  1.11it/s]

2072


Epoch 1/1:  65%|██████▌   | 2072/3186 [1:46:33<14:48,  1.25it/s]

2073


Epoch 1/1:  65%|██████▌   | 2073/3186 [1:46:34<17:11,  1.08it/s]

2074


Epoch 1/1:  65%|██████▌   | 2074/3186 [1:46:35<14:59,  1.24it/s]

2075


Epoch 1/1:  65%|██████▌   | 2075/3186 [1:46:35<15:46,  1.17it/s]

2076


Epoch 1/1:  65%|██████▌   | 2076/3186 [1:46:36<13:59,  1.32it/s]

2077


Epoch 1/1:  65%|██████▌   | 2077/3186 [1:46:37<12:48,  1.44it/s]

2078


Epoch 1/1:  65%|██████▌   | 2078/3186 [1:46:37<13:23,  1.38it/s]

2079


Epoch 1/1:  65%|██████▌   | 2079/3186 [1:46:38<14:52,  1.24it/s]

2080


Epoch 1/1:  65%|██████▌   | 2080/3186 [1:46:39<13:44,  1.34it/s]

2081


Epoch 1/1:  65%|██████▌   | 2081/3186 [1:46:39<12:24,  1.48it/s]

2082


Epoch 1/1:  65%|██████▌   | 2082/3186 [1:46:41<18:51,  1.02s/it]

2083


Epoch 1/1:  65%|██████▌   | 2083/3186 [1:46:42<17:02,  1.08it/s]

2084


Epoch 1/1:  65%|██████▌   | 2084/3186 [1:46:43<19:53,  1.08s/it]

2085


Epoch 1/1:  65%|██████▌   | 2085/3186 [1:46:44<16:45,  1.09it/s]

2086


Epoch 1/1:  65%|██████▌   | 2086/3186 [1:46:45<19:37,  1.07s/it]

2087


Epoch 1/1:  66%|██████▌   | 2087/3186 [1:46:46<17:17,  1.06it/s]

2088


Epoch 1/1:  66%|██████▌   | 2088/3186 [1:46:47<15:46,  1.16it/s]

2089


Epoch 1/1:  66%|██████▌   | 2089/3186 [1:46:47<14:12,  1.29it/s]

2090


Epoch 1/1:  66%|██████▌   | 2090/3186 [1:46:48<15:16,  1.20it/s]

2091


Epoch 1/1:  66%|██████▌   | 2091/3186 [1:46:52<31:56,  1.75s/it]

2092


Epoch 1/1:  66%|██████▌   | 2092/3186 [1:46:53<26:55,  1.48s/it]

2093


Epoch 1/1:  66%|██████▌   | 2093/3186 [1:46:54<21:45,  1.19s/it]

2094


Epoch 1/1:  66%|██████▌   | 2094/3186 [1:46:54<17:47,  1.02it/s]

2095


Epoch 1/1:  66%|██████▌   | 2095/3186 [1:46:56<22:28,  1.24s/it]

2096


Epoch 1/1:  66%|██████▌   | 2096/3186 [1:46:56<18:45,  1.03s/it]

2097


Epoch 1/1:  66%|██████▌   | 2097/3186 [1:46:57<16:11,  1.12it/s]

2098


Epoch 1/1:  69%|██████▉   | 2194/3186 [1:48:21<17:06,  1.03s/it]

2195


Epoch 1/1:  69%|██████▉   | 2195/3186 [1:48:22<15:34,  1.06it/s]

2196


Epoch 1/1:  69%|██████▉   | 2196/3186 [1:48:23<14:47,  1.12it/s]

2197


Epoch 1/1:  69%|██████▉   | 2197/3186 [1:48:24<15:42,  1.05it/s]

2198


Epoch 1/1:  69%|██████▉   | 2198/3186 [1:48:24<14:04,  1.17it/s]

2199


Epoch 1/1:  69%|██████▉   | 2199/3186 [1:48:25<14:03,  1.17it/s]

2200


Epoch 1/1:  69%|██████▉   | 2200/3186 [1:48:26<12:48,  1.28it/s]

2201
What material is the stationary cylinder when the video ends?
(B) metal


Epoch 1/1:  69%|██████▉   | 2201/3186 [1:48:27<12:50,  1.28it/s]

2202


Epoch 1/1:  69%|██████▉   | 2202/3186 [1:48:28<13:32,  1.21it/s]

2203


Epoch 1/1:  69%|██████▉   | 2203/3186 [1:48:28<12:17,  1.33it/s]

2204


Epoch 1/1:  69%|██████▉   | 2204/3186 [1:48:29<11:42,  1.40it/s]

2205


Epoch 1/1:  69%|██████▉   | 2205/3186 [1:48:29<10:46,  1.52it/s]

2206


Epoch 1/1:  69%|██████▉   | 2206/3186 [1:48:31<14:08,  1.15it/s]

2207


Epoch 1/1:  69%|██████▉   | 2207/3186 [1:48:32<15:02,  1.08it/s]

2208


Epoch 1/1:  69%|██████▉   | 2208/3186 [1:48:32<12:57,  1.26it/s]

2209


Epoch 1/1:  69%|██████▉   | 2209/3186 [1:48:34<16:27,  1.01s/it]

2210


Epoch 1/1:  69%|██████▉   | 2210/3186 [1:48:35<16:37,  1.02s/it]

2211


Epoch 1/1:  69%|██████▉   | 2211/3186 [1:48:35<14:53,  1.09it/s]

2212


Epoch 1/1:  69%|██████▉   | 2212/3186 [1:48:37<17:30,  1.08s/it]

2213


Epoch 1/1:  69%|██████▉   | 2213/3186 [1:48:37<15:18,  1.06it/s]

2214


Epoch 1/1:  69%|██████▉   | 2214/3186 [1:48:39<17:31,  1.08s/it]

2215


Epoch 1/1:  70%|██████▉   | 2215/3186 [1:48:40<16:04,  1.01it/s]

2216


Epoch 1/1:  70%|██████▉   | 2216/3186 [1:48:41<18:54,  1.17s/it]

2217


Epoch 1/1:  70%|██████▉   | 2217/3186 [1:48:42<18:28,  1.14s/it]

2218


Epoch 1/1:  70%|██████▉   | 2218/3186 [1:48:43<16:39,  1.03s/it]

2219


Epoch 1/1:  70%|██████▉   | 2219/3186 [1:48:44<14:17,  1.13it/s]

2220


Epoch 1/1:  70%|██████▉   | 2220/3186 [1:48:45<16:37,  1.03s/it]

2221


Epoch 1/1:  70%|██████▉   | 2221/3186 [1:48:46<16:49,  1.05s/it]

2222


Epoch 1/1:  70%|██████▉   | 2222/3186 [1:48:48<19:02,  1.19s/it]

2223


Epoch 1/1:  70%|██████▉   | 2223/3186 [1:48:48<15:57,  1.01it/s]

2224


Epoch 1/1:  70%|██████▉   | 2224/3186 [1:48:49<14:01,  1.14it/s]

2225


Epoch 1/1:  70%|██████▉   | 2225/3186 [1:48:50<14:18,  1.12it/s]

2226


Epoch 1/1:  70%|██████▉   | 2226/3186 [1:48:51<14:19,  1.12it/s]

2227


Epoch 1/1:  70%|██████▉   | 2227/3186 [1:48:51<12:47,  1.25it/s]

2228


Epoch 1/1:  70%|██████▉   | 2228/3186 [1:48:52<12:47,  1.25it/s]

2229


Epoch 1/1:  70%|██████▉   | 2229/3186 [1:48:52<10:59,  1.45it/s]

2230


Epoch 1/1:  70%|██████▉   | 2230/3186 [1:48:54<14:34,  1.09it/s]

2231


Epoch 1/1:  70%|███████   | 2231/3186 [1:48:55<14:45,  1.08it/s]

2232


Epoch 1/1:  70%|███████   | 2232/3186 [1:48:56<13:53,  1.14it/s]

2233


Epoch 1/1:  70%|███████   | 2233/3186 [1:48:56<12:35,  1.26it/s]

2234


Epoch 1/1:  70%|███████   | 2234/3186 [1:48:57<13:27,  1.18it/s]

2235


Epoch 1/1:  70%|███████   | 2235/3186 [1:48:58<13:51,  1.14it/s]

2236


Epoch 1/1:  70%|███████   | 2236/3186 [1:48:59<13:48,  1.15it/s]

2237


Epoch 1/1:  70%|███████   | 2237/3186 [1:49:00<12:49,  1.23it/s]

2238


Epoch 1/1:  70%|███████   | 2238/3186 [1:49:01<13:37,  1.16it/s]

2239


Epoch 1/1:  70%|███████   | 2239/3186 [1:49:02<14:09,  1.12it/s]

2240


Epoch 1/1:  70%|███████   | 2240/3186 [1:49:02<13:05,  1.20it/s]

2241


Epoch 1/1:  70%|███████   | 2241/3186 [1:49:03<13:27,  1.17it/s]

2242


Epoch 1/1:  70%|███████   | 2242/3186 [1:49:05<16:55,  1.08s/it]

2243


Epoch 1/1:  70%|███████   | 2243/3186 [1:49:05<14:16,  1.10it/s]

2244


Epoch 1/1:  70%|███████   | 2244/3186 [1:49:06<12:20,  1.27it/s]

2245


Epoch 1/1:  70%|███████   | 2245/3186 [1:49:07<16:52,  1.08s/it]

2246


Epoch 1/1:  70%|███████   | 2246/3186 [1:49:09<17:06,  1.09s/it]

2247


Epoch 1/1:  71%|███████   | 2247/3186 [1:49:09<14:26,  1.08it/s]

2248


Epoch 1/1:  71%|███████   | 2248/3186 [1:49:10<13:42,  1.14it/s]

2249


Epoch 1/1:  71%|███████   | 2249/3186 [1:49:10<12:08,  1.29it/s]

2250


Epoch 1/1:  71%|███████   | 2250/3186 [1:49:11<10:55,  1.43it/s]

2251


Epoch 1/1:  71%|███████   | 2251/3186 [1:49:12<12:13,  1.27it/s]

2252


Epoch 1/1:  71%|███████   | 2252/3186 [1:49:13<12:54,  1.21it/s]

2253


Epoch 1/1:  71%|███████   | 2253/3186 [1:49:14<15:32,  1.00it/s]

2254


Epoch 1/1:  71%|███████   | 2254/3186 [1:49:15<15:53,  1.02s/it]

2255


Epoch 1/1:  71%|███████   | 2255/3186 [1:49:16<14:04,  1.10it/s]

2256


Epoch 1/1:  71%|███████   | 2256/3186 [1:49:17<16:04,  1.04s/it]

2257


Epoch 1/1:  71%|███████   | 2257/3186 [1:49:18<14:30,  1.07it/s]

2258


Epoch 1/1:  71%|███████   | 2258/3186 [1:49:19<15:01,  1.03it/s]

2259


Epoch 1/1:  71%|███████   | 2259/3186 [1:49:21<17:21,  1.12s/it]

2260


Epoch 1/1:  71%|███████   | 2260/3186 [1:49:21<15:26,  1.00s/it]

2261


Epoch 1/1:  71%|███████   | 2261/3186 [1:49:22<13:40,  1.13it/s]

2262


Epoch 1/1:  71%|███████   | 2262/3186 [1:49:23<15:06,  1.02it/s]

2263


Epoch 1/1:  71%|███████   | 2263/3186 [1:49:24<14:08,  1.09it/s]

2264


Epoch 1/1:  71%|███████   | 2264/3186 [1:49:25<12:41,  1.21it/s]

2265


Epoch 1/1:  71%|███████   | 2265/3186 [1:49:26<14:42,  1.04it/s]

2266


Epoch 1/1:  71%|███████   | 2266/3186 [1:49:26<12:38,  1.21it/s]

2267


Epoch 1/1:  71%|███████   | 2267/3186 [1:49:28<16:24,  1.07s/it]

2268


Epoch 1/1:  71%|███████   | 2268/3186 [1:49:28<13:52,  1.10it/s]

2269


Epoch 1/1:  71%|███████   | 2269/3186 [1:49:30<14:48,  1.03it/s]

2270


Epoch 1/1:  71%|███████   | 2270/3186 [1:49:30<14:00,  1.09it/s]

2271


Epoch 1/1:  71%|███████▏  | 2271/3186 [1:49:31<13:17,  1.15it/s]

2272


Epoch 1/1:  71%|███████▏  | 2272/3186 [1:49:33<16:01,  1.05s/it]

2273


Epoch 1/1:  71%|███████▏  | 2273/3186 [1:49:33<15:09,  1.00it/s]

2274


Epoch 1/1:  71%|███████▏  | 2274/3186 [1:49:37<25:05,  1.65s/it]

2275


Epoch 1/1:  71%|███████▏  | 2275/3186 [1:49:38<24:00,  1.58s/it]

2276


Epoch 1/1:  71%|███████▏  | 2276/3186 [1:49:39<20:19,  1.34s/it]

2277


Epoch 1/1:  71%|███████▏  | 2277/3186 [1:49:40<20:06,  1.33s/it]

2278


Epoch 1/1:  72%|███████▏  | 2278/3186 [1:49:41<18:16,  1.21s/it]

2279


Epoch 1/1:  72%|███████▏  | 2279/3186 [1:49:42<18:05,  1.20s/it]

2280


Epoch 1/1:  72%|███████▏  | 2280/3186 [1:49:43<16:25,  1.09s/it]

2281


Epoch 1/1:  72%|███████▏  | 2281/3186 [1:49:44<14:00,  1.08it/s]

2282


Epoch 1/1:  72%|███████▏  | 2282/3186 [1:49:44<13:00,  1.16it/s]

2283


Epoch 1/1:  72%|███████▏  | 2283/3186 [1:49:45<11:43,  1.28it/s]

2284


Epoch 1/1:  72%|███████▏  | 2284/3186 [1:49:46<12:22,  1.22it/s]

2285


Epoch 1/1:  72%|███████▏  | 2285/3186 [1:49:46<10:36,  1.42it/s]

2286


Epoch 1/1:  72%|███████▏  | 2286/3186 [1:49:47<11:58,  1.25it/s]

2287


Epoch 1/1:  72%|███████▏  | 2287/3186 [1:49:48<11:12,  1.34it/s]

2288


Epoch 1/1:  72%|███████▏  | 2288/3186 [1:49:48<09:59,  1.50it/s]

2289


Epoch 1/1:  72%|███████▏  | 2289/3186 [1:49:49<09:21,  1.60it/s]

2290


Epoch 1/1:  72%|███████▏  | 2290/3186 [1:49:50<09:04,  1.65it/s]

2291


Epoch 1/1:  72%|███████▏  | 2291/3186 [1:49:51<11:41,  1.28it/s]

2292


Epoch 1/1:  72%|███████▏  | 2292/3186 [1:49:51<10:17,  1.45it/s]

2293


Epoch 1/1:  72%|███████▏  | 2293/3186 [1:49:52<12:15,  1.21it/s]

2294


Epoch 1/1:  72%|███████▏  | 2294/3186 [1:49:53<12:01,  1.24it/s]

2295


Epoch 1/1:  72%|███████▏  | 2295/3186 [1:49:54<10:59,  1.35it/s]

2296


Epoch 1/1:  72%|███████▏  | 2296/3186 [1:49:54<09:49,  1.51it/s]

2297


Epoch 1/1:  72%|███████▏  | 2297/3186 [1:49:55<09:31,  1.56it/s]

2298


Epoch 1/1:  72%|███████▏  | 2298/3186 [1:49:55<09:13,  1.60it/s]

2299


Epoch 1/1:  72%|███████▏  | 2299/3186 [1:49:56<09:04,  1.63it/s]

2300


Epoch 1/1:  72%|███████▏  | 2300/3186 [1:49:57<09:58,  1.48it/s]

2301
(B) cube
(B) cube


Epoch 1/1:  72%|███████▏  | 2301/3186 [1:49:57<09:45,  1.51it/s]

2302


Epoch 1/1:  72%|███████▏  | 2302/3186 [1:49:58<10:39,  1.38it/s]

2303


Epoch 1/1:  72%|███████▏  | 2303/3186 [1:49:59<11:18,  1.30it/s]

2304


Epoch 1/1:  72%|███████▏  | 2304/3186 [1:50:00<10:34,  1.39it/s]

2305


Epoch 1/1:  72%|███████▏  | 2305/3186 [1:50:01<12:20,  1.19it/s]

2306


Epoch 1/1:  72%|███████▏  | 2306/3186 [1:50:02<11:56,  1.23it/s]

2307


Epoch 1/1:  72%|███████▏  | 2307/3186 [1:50:02<11:05,  1.32it/s]

2308


Epoch 1/1:  72%|███████▏  | 2308/3186 [1:50:05<17:52,  1.22s/it]

2309


Epoch 1/1:  72%|███████▏  | 2309/3186 [1:50:05<15:33,  1.06s/it]

2310


Epoch 1/1:  73%|███████▎  | 2310/3186 [1:50:06<14:52,  1.02s/it]

2311


Epoch 1/1:  73%|███████▎  | 2311/3186 [1:50:07<16:09,  1.11s/it]

2312


Epoch 1/1:  73%|███████▎  | 2312/3186 [1:50:08<13:56,  1.05it/s]

2313


Epoch 1/1:  73%|███████▎  | 2313/3186 [1:50:09<13:45,  1.06it/s]

2314


Epoch 1/1:  73%|███████▎  | 2314/3186 [1:50:10<13:41,  1.06it/s]

2315


Epoch 1/1:  73%|███████▎  | 2315/3186 [1:50:12<19:53,  1.37s/it]

2316


Epoch 1/1:  73%|███████▎  | 2316/3186 [1:50:13<17:54,  1.24s/it]

2317


Epoch 1/1:  73%|███████▎  | 2317/3186 [1:50:14<15:15,  1.05s/it]

2318


Epoch 1/1:  73%|███████▎  | 2318/3186 [1:50:14<13:18,  1.09it/s]

2319


Epoch 1/1:  73%|███████▎  | 2319/3186 [1:50:15<11:51,  1.22it/s]

2320


Epoch 1/1:  73%|███████▎  | 2320/3186 [1:50:16<11:04,  1.30it/s]

2321


Epoch 1/1:  73%|███████▎  | 2321/3186 [1:50:17<11:31,  1.25it/s]

2322


Epoch 1/1:  73%|███████▎  | 2322/3186 [1:50:18<12:29,  1.15it/s]

2323


Epoch 1/1:  73%|███████▎  | 2323/3186 [1:50:19<13:10,  1.09it/s]

2324


Epoch 1/1:  73%|███████▎  | 2324/3186 [1:50:20<15:13,  1.06s/it]

2325


Epoch 1/1:  73%|███████▎  | 2325/3186 [1:50:21<13:05,  1.10it/s]

2326


Epoch 1/1:  73%|███████▎  | 2326/3186 [1:50:21<11:42,  1.22it/s]

2327


Epoch 1/1:  73%|███████▎  | 2327/3186 [1:50:22<10:31,  1.36it/s]

2328


Epoch 1/1:  73%|███████▎  | 2328/3186 [1:50:22<09:39,  1.48it/s]

2329


Epoch 1/1:  73%|███████▎  | 2329/3186 [1:50:23<09:09,  1.56it/s]

2330


Epoch 1/1:  73%|███████▎  | 2330/3186 [1:50:24<13:37,  1.05it/s]

2331


Epoch 1/1:  73%|███████▎  | 2331/3186 [1:50:25<13:13,  1.08it/s]

2332


Epoch 1/1:  73%|███████▎  | 2332/3186 [1:50:26<13:15,  1.07it/s]

2333


Epoch 1/1:  73%|███████▎  | 2333/3186 [1:50:28<16:59,  1.20s/it]

2334


Epoch 1/1:  73%|███████▎  | 2334/3186 [1:50:29<15:38,  1.10s/it]

2335


Epoch 1/1:  73%|███████▎  | 2335/3186 [1:50:30<14:30,  1.02s/it]

2336


Epoch 1/1:  73%|███████▎  | 2336/3186 [1:50:30<12:07,  1.17it/s]

2337


Epoch 1/1:  73%|███████▎  | 2337/3186 [1:50:31<10:46,  1.31it/s]

2338


Epoch 1/1:  73%|███████▎  | 2338/3186 [1:50:32<13:24,  1.05it/s]

2339


Epoch 1/1:  73%|███████▎  | 2339/3186 [1:50:33<12:53,  1.10it/s]

2340


Epoch 1/1:  73%|███████▎  | 2340/3186 [1:50:34<13:41,  1.03it/s]

2341


Epoch 1/1:  73%|███████▎  | 2341/3186 [1:50:35<13:27,  1.05it/s]

2342


Epoch 1/1:  74%|███████▎  | 2342/3186 [1:50:36<11:47,  1.19it/s]

2343


Epoch 1/1:  74%|███████▎  | 2343/3186 [1:50:36<10:26,  1.35it/s]

2344


Epoch 1/1:  74%|███████▎  | 2344/3186 [1:50:37<10:57,  1.28it/s]

2345


Epoch 1/1:  74%|███████▎  | 2345/3186 [1:50:38<09:51,  1.42it/s]

2346


Epoch 1/1:  74%|███████▎  | 2346/3186 [1:50:39<12:36,  1.11it/s]

2347


Epoch 1/1:  74%|███████▎  | 2347/3186 [1:50:40<13:02,  1.07it/s]

2348


Epoch 1/1:  74%|███████▎  | 2348/3186 [1:50:41<13:43,  1.02it/s]

2349


Epoch 1/1:  74%|███████▎  | 2349/3186 [1:50:41<11:37,  1.20it/s]

2350


Epoch 1/1:  74%|███████▍  | 2350/3186 [1:50:42<10:36,  1.31it/s]

2351


Epoch 1/1:  74%|███████▍  | 2351/3186 [1:50:43<12:33,  1.11it/s]

2352


Epoch 1/1:  74%|███████▍  | 2352/3186 [1:50:44<10:59,  1.26it/s]

2353


Epoch 1/1:  74%|███████▍  | 2353/3186 [1:50:45<10:48,  1.28it/s]

2354


Epoch 1/1:  74%|███████▍  | 2354/3186 [1:50:45<10:24,  1.33it/s]

2355


Epoch 1/1:  74%|███████▍  | 2355/3186 [1:50:46<11:43,  1.18it/s]

2356


Epoch 1/1:  74%|███████▍  | 2356/3186 [1:50:47<11:43,  1.18it/s]

2357


Epoch 1/1:  74%|███████▍  | 2357/3186 [1:50:48<10:49,  1.28it/s]

2358


Epoch 1/1:  74%|███████▍  | 2358/3186 [1:50:49<11:06,  1.24it/s]

2359


Epoch 1/1:  74%|███████▍  | 2359/3186 [1:50:49<10:17,  1.34it/s]

2360


Epoch 1/1:  74%|███████▍  | 2360/3186 [1:50:50<11:12,  1.23it/s]

2361


Epoch 1/1:  74%|███████▍  | 2361/3186 [1:50:52<14:15,  1.04s/it]

2362


Epoch 1/1:  74%|███████▍  | 2362/3186 [1:50:53<14:21,  1.05s/it]

2363


Epoch 1/1:  74%|███████▍  | 2363/3186 [1:50:55<17:14,  1.26s/it]

2364


Epoch 1/1:  74%|███████▍  | 2364/3186 [1:50:56<16:30,  1.20s/it]

2365


Epoch 1/1:  74%|███████▍  | 2365/3186 [1:50:57<16:02,  1.17s/it]

2366


Epoch 1/1:  74%|███████▍  | 2366/3186 [1:50:58<14:21,  1.05s/it]

2367


Epoch 1/1:  74%|███████▍  | 2367/3186 [1:50:59<13:56,  1.02s/it]

2368


Epoch 1/1:  74%|███████▍  | 2368/3186 [1:50:59<13:07,  1.04it/s]

2369


Epoch 1/1:  74%|███████▍  | 2369/3186 [1:51:00<11:08,  1.22it/s]

2370


Epoch 1/1:  74%|███████▍  | 2370/3186 [1:51:00<10:20,  1.31it/s]

2371


Epoch 1/1:  74%|███████▍  | 2371/3186 [1:51:01<09:19,  1.46it/s]

2372


Epoch 1/1:  74%|███████▍  | 2372/3186 [1:51:02<10:34,  1.28it/s]

2373


Epoch 1/1:  74%|███████▍  | 2373/3186 [1:51:02<09:27,  1.43it/s]

2374


Epoch 1/1:  75%|███████▍  | 2374/3186 [1:51:03<09:07,  1.48it/s]

2375


Epoch 1/1:  75%|███████▍  | 2375/3186 [1:51:04<11:29,  1.18it/s]

2376


Epoch 1/1:  75%|███████▍  | 2376/3186 [1:51:05<10:50,  1.24it/s]

2377


Epoch 1/1:  75%|███████▍  | 2377/3186 [1:51:06<10:22,  1.30it/s]

2378


Epoch 1/1:  75%|███████▍  | 2378/3186 [1:51:06<09:26,  1.43it/s]

2379


Epoch 1/1:  75%|███████▍  | 2379/3186 [1:51:07<08:36,  1.56it/s]

2380


Epoch 1/1:  75%|███████▍  | 2380/3186 [1:51:07<08:23,  1.60it/s]

2381


Epoch 1/1:  75%|███████▍  | 2381/3186 [1:51:09<10:26,  1.28it/s]

2382


Epoch 1/1:  75%|███████▍  | 2382/3186 [1:51:09<11:14,  1.19it/s]

2383


Epoch 1/1:  75%|███████▍  | 2383/3186 [1:51:12<16:30,  1.23s/it]

2384


Epoch 1/1:  75%|███████▍  | 2384/3186 [1:51:12<14:56,  1.12s/it]

2385


Epoch 1/1:  75%|███████▍  | 2385/3186 [1:51:13<13:12,  1.01it/s]

2386


Epoch 1/1:  75%|███████▍  | 2386/3186 [1:51:14<11:44,  1.14it/s]

2387


Epoch 1/1:  75%|███████▍  | 2387/3186 [1:51:27<1:02:48,  4.72s/it]

2388


Epoch 1/1:  75%|███████▍  | 2388/3186 [1:51:34<1:09:38,  5.24s/it]

2389


Epoch 1/1:  75%|███████▍  | 2389/3186 [1:51:40<1:14:35,  5.62s/it]

2390


Epoch 1/1:  75%|███████▌  | 2390/3186 [1:51:51<1:32:29,  6.97s/it]

2391


Epoch 1/1:  75%|███████▌  | 2391/3186 [1:52:01<1:47:00,  8.08s/it]

2392


Epoch 1/1:  75%|███████▌  | 2392/3186 [1:52:11<1:54:30,  8.65s/it]

2393


Epoch 1/1:  75%|███████▌  | 2393/3186 [1:52:23<2:05:29,  9.49s/it]

2394


Epoch 1/1:  75%|███████▌  | 2394/3186 [1:52:28<1:48:41,  8.23s/it]

2395


Epoch 1/1:  75%|███████▌  | 2395/3186 [1:52:32<1:33:10,  7.07s/it]

2396


Epoch 1/1:  78%|███████▊  | 2480/3186 [2:03:37<2:13:44, 11.37s/it]

2481


Epoch 1/1:  78%|███████▊  | 2481/3186 [2:03:40<1:47:00,  9.11s/it]

2482


Epoch 1/1:  78%|███████▊  | 2482/3186 [2:03:47<1:37:28,  8.31s/it]

2483


Epoch 1/1:  78%|███████▊  | 2483/3186 [2:03:57<1:43:36,  8.84s/it]

2484


Epoch 1/1:  78%|███████▊  | 2484/3186 [2:04:02<1:29:22,  7.64s/it]

2485


Epoch 1/1:  78%|███████▊  | 2485/3186 [2:04:16<1:51:36,  9.55s/it]

2486


Epoch 1/1:  78%|███████▊  | 2486/3186 [2:04:24<1:47:46,  9.24s/it]

2487


Epoch 1/1:  78%|███████▊  | 2487/3186 [2:04:29<1:31:40,  7.87s/it]

2488


Epoch 1/1:  78%|███████▊  | 2488/3186 [2:04:35<1:23:46,  7.20s/it]

2489


Epoch 1/1:  78%|███████▊  | 2489/3186 [2:04:42<1:23:49,  7.22s/it]

2490


Epoch 1/1:  78%|███████▊  | 2490/3186 [2:04:54<1:42:01,  8.80s/it]

2491


Epoch 1/1:  78%|███████▊  | 2491/3186 [2:05:08<1:58:48, 10.26s/it]

2492


Epoch 1/1:  78%|███████▊  | 2492/3186 [2:05:12<1:36:10,  8.31s/it]

2493


Epoch 1/1:  78%|███████▊  | 2493/3186 [2:05:19<1:33:11,  8.07s/it]

2494


Epoch 1/1:  78%|███████▊  | 2494/3186 [2:05:38<2:08:51, 11.17s/it]

2495


Epoch 1/1:  78%|███████▊  | 2495/3186 [2:05:48<2:05:53, 10.93s/it]

2496


Epoch 1/1:  78%|███████▊  | 2496/3186 [2:05:51<1:39:47,  8.68s/it]

2497


Epoch 1/1:  78%|███████▊  | 2497/3186 [2:05:55<1:23:00,  7.23s/it]

2498


Epoch 1/1:  78%|███████▊  | 2498/3186 [2:06:09<1:43:33,  9.03s/it]

2499


Epoch 1/1:  78%|███████▊  | 2499/3186 [2:06:19<1:47:29,  9.39s/it]

2500


Epoch 1/1:  78%|███████▊  | 2500/3186 [2:06:31<1:57:38, 10.29s/it]

2501
B
(A) no


Epoch 1/1:  78%|███████▊  | 2501/3186 [2:06:34<1:30:35,  7.93s/it]

2502


Epoch 1/1:  79%|███████▊  | 2502/3186 [2:06:46<1:46:21,  9.33s/it]

2503


Epoch 1/1:  79%|███████▊  | 2503/3186 [2:06:55<1:43:18,  9.08s/it]

2504


Epoch 1/1:  79%|███████▊  | 2504/3186 [2:07:08<1:57:30, 10.34s/it]

2505


Epoch 1/1:  79%|███████▊  | 2505/3186 [2:07:21<2:07:47, 11.26s/it]

2506


Epoch 1/1:  79%|███████▊  | 2506/3186 [2:07:28<1:50:41,  9.77s/it]

2507


Epoch 1/1:  79%|███████▊  | 2507/3186 [2:07:36<1:46:46,  9.44s/it]

2508


Epoch 1/1:  79%|███████▊  | 2508/3186 [2:07:42<1:33:18,  8.26s/it]

2509


Epoch 1/1:  81%|████████▏ | 2589/3186 [2:18:33<1:18:30,  7.89s/it]

2590


Epoch 1/1:  81%|████████▏ | 2590/3186 [2:18:41<1:18:09,  7.87s/it]

2591


Epoch 1/1:  81%|████████▏ | 2591/3186 [2:18:54<1:33:17,  9.41s/it]

2592


Epoch 1/1:  81%|████████▏ | 2592/3186 [2:19:04<1:33:57,  9.49s/it]

2593


Epoch 1/1:  81%|████████▏ | 2593/3186 [2:19:07<1:14:39,  7.55s/it]

2594


Epoch 1/1:  81%|████████▏ | 2594/3186 [2:19:15<1:17:20,  7.84s/it]

2595


Epoch 1/1:  81%|████████▏ | 2595/3186 [2:19:20<1:06:24,  6.74s/it]

2596


Epoch 1/1:  81%|████████▏ | 2596/3186 [2:19:28<1:10:10,  7.14s/it]

2597


Epoch 1/1:  82%|████████▏ | 2597/3186 [2:19:34<1:08:01,  6.93s/it]

2598


Epoch 1/1:  82%|████████▏ | 2598/3186 [2:19:39<1:00:38,  6.19s/it]

2599


Epoch 1/1:  82%|████████▏ | 2599/3186 [2:19:46<1:04:50,  6.63s/it]

2600


Epoch 1/1:  82%|████████▏ | 2600/3186 [2:20:00<1:27:06,  8.92s/it]

2601
Option (A) was the order of the letters C A L Y P S O before the person shuffled them in the video.
(A) calypso


Epoch 1/1:  82%|████████▏ | 2601/3186 [2:20:05<1:12:56,  7.48s/it]

2602


Epoch 1/1:  82%|████████▏ | 2602/3186 [2:20:13<1:15:09,  7.72s/it]

2603


Epoch 1/1:  82%|████████▏ | 2603/3186 [2:20:22<1:19:02,  8.14s/it]

2604


Epoch 1/1:  82%|████████▏ | 2604/3186 [2:20:27<1:08:23,  7.05s/it]

2605


Epoch 1/1:  82%|████████▏ | 2605/3186 [2:20:33<1:07:40,  6.99s/it]

2606


Epoch 1/1:  82%|████████▏ | 2606/3186 [2:20:38<1:01:48,  6.39s/it]

2607


Epoch 1/1:  82%|████████▏ | 2607/3186 [2:20:44<1:00:21,  6.26s/it]

2608


Epoch 1/1:  82%|████████▏ | 2608/3186 [2:20:53<1:08:22,  7.10s/it]

2609


Epoch 1/1:  82%|████████▏ | 2609/3186 [2:21:07<1:27:54,  9.14s/it]

2610


Epoch 1/1:  82%|████████▏ | 2610/3186 [2:21:12<1:14:04,  7.72s/it]

2611


Epoch 1/1:  82%|████████▏ | 2611/3186 [2:21:25<1:28:58,  9.29s/it]

2612


Epoch 1/1:  82%|████████▏ | 2612/3186 [2:21:33<1:25:01,  8.89s/it]

2613


Epoch 1/1:  82%|████████▏ | 2613/3186 [2:21:40<1:21:57,  8.58s/it]

2614


Epoch 1/1:  82%|████████▏ | 2614/3186 [2:21:55<1:39:34, 10.45s/it]

2615


Epoch 1/1:  82%|████████▏ | 2615/3186 [2:21:59<1:20:50,  8.49s/it]

2616


Epoch 1/1:  82%|████████▏ | 2616/3186 [2:22:10<1:28:16,  9.29s/it]

2617


Epoch 1/1:  82%|████████▏ | 2617/3186 [2:22:15<1:13:38,  7.77s/it]

2618


Epoch 1/1:  82%|████████▏ | 2618/3186 [2:22:21<1:11:08,  7.52s/it]

2619


Epoch 1/1:  82%|████████▏ | 2619/3186 [2:22:40<1:41:37, 10.75s/it]

2620


Epoch 1/1:  84%|████████▍ | 2692/3186 [2:33:26<1:13:28,  8.92s/it]

2693


Epoch 1/1:  85%|████████▍ | 2693/3186 [2:33:40<1:27:05, 10.60s/it]

2694


Epoch 1/1:  85%|████████▍ | 2694/3186 [2:33:44<1:09:43,  8.50s/it]

2695


Epoch 1/1:  85%|████████▍ | 2695/3186 [2:33:56<1:18:16,  9.57s/it]

2696


Epoch 1/1:  85%|████████▍ | 2696/3186 [2:34:04<1:15:19,  9.22s/it]

2697


Epoch 1/1:  85%|████████▍ | 2697/3186 [2:34:15<1:18:23,  9.62s/it]

2698


Epoch 1/1:  85%|████████▍ | 2698/3186 [2:34:17<59:53,  7.36s/it]  

2699


Epoch 1/1:  85%|████████▍ | 2699/3186 [2:34:26<1:04:35,  7.96s/it]

2700


Epoch 1/1:  85%|████████▍ | 2700/3186 [2:34:31<57:03,  7.05s/it]  

2701
(A) tea
(C) eat


Epoch 1/1:  85%|████████▍ | 2701/3186 [2:34:43<1:09:06,  8.55s/it]

2702


Epoch 1/1:  85%|████████▍ | 2702/3186 [2:34:53<1:12:07,  8.94s/it]

2703


Epoch 1/1:  85%|████████▍ | 2703/3186 [2:35:07<1:23:02, 10.31s/it]

2704


Epoch 1/1:  85%|████████▍ | 2704/3186 [2:35:11<1:07:39,  8.42s/it]

2705


Epoch 1/1:  85%|████████▍ | 2705/3186 [2:35:16<1:01:23,  7.66s/it]

2706


Epoch 1/1:  85%|████████▍ | 2706/3186 [2:35:26<1:05:26,  8.18s/it]

2707


Epoch 1/1:  85%|████████▍ | 2707/3186 [2:35:40<1:19:50, 10.00s/it]

2708


Epoch 1/1:  85%|████████▍ | 2708/3186 [2:35:48<1:15:45,  9.51s/it]

2709


Epoch 1/1:  85%|████████▌ | 2709/3186 [2:35:55<1:07:34,  8.50s/it]

2710


Epoch 1/1:  85%|████████▌ | 2710/3186 [2:36:04<1:09:00,  8.70s/it]

2711


Epoch 1/1:  85%|████████▌ | 2711/3186 [2:36:09<1:01:31,  7.77s/it]

2712


Epoch 1/1:  85%|████████▌ | 2712/3186 [2:36:24<1:18:20,  9.92s/it]

2713


Epoch 1/1:  85%|████████▌ | 2713/3186 [2:36:32<1:11:51,  9.12s/it]

2714


Epoch 1/1:  85%|████████▌ | 2714/3186 [2:36:45<1:21:01, 10.30s/it]

2715


Epoch 1/1:  85%|████████▌ | 2715/3186 [2:36:52<1:13:28,  9.36s/it]

2716


Epoch 1/1:  85%|████████▌ | 2716/3186 [2:37:03<1:16:42,  9.79s/it]

2717


Epoch 1/1:  85%|████████▌ | 2717/3186 [2:37:18<1:29:13, 11.41s/it]

2718


Epoch 1/1:  85%|████████▌ | 2718/3186 [2:37:30<1:31:36, 11.74s/it]

2719


Epoch 1/1:  85%|████████▌ | 2719/3186 [2:37:39<1:25:15, 10.95s/it]

2720


Epoch 1/1:  85%|████████▌ | 2720/3186 [2:37:45<1:13:45,  9.50s/it]

2721


In [9]:
y_preds = [i.split('.')[0] for i in y_pred]

result = [1 if i == j else 0 for i, j in zip(y_real, y_preds)]

print("VideoLAVIT-8.5B-MVBench, accuracy is: " + str(sum(result) / len(result)))

InternVL-2B-8-frames-MVBench, accuracy is: 0.5443740095087163


In [None]:
/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/ZS9XR.mp4 What happened after the person took the food?

In [21]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from models import build_model

Please 'pip install apex'
Please 'pip install apex'
Please 'pip install apex'
Please 'pip install apex'


In [10]:
model_path = "/home/jovyan/shares/SR004.nfs2/chekalina/LaVIT/VideoLaVIT/models/language_model_sft"#"/home/jinyang06/models/VideoLaVIT-v1/language_model_sft"
model_dtype='bf16'

max_video_clips = 16
device_id = 0
torch.cuda.set_device(device_id)
device = torch.device('cuda')

seed = 42
#torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# For Multi-Modal Understanding
runner = build_model(model_path=model_path, model_dtype=model_dtype, understanding=True, 
        device_id=device_id, use_xformers=False, max_video_clips=max_video_clips,)

Loading Video LaVIT Model Weight from /home/jovyan/shares/SR004.nfs2/chekalina/LaVIT/VideoLaVIT/models/language_model_sft, model precision: bf16
Not used {}


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of the model checkpoint at /home/jovyan/shares/SR004.nfs2/chekalina/LaVIT/VideoLaVIT/models/language_model_sft were not used when initializing VideoLaVITLlamaForCausalLM: ['model.motion_tokenizer.quantize.cluster_size', 'model.motion_tokenizer.quantize.embedding.initted', 'model.motion_tokenizer.quantize.embedding.embed_avg', 'model.motion_tokenizer.quantize.embedding.cluster_size']
- This IS expected if you are initializing VideoLaVITLlamaForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing VideoLaVITLlamaForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The Visual Vocab Size is 16384
The llama tokenizer vocab size is 32000
The maximal clip number is 16


In [12]:
video_path = '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/EY6P4.mp4'
prompt = "What happened before the person watched at the book?"
answer = "He ate the medicine."
output = runner({"video": video_path, "text_input": prompt}, length_penalty=1, \
        use_nucleus_sampling=True, num_beams=1, max_length=512, temperature=1.0)[0]
print(output)

prompt A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <video>
What happened before the person watched at the book? ASSISTANT:
The video shows a person walking into the frame while talking with someone. Later, the person pointed at a book and walked towards the camera. But the video does not provide more information regarding the previous actions.


In [13]:
video_path = '/home/jovyan/shares/SR004.nfs2/lipengyi/1_Checked_idea/MVBench/video/star/star/Charades_v1_480/EY6P4.mp4'
prompt = "What happened on the video?"
answer = "What happens on the video?"
output = runner({"video": video_path, "text_input": prompt}, length_penalty=1, \
        use_nucleus_sampling=True, num_beams=1, max_length=512, temperature=1.0)[0]
print(output)

prompt A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <video>
What happened on the video? ASSISTANT:
The video depicts a man in a black shirt who stands inside a room holding papers and interacts with a laptop. He opens the door and walks out.
