In [2]:
# Test 1
# Run configuration read by the download / pre-processing cells of this notebook.
batch_size = 32       # number of listing rows fetched per saved batch
max_images = 120000   # the download loop stops once this many rows were requested
frames = 10           # frames sampled from every GIF
image_size = 128      # used as both target width and target height, in pixels

In [4]:
!pip install torch torchvision numpy

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-1.13.0-cp310-cp310-manylinux1_x86_64.whl (890.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m890.1/890.1 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:05[0m
[?25hCollecting torchvision
  Downloading torchvision-0.14.0-cp310-cp310-manylinux1_x86_64.whl (24.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.3/24.3 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy
  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nvidia-cublas-cu11==11.10.3.66
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [5]:
# GIF pre-processing

import numpy as np
from torchvision import transforms as T
from math import floor, fabs
from PIL import Image, ImageSequence


# Channel count -> image mode string handed to Image.convert() when frames are extracted.
CHANNELS_TO_MODE = dict(zip((1, 3, 4), ('L', 'RGB', 'RGBA')))

def center_crop(img, new_width, new_height):
    """Crop `img` to `new_width` x `new_height` around its centre.

    The box is derived from `img.size` and passed to `img.crop`. When the size
    difference along an axis is odd, the extra pixel is removed from the
    left / top side.

    Returns whatever `img.crop` returns for the computed (left, top, right, bottom) box.
    """
    width, height = img.size[0], img.size[1]
    # Half of the surplus along each axis; may be fractional for odd differences.
    half_dx = (width - new_width) / 2
    half_dy = (height - new_height) / 2
    box = (
        int(np.ceil(half_dx)),             # left
        int(np.ceil(half_dy)),             # top
        width - int(np.floor(half_dx)),    # right
        height - int(np.floor(half_dy)),   # bottom
    )
    return img.crop(box)

def resize_crop_img(img, width, height):
    """Resize `img` so that it covers a `width` x `height` box, then centre-crop it.

    The aspect ratio is preserved. The axis that needs the larger scale factor
    is scaled to exactly its target size, so the other axis ends up at least as
    large as its target and `center_crop` only ever trims pixels. For square
    targets this selects the same axis as comparing the image's own width and
    height; for non-square targets it prevents a resized image that is smaller
    than the crop box.

    Args:
        img: image exposing `size`, `resize` and `crop` (a PIL image in this notebook).
        width: target width in pixels.
        height: target height in pixels.

    Returns:
        The resized and cropped image.
    """
    src_width, src_height = img.size
    width_scale = width / float(src_width)
    height_scale = height / float(src_height)
    if width_scale > height_scale:
        # Width needs the larger factor: pin it, let the height overshoot.
        new_size = (width, int(float(src_height) * width_scale))
    else:
        # Height needs the larger (or an equal) factor: pin it instead.
        new_size = (int(float(src_width) * height_scale), height)
    img = img.resize(new_size, Image.Resampling.LANCZOS)
    return center_crop(img, width, height)

def transform_gif(img, new_width, new_height, frames, channels = 3):
    """Generator yielding `frames` processed frames taken from `img`.

    Each frame is selected with `img.seek`, passed through `resize_crop_img`
    and converted to the mode that `CHANNELS_TO_MODE` associates with
    `channels`. When `frames` exceeds `img.n_frames`, frame selection wraps
    around to the start of the animation.

    Being a generator, nothing (including the `channels` check) runs until the
    first item is requested.
    """
    assert channels in CHANNELS_TO_MODE, f'channels {channels} invalid'
    target_mode = CHANNELS_TO_MODE[channels]
    available = img.n_frames
    for frame_no in range(frames):
        # Modulo makes short animations loop instead of seeking past the end.
        img.seek(frame_no % available)
        yield resize_crop_img(img, new_width, new_height).convert(target_mode)
        
# tensor of shape (channels, frames, height, width) -> gif
def video_tensor_to_gif(tensor, path, fps = 10, loop = 0, optimize = True):
    """Write a video tensor to `path` as an animated GIF.

    Args:
        tensor: video tensor; it is split along dim 1, one slice per frame.
        path: destination passed to the first frame's `save`.
        fps: frames per second; each frame is shown for int(1000 / fps) milliseconds.
        loop: forwarded to `save` as the GIF loop setting.
        optimize: forwarded to `save`.

    Returns:
        The list of frame images that were written, in order.

    Raises:
        ValueError: if the tensor has no slices along dim 1 (nothing to unpack).
    """
    print("Converting video tensors to GIF")
    to_pil = T.ToPILImage()
    # Materialise the frames: a lazy map() would be exhausted by the unpacking
    # below and the function would hand an empty iterator back to the caller.
    images = [to_pil(frame) for frame in tensor.unbind(dim = 1)]
    first_img, *rest_imgs = images
    first_img.save(path, save_all = True, append_images = rest_imgs, duration = int(1000/fps), loop = loop, optimize = optimize)
    print("Gif saved")
    return images

# gif -> (channels, frame, height, width) tensor
def gif_to_tensor(path, width = 256, height = 256, frames = 32, channels = 3, transform = T.ToTensor()):
    """Load the GIF at `path` and return its frames stacked into one tensor.

    Args:
        path: file to open with `Image.open`.
        width: target frame width passed to `transform_gif`.
        height: target frame height passed to `transform_gif`.
        frames: number of frames to extract.
        channels: channel count; must be a key of `CHANNELS_TO_MODE`.
        transform: callable applied to every extracted frame; its results are
            stacked along dim 1.

    Returns:
        `torch.stack` of the transformed frames along dim 1.

    Note:
        `torch` is imported by a later cell of this notebook, so that cell has
        to run before this function is called.
    """
    print("Converting GIF to video tensors")
    # The context manager closes the file handle even when decoding fails, so
    # the caller can safely delete or overwrite the file afterwards.
    with Image.open(path) as img:
        imgs = transform_gif(img, new_width = width, new_height = height, frames = frames, channels = channels)
        # transform_gif is lazy: consume it while the file is still open.
        tensors = tuple(map(transform, imgs))
    return torch.stack(tensors, dim = 1)

In [6]:
import os
import torch
# Local copy of the TGIF listing downloaded below; rows are tab-separated.
# NOTE(review): the extension is spelled ".tvs" while the upstream file is ".tsv";
# leave it as-is unless every reader of this path is updated together.
train_data = "./train_data.tvs"
# Text file holding the index of the next listing row to process.
train_index = "./train_index.txt"

# Fetch the listing only when no local copy exists yet.
if not os.path.exists(train_data):
  !wget -O {train_data} https://raw.githubusercontent.com/raingo/TGIF-Release/master/data/tgif-v1.0.tsv

# Module-level state that the functions below update through `global`:
# the row index reached so far, and the captions / video tensors of the current batch.
current_index = 0
texts = []
list_videos = []

def get_videos(index_start, index_end):
    """Download and convert the listing rows in [index_start, index_end).

    Resets the module-level `texts` and `list_videos`, then, for every row in
    the range, downloads the GIF with `wget`, converts it with `gif_to_tensor`
    and appends the tensor and its caption. A row that fails for any reason
    (malformed line, failed download, undecodable GIF) is reported with
    `print` and skipped, so the two lists may end up shorter than the range.

    Args:
        index_start: first row index to process (inclusive).
        index_end: row index at which to stop (exclusive).
    """
    # Local import: only this function needs it.
    import subprocess

    global texts
    global list_videos

    texts = []
    list_videos = []

    with open(train_data) as fp:
        for i, line in enumerate(fp):
            if i >= index_end:
                # Past the requested range: stop without reading further rows.
                break
            if i < index_start:
                continue
            try:
                # rstrip only removes a trailing newline; a final line without
                # one keeps its last character.
                file_img, file_text = line.rstrip("\n").split("\t")
                print(f"Downloading image {i}")
                # Argument list instead of a shell line: the URL comes from a
                # downloaded file and must not be interpreted by a shell.
                subprocess.run(
                    ["wget", "-O", "download.gif", "-o", os.devnull, file_img],
                    check = True,
                )
                tensor = gif_to_tensor('download.gif', width = image_size, height = image_size, frames = frames)
                # Append both together so captions and tensors stay aligned.
                list_videos.append(tensor)
                texts.append(file_text)
            except Exception as ex:
                # Best effort: report the failing row and carry on.
                print(ex)
            finally:
                # Remove the temporary file on success and on failure alike.
                if os.path.exists('download.gif'):
                    os.remove('download.gif')

def get_next_videos():
    """Process the next `batch_size` listing rows and persist the progress.

    The start row is read from the `train_index` file (0 when the file is
    missing or empty). After `get_videos` returns, the end row is written back
    to `train_index` and stored in the module-level `current_index`.

    Raises:
        ValueError: if `train_index` contains something that is not an integer.
    """
    global current_index
    index = 0
    if os.path.exists(train_index):
        with open(train_index, 'r') as fp:
            saved = fp.readline().strip()
        # An empty file (e.g. left behind by an interrupted write) restarts at
        # row 0 instead of raising IndexError.
        if saved:
            index = int(saved)
    index_end = index + batch_size
    get_videos(index, index_end)
    with open(train_index, 'w') as fp:
        fp.write(f"{index_end}")
    current_index = index_end
    

--2022-12-06 22:06:32--  https://raw.githubusercontent.com/raingo/TGIF-Release/master/data/tgif-v1.0.tsv
Caricato certificato CA "/etc/ssl/certs/ca-certificates.crt"

Risoluzione di raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8002::154, 2606:50c0:8001::154, ...
Connessione a raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connesso.
Richiesta HTTP inviata, in attesa di risposta... 200 OK
Lunghezza: 18660908 (18M) [text/plain]
Salvataggio in: «./train_data.tvs»


2022-12-06 22:06:36 (4,85 MB/s) - «./train_data.tvs» salvato [18660908/18660908]



In [7]:
# Create the output directory; unlike `mkdir`, this is a no-op when it already exists.
os.makedirs('TGIF', exist_ok = True)

# get_next_videos() resumes from the row index stored in `train_index`, so the
# batch counter has to resume as well. Restarting it at 0 would overwrite
# videos_0.pt / texts_0.txt with rows from a later part of the listing.
# Assumes `batch_size` is the same as in the run that wrote the index file.
i = 0
if os.path.exists(train_index):
    with open(train_index, 'r') as fp:
        saved_index = fp.readline().strip()
    if saved_index:
        i = int(saved_index) // batch_size

while i * batch_size < max_images:
    get_next_videos()
    if list_videos:
        videos = torch.stack(list_videos, dim = 0)
        torch.save(videos, f'./TGIF/videos_{i}.pt')
        with open(f'./TGIF/texts_{i}.txt', 'w') as fp:
            fp.write('\n'.join(texts))
    else:
        # torch.stack raises on an empty list; this happens when every
        # download of the batch failed or the listing has no rows left.
        print(f"Batch {i}: no GIF could be loaded, nothing saved")
    i += 1

Downloading image 0
Converting GIF to video tensors
Downloading image 1
Converting GIF to video tensors
Downloading image 2
Converting GIF to video tensors
Downloading image 3
Converting GIF to video tensors
Downloading image 4
Converting GIF to video tensors
Downloading image 5
Converting GIF to video tensors
Downloading image 6
Converting GIF to video tensors
Downloading image 7
Converting GIF to video tensors
Downloading image 8
Converting GIF to video tensors
Downloading image 9
Converting GIF to video tensors
Downloading image 10
Converting GIF to video tensors
Downloading image 11
Converting GIF to video tensors
Downloading image 12
Converting GIF to video tensors
Downloading image 13
Converting GIF to video tensors
Downloading image 14
Converting GIF to video tensors
Downloading image 15
Converting GIF to video tensors
Downloading image 16
Converting GIF to video tensors
Downloading image 17
Converting GIF to video tensors
Downloading image 18
Converting GIF to video tensors
Dow

KeyboardInterrupt: 