In [13]:
import torch
from torch.utils.data import Dataset
import torchvision
import glob
import os
import cv2
import numpy as np

In [2]:
pre_sequence = 20
after_sequence = 5
bs = 4

In [3]:
'''
custom dataset for illegal dumping project
background version
'''

'''
map-style dataset
'''

class Illegal_dumping_dataset(Dataset):
    def __init__(self, root, normalize=False):
        super(Illegal_dumping_dataset, self).__init__()
        self.root = root # path to the datasets e.g) train, val, and test

        self.mean = None
        self.std = None

        if normalize:
            # get the mean/std values along the channel dimension
            mean = data.mean(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            std = data.std(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            data = (data - mean) / std
            self.mean = mean
            self.std = std

        # this has all the names of clips as a list
        self.clips = glob.glob(f"{root}/*.avi")

    def __len__(self):
        # legnth of the dataset is the number of elements in self.clips
        return len(self.clips)

    def __getitem__(self, index):

        # get ith clip in a tensor
        frames, _, _ = torchvision.io.read_video(self.clips[index], pts_unit = "sec", output_format = "TCHW")

        # do normalization
        frames = frames / 255.0

        # frame_batch contains 10 frames that the model watches
        # target_batch contains 1 frame that the model predicts based on the previous 10 frames

        # get frames numbers to be sampled
        sample_frame_nums = torch.linspace(0, len(frames)-1, pre_sequence + after_sequence, dtype=int)

        # get frames accordingly
        sampled_frames = [frames[x] for x in sample_frame_nums]

        # stack those frames
        stacked_sampled_frames = torch.stack(sampled_frames)

        return stacked_sampled_frames[:pre_sequence], stacked_sampled_frames[pre_sequence:]

In [3]:
'''
iterable dataset_version 1

'''
from torch.utils.data import IterableDataset

class Illegal_dumping_dataset(IterableDataset):

    def __init__(self, root, normalize=False):
        super(Illegal_dumping_dataset, self).__init__()
        self.root = root # path to the datasets e.g) train, val, and test

        self.mean = None
        self.std = None

        if normalize:
            # get the mean/std values along the channel dimension
            mean = data.mean(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            std = data.std(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            data = (data - mean) / std
            self.mean = mean
            self.std = std

        # this has all the names of clips as a list
        self.clips = glob.glob(f"{root}/*.avi")

        self.length = len(self.clips)

    def __len__(self):
        return self.length

    def __iter__(self):
        # iter_csv = pd.read_csv(self.data_path, sep='\t', iterator=True, chunksize=1)
        # for line in iter_csv:
        #     line = line['text'].item()
        #     yield line

        for clip in self.clips:
            # get ith clip in a tensor

            frames, _, _ = torchvision.io.read_video(clip, pts_unit = "sec", output_format = "TCHW")
            sample_frame_nums = torch.linspace(0, len(frames)-1, pre_sequence + after_sequence, dtype=int)

            sampled_frames = [frames[x] for x in sample_frame_nums]

            del frames

            stacked_sampled_frames = torch.stack(sampled_frames)
            
            yield stacked_sampled_frames[:pre_sequence], stacked_sampled_frames[pre_sequence:]

In [3]:
'''
iterable dataset_version 2; using opencv

'''
from torch.utils.data import IterableDataset

class Illegal_dumping_dataset(IterableDataset):

    def __init__(self, root, pre_seq, after_seq, normalize=False):
        super(Illegal_dumping_dataset, self).__init__()
        self.root = root # path to the datasets e.g) train, val, and test

        self.mean = None
        self.std = None

        if normalize:
            # get the mean/std values along the channel dimension
            mean = data.mean(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            std = data.std(axis=(0, 1, 2, 3)).reshape(1, 1, -1, 1, 1)
            data = (data - mean) / std
            self.mean = mean
            self.std = std

        # this has all the names of clips as a list
        self.clips = glob.glob(f"{root}/*.avi")

        self.length = len(self.clips)

        self.pre_seq = pre_seq
        self.after_seq = after_seq

    def __len__(self):
        return self.length

    def __iter__(self):

        for clip in self.clips:

            cap = cv2.VideoCapture(clip)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            sample_frame_nums = np.linspace(0, total_frames-1, num = self.pre_seq + self.after_seq, dtype= int)

            sampled_frames = []

            for frame_num in sample_frame_nums:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()

                if ret:
                    sampled_frames.append(frame.transpose(2, 0, 1))
            
            #cap.release()

            result = np.stack(sampled_frames)

            dataset = torch.tensor(result)

            yield dataset[:pre_sequence] / 255, dataset[pre_sequence:] / 255

In [4]:
print(os.getcwd())

/home/vis-ms/OpenSTL/custom_bg_many


In [5]:
# train_set = Illegal_dumping_dataset(root = "./train")
# val_set = Illegal_dumping_dataset(root = "./val")
# test_set = Illegal_dumping_dataset(root = "./test")

train_set = Illegal_dumping_dataset("./train", pre_sequence, after_sequence)
val_set = Illegal_dumping_dataset( "./val", pre_sequence, after_sequence)
test_set = Illegal_dumping_dataset( "./test", pre_sequence, after_sequence)

In [6]:
# dataloader_train = torch.utils.data.DataLoader(
#     train_set, batch_size = bs, shuffle=True, pin_memory=True)
# dataloader_val = torch.utils.data.DataLoader(
#     val_set, batch_size = bs, shuffle=True, pin_memory=True)
# dataloader_test = torch.utils.data.DataLoader(
#     test_set, batch_size = bs, shuffle=True, pin_memory=True)

dataloader_train = torch.utils.data.DataLoader(
    train_set, batch_size = bs)
dataloader_val = torch.utils.data.DataLoader(
    val_set, batch_size = bs)
dataloader_test = torch.utils.data.DataLoader(
    test_set, batch_size = bs)

# dataloader_train = torch.utils.data.DataLoader(
#     train_set, batch_size = bs, shuffle = True)
# dataloader_val = torch.utils.data.DataLoader(
#     val_set, batch_size = bs,  shuffle = True,)
# dataloader_test = torch.utils.data.DataLoader(
#     test_set, batch_size = bs, shuffle = True,)

In [10]:
from sys import getsizeof

In [7]:
temp = 0
count = 1
for t in dataloader_train:
    print(count)
    count += 1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213


In [11]:
temp = 0
for t in dataloader_train:
    temp = t
    break

In [11]:
print(t[0].shape)

torch.Size([4, 20, 3, 360, 640])


In [12]:
print(t[1].shape)

torch.Size([4, 5, 3, 360, 640])


In [12]:
print(getsizeof(temp))

88


In [7]:
custom_training_config = {
    'pre_seq_length': pre_sequence,
    'aft_seq_length': after_sequence,
    'total_length': pre_sequence + after_sequence,
    'batch_size': bs,
    'val_batch_size': bs,
    'epoch': 1000,
    'lr': 0.001,   
    'metrics': ['mse', 'mae', "psnr", "ssim"],

    'ex_name': 'custom_exp',
    'dataname': 'custom',
    'in_shape': [pre_sequence, 3, 360, 640]
}

custom_model_config = {
    # For MetaVP models, the most important hyperparameters are: 
    # N_S, N_T, hid_S, hid_T, model_type
    #'method': 'SimVP',
    # Users can either using a config file or directly set these hyperparameters 
    'config_file': 'configs/custom/example_model.py',
    
    # Here, we directly set these parameters
    'model_type': 'IncepU',
    'N_S': 4,
    'N_T': 8,
    'hid_S': 64,
    'hid_T': 256,
    'sched': 'onecycle'
}

In [8]:
from openstl.api import BaseExperiment
from openstl.utils import create_parser

args = create_parser().parse_args([])
config = args.__dict__

# update the training config
config.update(custom_training_config)
# update the model config
config.update(custom_model_config)

exp = BaseExperiment(args, dataloaders=(dataloader_train, dataloader_val, dataloader_test))

Use non-distributed mode with GPU: cuda:0
Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.11.4 (main, Jul  5 2023, 13:45:01) [GCC 11.2.0]
CUDA available: True
CUDA_HOME: /usr/local/cuda-11.8
NVCC: Build cuda_11.8.r11.8/compiler.31833905_0
GPU 0: NVIDIA GeForce RTX 3090
GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
PyTorch: 2.0.1+cu117
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.7
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=comput

In [9]:
print('>'*35 + ' training ' + '<'*35)
exp.train()

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> training <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


  0%|          | 0/213 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 564.00 MiB (GPU 0; 23.66 GiB total capacity; 19.97 GiB already allocated; 422.19 MiB free; 20.35 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
print('>'*35 + ' testing  ' + '<'*35)
exp.test()

In [None]:
import numpy as np
from openstl.utils import show_video_line

In [None]:
# show the given frames from an example
inputs = np.load('./work_dirs/custom_exp/saved/inputs.npy')
preds = np.load('./work_dirs/custom_exp/saved/preds.npy')
trues = np.load('./work_dirs/custom_exp/saved/trues.npy')

example_idx = 6
show_video_line(trues[example_idx], ncols=after_sequence, vmax=0.6, cbar=False, out_path=None, format='png', use_rgb=True)

In [None]:
example_idx = 5
show_video_line(trues[example_idx], ncols=after_sequence, vmax=0.6, cbar=False, out_path=None, format='png', use_rgb=True)

In [None]:
example_idx = 5
show_video_line(preds[example_idx], ncols=after_sequence, vmax=0.6, cbar=False, out_path=None, format='png', use_rgb=True)

In [None]:
from openstl.utils import show_video_gif_multiple

example_idx = 6
show_video_gif_multiple(inputs[example_idx], trues[example_idx], preds[example_idx], use_rgb=True, out_path='example.gif')