# Data

## Get Datasets

### Setup

In [1]:
# Move into the data directory; the relative paths used by the cells below
# (dataset folder, saved .pt files) are resolved from here.
# NOTE(review): this cell is not idempotent - running it a second time in the
# same kernel tries to enter ./data/data. Run it once per session.
%cd ./data

# Extract the dataset archive.
# Run this only if Dataset_Student has not been unzipped yet.
!unzip Dataset_Student_V2.zip

In [2]:
import os
import torch
import numpy as np
import tqdm.auto as tqdm
import imageio.v3 as iio
import matplotlib.pyplot as plt

### Train & Validation: Images + Masks

In [4]:
# Root folder of the extracted dataset, relative to the current working
# directory (we are currently in ~/WNet/data). The cells below read its
# train/, val/ and unlabeled/ sub-folders through this constant.
P = "Dataset_Student"

In [None]:
#@title Images
def _load_split_frames(split, n_videos, n_frames=22, index_offset=0):
  """Read every frame of every video in one dataset split into a single tensor.

  Args:
    split: Sub-folder of `P` to read, e.g. "train" or "val".
    n_videos: Number of rows to allocate (one per video).
    n_frames: Frames per video; files are image_0.png ... image_{n_frames-1}.png.
    index_offset: Value subtracted from the number in the folder name so that
      folder `video_<k>` is stored at row `k - index_offset`.

  Returns:
    Tensor of shape (n_videos, n_frames, 160, 240, 3), rows ordered by video number.
    Rows whose video folder is absent stay all-zero.

  Raises:
    IndexError: If a folder's number does not map to a row in [0, n_videos).
  """
  # NOTE: with torch's default float32 dtype this allocation is roughly 10 GB
  # for 1000 videos x 22 frames.
  frames = torch.zeros([n_videos, n_frames, 160, 240, 3]) # videos x frames x H x W x color-channels

  for name in tqdm.tqdm(os.listdir(f"{P}/{split}/")): # Iterate through all videos
    if not name.startswith("video_"):
      continue # Ignore stray entries (e.g. .DS_Store) that would break the number parsing below
    row = int(name.split("_")[1]) - index_offset
    if not 0 <= row < n_videos:
      # A negative row would silently wrap around and overwrite another video
      raise IndexError(f"{P}/{split}/{name} maps to row {row}, expected 0..{n_videos - 1}")
    # np.copy gives torch its own array to convert, independent of the one imageio returned
    frames[row] = torch.stack([
      torch.Tensor(np.copy(iio.imread(f"{P}/{split}/{name}/image_{i}.png")))
      for i in range(n_frames)
    ]) # Place the video in the correct location (sorted)
  return frames

# Train: folders are numbered from 0
imgs = _load_split_frames("train", n_videos=1000)
torch.save(imgs, 'imgs.pt') # Save image tensor in ~/WNet/data

# Validation: folders are numbered from 1000
val_imgs = _load_split_frames("val", n_videos=1000, index_offset=1000)
torch.save(val_imgs, 'val_imgs.pt') # Save image tensor in ~/WNet/data

In [None]:
#@title Masks
def _load_split_masks(split, n_videos, index_offset=0):
  """Load the mask.npy file of every video in one dataset split and stack them.

  Args:
    split: Sub-folder of `P` to read, e.g. "train" or "val".
    n_videos: Number of videos expected in the split.
    index_offset: Value subtracted from the number in the folder name so that
      folder `video_<k>` is stored at position `k - index_offset`.

  Returns:
    Tensor with one entry per video along dim 0, ordered by video number.

  Raises:
    IndexError: If a folder's number does not map to a position in [0, n_videos).
    ValueError: If one or more expected videos were not found.
  """
  per_video = [None] * n_videos # One slot per video, filled in by video number
  for name in tqdm.tqdm(os.listdir(f"{P}/{split}/")): # Iterate through all videos
    if not name.startswith("video_"):
      continue # Ignore stray entries (e.g. .DS_Store) that would break the number parsing below
    slot = int(name.split("_")[1]) - index_offset
    if not 0 <= slot < n_videos:
      # A negative slot would silently wrap around and overwrite another video
      raise IndexError(f"{P}/{split}/{name} maps to slot {slot}, expected 0..{n_videos - 1}")
    per_video[slot] = torch.Tensor(np.load(f"{P}/{split}/{name}/mask.npy")) # Record masks for this video

  missing = [slot + index_offset for slot, mask in enumerate(per_video) if mask is None]
  if missing:
    raise ValueError(f"{len(missing)} video(s) missing from {P}/{split}/, e.g. video_{missing[0]}")
  return torch.stack(per_video) # Stack these video masks

# Train: folders are numbered from 0, so no offset is applied (subtracting 1000
# here would only work by relying on negative-index wrap-around)
masks = _load_split_masks("train", n_videos=1000)
torch.save(masks, 'masks.pt') # Save masks

# Validation: folders are numbered from 1000
val_masks = _load_split_masks("val", n_videos=1000, index_offset=1000)
torch.save(val_masks, 'val_masks.pt') # Save masks

In [None]:
# Sanity check: report the dimensions of each tensor built by the cells above
print(f"Train imgs: {imgs.shape}")
print(f"Val imgs: {val_imgs.shape}", end="\n\n") # Blank line between images and masks
print(f"Train masks: {masks.shape}")
print(f"Val masks: {val_masks.shape}")

### Unlabeled Data - Not Recommended

If limited by memory, please follow the Lazy Loading instructions in [`Masker.ipynb`](./Masker.ipynb).

In [None]:
# Build the list of unlabeled video directories in numeric order.
# Names are bucketed by length (10 vs 11 characters) and each bucket is sorted
# alphabetically; shorter names come first so that, for equal-width numbers,
# the combined order follows the video number.
dir_list = os.listdir(f"{P}/unlabeled/")
lst1 = sorted(name for name in dir_list if len(name) == 10)
lst2 = sorted(name for name in dir_list if len(name) == 11)
dirs = [*lst1, *lst2] # This 'dirs' list contains the sorted video directories for the unlabeled data

In [None]:
#@title Unlabeled Images
# Map every unlabeled video folder to its row: video_<k> is stored at row k - 2000.
# Entries that are not video folders (e.g. .DS_Store) are ignored.
unlabeled_rows = {
  int(name.split("_")[1]) - 2000: name
  for name in os.listdir(f"{P}/unlabeled/")
  if name.startswith("video_")
}
if unlabeled_rows and min(unlabeled_rows) < 0:
  # A negative row would silently wrap around and overwrite another video
  raise IndexError(f"{P}/unlabeled/{unlabeled_rows[min(unlabeled_rows)]} is numbered below 2000")

# Size the tensor from the folders actually present instead of a hard-coded count,
# and allocate 22 frames per video to match the 22 frames read below.
# NOTE: each video takes about 10 MB with torch's default float32 dtype, so the
# full tensor can be very large - see the memory warning above this cell.
n_unlabeled = max(unlabeled_rows, default=-1) + 1
unlabeled_imgs = torch.zeros([n_unlabeled, 22, 160, 240, 3]) # videos x frames x H x W x color-channels

for row, name in tqdm.tqdm(sorted(unlabeled_rows.items())): # Iterate through all videos in numeric order
  # np.copy gives torch its own array to convert, independent of the one imageio returned
  unlabeled_imgs[row] = torch.stack([
    torch.Tensor(np.copy(iio.imread(f"{P}/unlabeled/{name}/image_{i}.png")))
    for i in range(22)
  ])
torch.save(unlabeled_imgs, 'unlabeled_imgs.pt')

### Hidden/Test Data for Final Submission

In [None]:
# @title SUBMISSION Images
# This code turns the hidden submission images into a tensor - assuming 'hidden' is in ~/WNet/data
P_h = "hidden" # hidden images folder name

hidden_frames = [None] * 2000 # One slot per hidden video (2K videos), filled in by video number
for path in tqdm.tqdm(os.listdir(f"{P_h}/")):
  if not path.startswith("video_"):
    continue # Ignore stray entries (e.g. .DS_Store) that would break the number parsing below
  slot = int(path.split("_")[1]) - 15000 # Hidden folders are numbered from 15000
  if not 0 <= slot < len(hidden_frames):
    # A negative slot would silently wrap around and overwrite another video
    raise IndexError(f"{P_h}/{path} maps to slot {slot}, expected 0..{len(hidden_frames) - 1}")
  # Only 11 images per video in the hidden set.
  # np.copy gives torch its own array to convert, independent of the one imageio returned.
  hidden_frames[slot] = torch.stack([
    torch.Tensor(np.copy(iio.imread(f"{P_h}/{path}/image_{i}.png")))
    for i in range(11)
  ])

# Fail with a clear message instead of an opaque torch.stack error on an unfilled slot
missing = [slot + 15000 for slot, video in enumerate(hidden_frames) if video is None]
if missing:
  raise ValueError(f"{len(missing)} video(s) missing from {P_h}/, e.g. video_{missing[0]}")

hidden_imgs = torch.stack(hidden_frames).to(int) # Stored as an integer tensor

torch.save(hidden_imgs, 'hidden_imgs.pt') # Save tensor

## Visualizations

In [None]:
# Show the first stored training frame next to the first stored mask
fig, axes = plt.subplots(1, 2, figsize=(6.4*2, 4.8))
# imgs is a float tensor holding raw pixel values (presumably 0-255 from 8-bit PNGs).
# imshow treats float RGB data as being in [0, 1] and clips everything above 1,
# so cast to uint8 to display the true colors.
axes[0].imshow(imgs[0][0].to(torch.uint8))
axes[0].set_title("imgs[0][0]")
axes[1].imshow(masks[0][0])
axes[1].set_title("masks[0][0]")
plt.show()

In [None]:
# Read one frame straight from disk (as returned by imageio, no tensor conversion)
# and display it. The path is built from P so it follows the dataset location
# configured above.
img = iio.imread(f"{P}/train/video_0/image_0.png")
plt.imshow(img)
plt.show() # Render the figure without echoing the AxesImage repr

In [None]:
# Histogram of per-pixel intensity: average over the last (channel) axis,
# flatten to 1-D, and count values in 256 bins
plt.hist(img.mean(axis=-1).ravel(), bins=256)
plt.show()

In [None]:
# Display the channel-averaged image as a single-channel picture.
# NOTE(review): matplotlib's 'Greys' colormap runs from white (low) to black (high),
# so bright pixels render dark - confirm this inversion is intended ('gray' is the
# non-inverted alternative).
plt.imshow(img.mean(axis=-1), cmap='Greys')
plt.show()