In [1]:
!pip install -q kaggle

In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [1]:
!kaggle competitions download -c UBC-OCEAN

Downloading UBC-OCEAN.zip to /home/jupyter
100%|██████████████████████████████████████▉| 719G/719G [1:21:33<00:00, 145MB/s]
100%|███████████████████████████████████████| 719G/719G [1:21:33<00:00, 158MB/s]


In [2]:
!unzip UBC-OCEAN -d data > /dev/null

In [None]:
import os
import pandas as pd
from PIL import Image
import concurrent.futures

Image.MAX_IMAGE_PIXELS = None

# Create the 'tiles' directory if it doesn't exist
if not os.path.exists('tiles'):
    os.makedirs('tiles')

# Read the CSV file into a pandas DataFrame
df = pd.read_csv('data/train.csv')

# Function to split an image into tiles and save them
def process_image(row):
    image_id, height, width = row['image_id'], row['image_height'], row['image_width']
    print(f"processing image {image_id}")
    image_path = f'data/train_images/{image_id}.png'
    
    # Create the image directory if it doesn't exist
    if not os.path.exists(f'tiles/{image_id}'):
        os.makedirs(f'tiles/{image_id}')
    
    with Image.open(image_path) as image:
        for i in range(0, height, 224):
            for j in range(0, width, 224):
                tile = image.crop((j, i, j + 224, i + 224))
                # Make sure the tile size is 224x224
                if tile.size[0] != 224 or tile.size[1] != 224:
                    continue
                # Check if the tile is more than 90% black/white
                black_or_white_pixels = sum(1 for pixel in tile.getdata() if sum(pixel) < 50 or sum(pixel) > 255 * 3 - 50)
                if black_or_white_pixels / (224 * 224) >= 0.9:
                    continue

                tile.save(f'tiles/{image_id}/{i}_{j}.png')

# Function to parallelize the image processing
def parallel_process_images(data_frame):
    # Using a ThreadPoolExecutor to create and manage a pool of threads
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        # Submitting all image processing tasks to the thread pool
        futures = [executor.submit(process_image, row) for _, row in data_frame.iterrows()]
        
        # Waiting for all futures to complete and gathering the results
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
    
    return results

# Process each image in parallel
parallel_process_images(df)


processing image 3191processing image 5851processing image 2906processing image 6281



processing image 6363
processing image 6898
processing image 8279
processing image 8713
processing image 9183
processing image 9254


In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold

# Load your data
train = pd.read_csv("data/train.csv")

# Create a column that combines 'is_tma' and 'label' for stratification
train['stratify_col'] = train['is_tma'].astype(str) + "_" + train['label'].astype(str)

# Determine the smallest group size for stratification
min_group_size = train['stratify_col'].value_counts().min()

# Determine the maximum number of folds
max_k = min_group_size // 1  # Each group needs at least 1 sample

# Initialize KFold
skf = StratifiedKFold(n_splits=max_k)

# Create folds
for fold, (train_idx, val_idx) in enumerate(skf.split(train, train['stratify_col'])):
    train_fold = train.iloc[train_idx]
    val_fold = train.iloc[val_idx]

    # Remove the 'stratify_col' before saving
    train_fold = train_fold.drop(columns=['stratify_col'])
    val_fold = val_fold.drop(columns=['stratify_col'])

    # Save each fold to CSV
    train_fold.to_csv(f"folds/train_fold_{fold}.csv", index=False)
    val_fold.to_csv(f"folds/val_fold_{fold}.csv", index=False)


In [1]:
import os
import pandas as pd
from PIL import Image
import concurrent.futures
import numpy as np

Image.MAX_IMAGE_PIXELS = None
IMAGE_SIZE = 2048
RESIZE_SIZE = 1024

# Create the 'tiles' directory if it doesn't exist
if not os.path.exists(f'tiles_{IMAGE_SIZE}'):
    os.makedirs(f'tiles_{IMAGE_SIZE}')

# Read the CSV file into a pandas DataFrame
df = pd.read_csv('data/train.csv')

def calculate_entropy(tile):
    # Convert to grayscale if the image is RGB
    if tile.mode == 'RGB':
        tile = tile.convert('L')

    # Flatten the tile and calculate histogram
    pixel_counts = np.histogram(tile, bins=range(256))[0]

    # Normalize to get probabilities
    probabilities = pixel_counts / np.sum(pixel_counts)

    # Filter out zero probabilities and calculate entropy
    probabilities = probabilities[probabilities > 0]
    entropy = -np.sum(probabilities * np.log2(probabilities))

    return entropy

# Function to split an image into tiles and save them
def process_image(row):
    image_id, height, width = row['image_id'], row['image_height'], row['image_width']
    print(f"processing image {image_id}")
    image_path = f'data/train_images/{image_id}.png'
    
    # Create the image directory if it doesn't exist
    if not os.path.exists(f'tiles_{IMAGE_SIZE}/{image_id}'):
        os.makedirs(f'tiles_{IMAGE_SIZE}/{image_id}')
   
    with Image.open(image_path) as image:
        for i in range(0, height, IMAGE_SIZE):
            for j in range(0, width, IMAGE_SIZE):
                tile = image.crop((j, i, j + IMAGE_SIZE, i + IMAGE_SIZE))
                # Make sure the tile size is IMAGE_SIZExIMAGE_SIZE and there's some data there
                if tile.size[0] != IMAGE_SIZE or tile.size[1] != IMAGE_SIZE or calculate_entropy(tile) < 3:
                    continue
                tile = tile.resize((RESIZE_SIZE, RESIZE_SIZE), Image.Resampling.LANCZOS)

                tile.save(f'tiles_{IMAGE_SIZE}/{image_id}/{i}_{j}.png')

# Function to parallelize the image processing
def parallel_process_images(data_frame):
    # Using a ThreadPoolExecutor to create and manage a pool of threads
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        # Submitting all image processing tasks to the thread pool
        futures = [executor.submit(process_image, row) for _, row in data_frame.iterrows()]
        
        # Waiting for all futures to complete and gathering the results
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
    
    return results

# Process each image in parallel
parallel_process_images(df)

# for _, row in df.iterrows():
#     process_image(row)


processing image 66processing image 4

processing image 91
processing image 281
processing image 286
processing image 431
processing image 706
processing image 970
processing image 1020
processing image 1080
processing image 1101
processing image 1252
processing image 1289
processing image 1295
processing image 1660
processing image 1666
processing image 1774
processing image 1925
processing image 1943
processing image 1952
processing image 2097
processing image 2227
processing image 2391
processing image 2666
processing image 2706
processing image 2906
processing image 3055
processing image 3084
processing image 3092
processing image 3098
processing image 3191
processing image 3222
processing image 3264
processing image 3511
processing image 3672
processing image 3881
processing image 3997
processing image 4134
processing image 4211
processing image 4608
processing image 4797
processing image 4827
processing image 4877
processing image 4963
processing image 5015
processing image 5114


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [5]:
import pandas as pd
from sklearn.utils import shuffle

# Load the training data
train = pd.read_csv("data/train.csv")

train = train[train['is_tma'] == False]
# Shuffle the data
train_shuffled = shuffle(train, random_state=42)

# Step 1: Group by the 'label' column
grouped = train_shuffled.groupby('label')

# Step 2: Sample 5 instances from each group
validation_set = grouped.apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)

# Step 3: Find the remaining training data
train_not_validation = train_shuffled[~train_shuffled['image_id'].isin(validation_set['image_id'])]

# Step 4: Save to CSV files
validation_set.to_csv("validation-no-tma.csv", index=False)
train_not_validation.to_csv("train-no-tma.csv", index=False)


In [6]:
import pandas as pd
from sklearn.utils import shuffle

# Load the training data
train = pd.read_csv("data/train.csv")

train = train[train['is_tma'] == True]
# Shuffle the data
train_shuffled = shuffle(train, random_state=42)

# Step 1: Group by the 'label' column
grouped = train_shuffled.groupby('label')

# Step 2: Sample 5 instances from each group
validation_set = grouped.apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)

# Step 3: Find the remaining training data
train_not_validation = train_shuffled[~train_shuffled['image_id'].isin(validation_set['image_id'])]

# Step 4: Save to CSV files
validation_set.to_csv("validation-yes-tma.csv", index=False)
train_not_validation.to_csv("train-yes-tma.csv", index=False)


In [32]:
import os
import pandas as pd
from PIL import Image
import concurrent.futures

Image.MAX_IMAGE_PIXELS = None

# Create the 'tiles' directory if it doesn't exist
if not os.path.exists('tiles'):
    os.makedirs('tiles')

# Read the CSV file into a pandas DataFrame
df = pd.read_csv('data/train.csv')

# Function to split an image into tiles, scale down, and save them
def process_image(row):
    image_id, height, width = row['image_id'], row['image_height'], row['image_width']
    print(f"processing image {image_id}")
    image_path = f'data/train_images/{image_id}.png'
    
    # Create the image directory if it doesn't exist
    image_dir = f'tiles_2964/{image_id}'
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    
    with Image.open(image_path) as image:
        for i in range(0, height, 2964):
            for j in range(0, width, 2964):
                # Make sure we don't go out of image bounds
                tile = image.crop((j, i, min(j + 2964, width), min(i + 2964, height)))
                
                # Skip any tiles that are not full-size (e.g., at edges)
                if tile.size[0] != 2964 or tile.size[1] != 2964:
                    continue
                
                # Resize tile to 224x224
                tile = tile.resize((224, 224), Image.Resampling.LANCZOS)

                # Check if the tile is more than 90% black/white
                black_or_white_pixels = sum(1 for pixel in tile.getdata() if sum(pixel) < 50 or sum(pixel) > 255 * 3 - 50)
                if black_or_white_pixels / (224 * 224) >= 0.9:
                    continue

                # Save the tile
                tile.save(f'{image_dir}/{i}_{j}.png')

# # Function to parallelize the image processing
# def parallel_process_images(data_frame):
#     # Using a ThreadPoolExecutor to create and manage a pool of threads
#     with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
#         # Submitting all image processing tasks to the thread pool
#         futures = [executor.submit(process_image, row) for _, row in data_frame.iterrows()]
        
#         # Waiting for all futures to complete and gathering the results
#         results = [future.result() for future in concurrent.futures.as_completed(futures)]
    
#     return results

# Process each image in parallel
# parallel_process_images(df)

for _, row in df.iterrows():
    process_image(row)

processing image 2906
processing image 3191
processing image 5851
processing image 6281
processing image 6363
processing image 6898
processing image 8279
processing image 8713
processing image 9183
processing image 9254
processing image 10252
processing image 11263
processing image 14051
processing image 14401
processing image 14617
processing image 15231
processing image 16209
processing image 22740
processing image 25331
processing image 27739
processing image 29147
processing image 29904
processing image 32035
processing image 34649
processing image 34690
processing image 34822
processing image 36063
processing image 36499
processing image 38366
processing image 42549
processing image 44232
processing image 44530
processing image 48506
processing image 48550
processing image 49587
processing image 49872
processing image 50962
processing image 51032
processing image 51128
processing image 53377
processing image 54007
processing image 56117
processing image 56351
processing image 5694

In [7]:
import json
import subprocess
import os
import zipfile

with open('data/updated_image_ids.json', 'r') as json_file:
    file_list = json.load(json_file)

competition_name = 'UBC-OCEAN'

for file_name in file_list:
    file_path = f'data/train_images/{file_name}.png'
    # Check if the file already exists and delete it
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f'Existing file {file_name}.png deleted.')

    # Execute the Kaggle command to download the file
    command = f'kaggle competitions download -c {competition_name} -f train_images/{file_name}.png -p data/train_images'
    try:
        subprocess.run(command, shell=True, check=True)
        
        downloaded_file_path = f'data/train_images/{file_name}.png.zip'
        # Check if the downloaded file is a zip file
        if os.path.exists(downloaded_file_path):
            with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref:
                zip_ref.extractall('data/train_images')
            print(f'Successfully unzipped {file_name}.png.zip')
            os.remove(downloaded_file_path)
            print(f'Deleted the zip file: {file_name}.png.zip')
        else:
            print(f'Successfully downloaded {file_name}.png')

    except subprocess.CalledProcessError as e:
        print(f'Error downloading {file_name}.png: {e}')

print('Download process completed.')


Existing file 2906.png deleted.
Downloading 2906.png.zip to data/train_images


100%|██████████| 1.62G/1.62G [00:14<00:00, 119MB/s] 



Successfully unzipped 2906.png.zip
Deleted the zip file: 2906.png.zip
Existing file 3191.png deleted.
Downloading 3191.png to data/train_images


100%|██████████| 1.06G/1.06G [00:13<00:00, 83.3MB/s]



Successfully downloaded 3191.png
Existing file 5851.png deleted.
Downloading 5851.png.zip to data/train_images


100%|██████████| 0.99G/0.99G [00:04<00:00, 251MB/s]



Successfully unzipped 5851.png.zip
Deleted the zip file: 5851.png.zip
Existing file 6281.png deleted.
Downloading 6281.png.zip to data/train_images


100%|██████████| 3.58G/3.58G [00:26<00:00, 143MB/s] 



Successfully unzipped 6281.png.zip
Deleted the zip file: 6281.png.zip
Existing file 6363.png deleted.
Downloading 6363.png.zip to data/train_images


100%|██████████| 1.25G/1.25G [00:06<00:00, 217MB/s] 



Successfully unzipped 6363.png.zip
Deleted the zip file: 6363.png.zip
Existing file 6898.png deleted.
Downloading 6898.png.zip to data/train_images


100%|██████████| 1.52G/1.52G [00:08<00:00, 185MB/s] 



Successfully unzipped 6898.png.zip
Deleted the zip file: 6898.png.zip
Downloading 8279.png.zip to data/train_images


100%|██████████| 2.19G/2.19G [00:19<00:00, 123MB/s] 



Successfully unzipped 8279.png.zip
Deleted the zip file: 8279.png.zip
Existing file 8713.png deleted.
Downloading 8713.png.zip to data/train_images


100%|██████████| 3.81G/3.81G [00:34<00:00, 118MB/s] 



Successfully unzipped 8713.png.zip
Deleted the zip file: 8713.png.zip
Existing file 9183.png deleted.
Downloading 9183.png.zip to data/train_images


100%|██████████| 1.42G/1.42G [00:14<00:00, 108MB/s] 



Successfully unzipped 9183.png.zip
Deleted the zip file: 9183.png.zip
Existing file 9254.png deleted.
Downloading 9254.png.zip to data/train_images


100%|██████████| 2.79G/2.79G [00:29<00:00, 100MB/s] 



Successfully unzipped 9254.png.zip
Deleted the zip file: 9254.png.zip
Existing file 10252.png deleted.
Downloading 10252.png.zip to data/train_images


100%|██████████| 1.15G/1.15G [00:11<00:00, 104MB/s] 



Successfully unzipped 10252.png.zip
Deleted the zip file: 10252.png.zip
Existing file 11263.png deleted.
Downloading 11263.png.zip to data/train_images


100%|██████████| 831M/831M [00:07<00:00, 121MB/s]  



Successfully unzipped 11263.png.zip
Deleted the zip file: 11263.png.zip
Existing file 14051.png deleted.
Downloading 14051.png.zip to data/train_images


100%|██████████| 983M/983M [00:10<00:00, 102MB/s]  



Successfully unzipped 14051.png.zip
Deleted the zip file: 14051.png.zip
Existing file 14401.png deleted.
Downloading 14401.png.zip to data/train_images


100%|██████████| 1.99G/1.99G [00:18<00:00, 113MB/s] 



Successfully unzipped 14401.png.zip
Deleted the zip file: 14401.png.zip
Existing file 14617.png deleted.
Downloading 14617.png.zip to data/train_images


100%|██████████| 1.17G/1.17G [00:18<00:00, 66.4MB/s]



Successfully unzipped 14617.png.zip
Deleted the zip file: 14617.png.zip
Existing file 15231.png deleted.
Downloading 15231.png.zip to data/train_images


100%|██████████| 2.89G/2.89G [00:29<00:00, 106MB/s] 



Successfully unzipped 15231.png.zip
Deleted the zip file: 15231.png.zip
Existing file 16209.png deleted.
Downloading 16209.png to data/train_images


100%|██████████| 1.34G/1.34G [00:27<00:00, 52.2MB/s]



Successfully downloaded 16209.png
Existing file 22740.png deleted.
Downloading 22740.png.zip to data/train_images


100%|██████████| 823M/823M [00:07<00:00, 122MB/s]  



Successfully unzipped 22740.png.zip
Deleted the zip file: 22740.png.zip
Existing file 25331.png deleted.
Downloading 25331.png.zip to data/train_images


100%|██████████| 2.12G/2.12G [00:19<00:00, 115MB/s] 



Successfully unzipped 25331.png.zip
Deleted the zip file: 25331.png.zip
Existing file 27739.png deleted.
Downloading 27739.png.zip to data/train_images


100%|██████████| 1.43G/1.43G [00:14<00:00, 107MB/s] 



Successfully unzipped 27739.png.zip
Deleted the zip file: 27739.png.zip
Existing file 29147.png deleted.
Downloading 29147.png.zip to data/train_images


100%|██████████| 773M/773M [00:06<00:00, 123MB/s]  



Successfully unzipped 29147.png.zip
Deleted the zip file: 29147.png.zip
Existing file 29904.png deleted.
Downloading 29904.png.zip to data/train_images


100%|██████████| 818M/818M [00:08<00:00, 103MB/s]  



Successfully unzipped 29904.png.zip
Deleted the zip file: 29904.png.zip
Existing file 32035.png deleted.
Downloading 32035.png.zip to data/train_images


100%|██████████| 870M/870M [00:08<00:00, 105MB/s]  



Successfully unzipped 32035.png.zip
Deleted the zip file: 32035.png.zip
Existing file 34649.png deleted.
Downloading 34649.png.zip to data/train_images


100%|██████████| 1.29G/1.29G [00:13<00:00, 102MB/s] 



Successfully unzipped 34649.png.zip
Deleted the zip file: 34649.png.zip
Existing file 34690.png deleted.
Downloading 34690.png.zip to data/train_images


100%|██████████| 1.35G/1.35G [00:13<00:00, 104MB/s] 



Successfully unzipped 34690.png.zip
Deleted the zip file: 34690.png.zip
Existing file 34822.png deleted.
Downloading 34822.png.zip to data/train_images


100%|██████████| 1.01G/1.01G [00:09<00:00, 110MB/s] 



Successfully unzipped 34822.png.zip
Deleted the zip file: 34822.png.zip
Existing file 36063.png deleted.
Downloading 36063.png.zip to data/train_images


100%|██████████| 1.07G/1.07G [00:09<00:00, 124MB/s] 



Successfully unzipped 36063.png.zip
Deleted the zip file: 36063.png.zip
Existing file 36499.png deleted.
Downloading 36499.png.zip to data/train_images


100%|██████████| 819M/819M [00:08<00:00, 102MB/s]  



Successfully unzipped 36499.png.zip
Deleted the zip file: 36499.png.zip
Existing file 38366.png deleted.
Downloading 38366.png.zip to data/train_images


100%|██████████| 1.02G/1.02G [00:09<00:00, 113MB/s] 



Successfully unzipped 38366.png.zip
Deleted the zip file: 38366.png.zip
Existing file 42549.png deleted.
Downloading 42549.png.zip to data/train_images


100%|██████████| 1.22G/1.22G [00:10<00:00, 122MB/s] 



Successfully unzipped 42549.png.zip
Deleted the zip file: 42549.png.zip
Existing file 44232.png deleted.
Downloading 44232.png.zip to data/train_images


100%|██████████| 773M/773M [00:08<00:00, 95.0MB/s] 



Successfully unzipped 44232.png.zip
Deleted the zip file: 44232.png.zip
Existing file 44530.png deleted.
Downloading 44530.png to data/train_images


100%|██████████| 1.63G/1.63G [00:28<00:00, 61.0MB/s]



Successfully downloaded 44530.png
Existing file 48506.png deleted.
Downloading 48506.png.zip to data/train_images


100%|██████████| 1.94G/1.94G [00:19<00:00, 110MB/s] 



Successfully unzipped 48506.png.zip
Deleted the zip file: 48506.png.zip
Existing file 48550.png deleted.
Downloading 48550.png.zip to data/train_images


100%|██████████| 977M/977M [00:08<00:00, 121MB/s]  



Successfully unzipped 48550.png.zip
Deleted the zip file: 48550.png.zip
Existing file 49587.png deleted.
Downloading 49587.png.zip to data/train_images


100%|██████████| 1.66G/1.66G [00:14<00:00, 124MB/s] 



Successfully unzipped 49587.png.zip
Deleted the zip file: 49587.png.zip
Existing file 49872.png deleted.
Downloading 49872.png.zip to data/train_images


100%|██████████| 1.06G/1.06G [00:09<00:00, 119MB/s] 



Successfully unzipped 49872.png.zip
Deleted the zip file: 49872.png.zip
Existing file 50962.png deleted.
Downloading 50962.png.zip to data/train_images


100%|██████████| 468M/468M [00:04<00:00, 118MB/s]  



Successfully unzipped 50962.png.zip
Deleted the zip file: 50962.png.zip
Existing file 51032.png deleted.
Downloading 51032.png.zip to data/train_images


100%|██████████| 1.86G/1.86G [00:26<00:00, 76.5MB/s]



Successfully unzipped 51032.png.zip
Deleted the zip file: 51032.png.zip
Existing file 51128.png deleted.
Downloading 51128.png.zip to data/train_images


100%|██████████| 1.60G/1.60G [00:11<00:00, 146MB/s] 



Successfully unzipped 51128.png.zip
Deleted the zip file: 51128.png.zip
Existing file 53377.png deleted.
Downloading 53377.png.zip to data/train_images


100%|██████████| 2.28G/2.28G [00:20<00:00, 117MB/s] 



Successfully unzipped 53377.png.zip
Deleted the zip file: 53377.png.zip
Existing file 54007.png deleted.
Downloading 54007.png.zip to data/train_images


100%|██████████| 1.06G/1.06G [00:10<00:00, 112MB/s] 



Successfully unzipped 54007.png.zip
Deleted the zip file: 54007.png.zip
Existing file 56117.png deleted.
Downloading 56117.png.zip to data/train_images


100%|██████████| 774M/774M [00:06<00:00, 124MB/s]  



Successfully unzipped 56117.png.zip
Deleted the zip file: 56117.png.zip
Existing file 56351.png deleted.
Downloading 56351.png.zip to data/train_images


100%|██████████| 1.70G/1.70G [00:15<00:00, 115MB/s] 



Successfully unzipped 56351.png.zip
Deleted the zip file: 56351.png.zip
Existing file 56947.png deleted.
Downloading 56947.png.zip to data/train_images


100%|██████████| 1.38G/1.38G [00:13<00:00, 112MB/s] 



Successfully unzipped 56947.png.zip
Deleted the zip file: 56947.png.zip
Existing file 57162.png deleted.
Downloading 57162.png.zip to data/train_images


100%|██████████| 3.25G/3.25G [00:32<00:00, 108MB/s] 



Successfully unzipped 57162.png.zip
Deleted the zip file: 57162.png.zip
Existing file 59031.png deleted.
Downloading 59031.png.zip to data/train_images


100%|██████████| 1.04G/1.04G [00:09<00:00, 119MB/s] 



Successfully unzipped 59031.png.zip
Deleted the zip file: 59031.png.zip
Existing file 62476.png deleted.
Downloading 62476.png.zip to data/train_images


100%|██████████| 1.15G/1.15G [00:10<00:00, 123MB/s] 



Successfully unzipped 62476.png.zip
Deleted the zip file: 62476.png.zip
Existing file 63165.png deleted.
Downloading 63165.png.zip to data/train_images


100%|██████████| 1.17G/1.17G [00:11<00:00, 108MB/s] 



Successfully unzipped 63165.png.zip
Deleted the zip file: 63165.png.zip
Existing file 64111.png deleted.
Downloading 64111.png.zip to data/train_images


100%|██████████| 594M/594M [00:06<00:00, 97.5MB/s] 



Successfully unzipped 64111.png.zip
Deleted the zip file: 64111.png.zip
Download process completed.


In [1]:
!kaggle datasets download -d sohier/ubc-ovarian-cancer-competition-supplemental-masks

Downloading ubc-ovarian-cancer-competition-supplemental-masks.zip to /home/jupyter
 95%|█████████████████████████████████████▉  | 337M/356M [00:08<00:00, 22.7MB/s]
100%|████████████████████████████████████████| 356M/356M [00:08<00:00, 43.8MB/s]


In [2]:
!unzip -q ubc-ovarian-cancer-competition-supplemental-masks.zip -d data/train_mask_images