# Deleting data

## Define path

In [7]:
data_path = "/home/pi/mycar/data"

In [16]:
%run Common_functions.ipynb

find_all_tubs loaded
image_count loaded
get_tub_uuid loaded
print_tubs loaded


## Find all the tubs

In [8]:
from os import listdir

def find_all_tubs():
    tubs = [d for d in listdir(data_path)]
    return tubs

tubs = find_all_tubs()

No of tubs found = 20


## Count the number of images of each tub

In [9]:
def image_count(tub_name):
    try:
        image_count = len(listdir(f"{data_path}/{tub}/images"))
    except:
        print(f"Warning: cannot find tub {tub_name}")
        image_count = -1
    return image_count

## Define a function to delete one single tub by tub name

We will create a function called `delete_tub` to delete a tub by tub name. In this function, we use a function called `rmtree`(https://docs.python.org/3/library/shutil.html#shutil.rmtree) from the `shutil` package to delete a directory.

To avoid us accidentally delete something, we added a parameter called `dry_run`. If `dry_run` is `True`, it will only simulate the delete without actually delete it. It is common to have a `dry_run` parameter to avoid you deleting something important. Deleting is not recoverable so please use this function with care.

In [10]:
import shutil

def delete_tub(tub_name, dry_run = True):
    tub_path = f"{data_path}/{tub_name}"
    print(f"Deleting {tub_path}")
    if not dry_run:
        shutil.rmtree(tub_path, ignore_errors=True)        

## Delete tubs with 0 images

In [18]:
tubs = find_all_tubs()
print_tubs(tubs)
for tub in tubs:
    if image_count(tub) == 0:
        delete_tub(tub, True)
tubs = find_all_tubs()
print_tubs(tubs)
        

Tub                  Image count          UUID                                    
tub_15_23-11-30      2422                 b7da4c7a-d868-46e1-a2e7-1b02c472c8ef
tub_16_23-11-30      2422                 a38a0958-baba-4b65-aded-e60e2f22e265
tub_18_23-12-02      2422                 dfbdcc85-d8fd-4417-a283-58bc7c05354b
tub_1_23-11-24       2422                 None
tub_20_23-12-02      2422                 None
tub_21_23-12-02      2422                 d72e2f62-0833-4d60-8f82-42587b10f9bb
tub_25_23-12-02      2422                 64087dfd-8a3d-4f45-98ed-2884a112c87f
tub_2_23-11-24       2422                 6ee2c59a-b8f4-4cb5-b06a-9691229919ae
tub_30_23-12-22      2422                 None
tub_35_24-01-13      2422                 None
tub_40_23-12-28      2422                 None
tub_44_24-01-13      2422                 None
tub_45_24-01-13      2422                 None
tub_47_24-01-13      2422                 None
tub_48_24-01-13      2422                 None
tub_49_24-01-13     

Change the `dry_run` above to `False` to delete the tubs with zero images.

## Delete tubs with less than 1000 images

In [13]:
tubs = find_all_tubs()

for tub in tubs:    
    if image_count(tub) < 1000:
        delete_tub(tub, False)

tubs = find_all_tubs()

No of tubs found = 20
Deleting /home/pi/mycar/data/tub_1_23-11-24
Deleting /home/pi/mycar/data/tub_20_23-12-02
Deleting /home/pi/mycar/data/tub_18_23-12-02
Deleting /home/pi/mycar/data/tub_44_24-01-13
Deleting /home/pi/mycar/data/tub_40_23-12-28
Deleting /home/pi/mycar/data/tub_55_24-02-01
Deleting /home/pi/mycar/data/tub_30_23-12-22
Deleting /home/pi/mycar/data/tub_8_23-11-29
Deleting /home/pi/mycar/data/tub_51_24-01-13
Deleting /home/pi/mycar/data/tub_47_24-01-13
Deleting /home/pi/mycar/data/tub_48_24-01-13
Deleting /home/pi/mycar/data/tub_35_24-01-13
Deleting /home/pi/mycar/data/tub_45_24-01-13
No of tubs found = 20


Change the `dry_run` above to `False` to delete the tubs less than images.