# Maintain

## Jobs

In [None]:
# Job switches: each flag enables the notebook cell guarded by it.
convert_year_type = False   # write an int "year" field from "years"
fix_countries = False       # split the "countries" string into a list
rename_hotspots = False     # replace cluster labels via a value map
create_sample_set = False   # copy a random subset of the screenshots
remove_resnet = False       # delete the resnet101 embeddings field
export_dataset = True       # export the dataset to the export folder

# D A N G E R
# These flags delete data from the dataset (or the dataset itself).
cleanup_runs = False
cleanup_everything = False

## Basic Setup

In [None]:
import ipywidgets as widgets
import os, random, shutil

# Name of the dataset as registered in FiftyOne.
dataset_label = 'video-game-screenshots'

# All working folders live directly below the current working directory.
# The trailing slash is part of each value.
working_dir = os.path.abspath(os.getcwd())
path_screenshots = working_dir + '/screenshots/'
path_export = working_dir + '/dataset/'
path_samples = working_dir + '/sample_set/'

## Prepare Dataset

In [None]:
import fiftyone as fo

if dataset_label in fo.list_datasets():
    # Load the registered dataset and start an App session for it.
    dataset = fo.load_dataset(dataset_label)
    session = fo.launch_app(dataset, auto=False)
else:
    # Make the missing dataset explicit: without this branch `dataset` would
    # be undefined (or stale from an earlier kernel run) in every later cell.
    dataset = None
    session = None
    print(f"Dataset '{dataset_label}' not found - maintenance jobs cannot run.")

Open [FiftyOne 🔗](http://localhost:5151).

## Rename Hotspot Labels

In [None]:
if rename_hotspots:
    # Field that holds the cluster label, and the old -> new label mapping.
    sample_field = "kmeans_dinov2_embeddings_cluster"
    value_map = {
        "5": "Isometric Perspectives"
    }

    # autosave=True lets the iterator persist edits itself, so no explicit
    # sample.save() is needed inside the loop.
    for sample in dataset.iter_samples(progress=True, autosave=True):
        current_label = sample[sample_field]
        if current_label in value_map:
            sample[sample_field] = value_map[current_label]

## Convert Years from String to Int

In [None]:
if convert_year_type:
    # Store the first entry of "years" as an integer in the "year" field.
    # NOTE(review): if "years" is a plain string rather than a list, [0]
    # yields only its first character - confirm the field type.
    for sample in dataset.iter_samples(progress=True, autosave=True):
        first_entry = sample["years"][0]
        sample["year"] = int(first_entry)

## Remove Resnet101 Embeddings from Samples

In [None]:
if remove_resnet:
    # Sample fields to delete from the dataset, one call per field.
    fields = ["resnet101_embeddings"]
    for field in fields:
        print(f"Deleting {field}")
        dataset.delete_sample_field(field)

## Fix Country Attribute

In [None]:
if fix_countries:
    # Two specific Wikidata genid URIs; a sample whose first country is one
    # of them ends up with an empty country list.
    placeholder_countries = {
        "http://www.wikidata.org/.well-known/genid/05a392b0889f3fc9120b0d65eec2de1c",
        "http://www.wikidata.org/.well-known/genid/a7cc242f7b23d82513a4faf4e66ca359",
    }

    # Keep the old pipe-separated string under a temporary name while the
    # list-valued "countries" field is rebuilt from it.
    dataset.rename_sample_field("countries", "countries_deprecated")
    for sample in dataset.iter_samples(progress=True, autosave=True):
        raw_countries = sample["countries_deprecated"]
        # Samples without a value (None or "") get an empty list instead of
        # crashing on .split or storing [""].
        countries = raw_countries.split("|") if raw_countries else []
        if countries and countries[0] in placeholder_countries:
            countries = []
        sample["countries"] = countries
    dataset.delete_sample_field("countries_deprecated")

## Create Random Sample

In [None]:
if create_sample_set:

    # Number of screenshots to copy into the sample folder.
    sample_size = 1130

    # Always start from an empty sample folder.
    if os.path.exists(path_samples):
        shutil.rmtree(path_samples)

    os.makedirs(path_samples)

    # List the screenshot folder once and keep regular files only, so a
    # sub-directory can never be picked and handed to shutil.copy.
    candidates = [
        name for name in os.listdir(path_screenshots)
        if os.path.isfile(os.path.join(path_screenshots, name))
    ]

    # Drawing without replacement needs no retry loop; capping the size
    # avoids the endless loop the retry approach hits when fewer than
    # sample_size files exist.
    if len(candidates) < sample_size:
        print(f"Only {len(candidates)} screenshots available, copying all of them.")

    for filename in random.sample(candidates, min(sample_size, len(candidates))):
        shutil.copy(
            os.path.join(path_screenshots, filename),
            os.path.join(path_samples, filename),
        )

## Export Dataset

In [None]:
if export_dataset:
    # Export the dataset into the export folder as a FiftyOneDataset.
    export_options = {
        "export_dir": path_export,
        "dataset_type": fo.types.FiftyOneDataset,
        "use_dirs": True,
    }
    dataset.export(**export_options)

## Cleanup

In [None]:
if cleanup_runs:
    # Call the dataset's run-deletion methods in order.
    for purge in (dataset.delete_runs, dataset.delete_brain_runs):
        purge()

In [None]:
if cleanup_everything:
    # Delete runs, brain runs and evaluations first, then the dataset itself.
    for purge in (
        dataset.delete_runs,
        dataset.delete_brain_runs,
        dataset.delete_evaluations,
    ):
        purge()
    fo.delete_dataset(dataset_label)