# Init

## Infrastructure

In [None]:
!pip install -U fiftyone torch torchvision umap-learn scikit-learn ipywidgets
!fiftyone plugins download https://github.com/jacobmarks/clustering-plugin

In [None]:
import fiftyone.zoo as foz

# load models
# The returned models are not bound to a name, so this cell is run for its
# side effects only -- presumably to download/cache both models ahead of
# later use (TODO confirm against the FiftyOne model zoo docs).
foz.load_zoo_model("resnet101-imagenet-torch")
foz.load_zoo_model("dinov2-vitb14-torch")

## Setup Dataset

In [None]:
import os, csv, json
import fiftyone as fo

# dataset basics
dataset_label = 'video-game-screenshots'  # name used to look up / create the dataset
dataset_exists = False  # set to True by a later cell once a dataset has been loaded

# folder setup, relative to the directory the notebook is run from.
# os.getcwd() is already absolute, so no abspath() is needed, and os.path.join
# avoids the doubled separator that plain concatenation produced when the
# working directory is the filesystem root. The trailing '' keeps the trailing
# separator the paths have always had.
path_samples = os.path.join(os.getcwd(), 'screenshots', '')
path_export = os.path.join(os.getcwd(), 'dataset', '')

## Prepare Dataset

*This cell applies if the persistent dataset already exists in FiftyOne, or if a previously exported copy of the dataset exists on disk.*

In [None]:
# Reuse a dataset that is already known to FiftyOne (listed by
# fo.list_datasets()); otherwise fall back to importing a previously exported
# copy from `path_export`.
#
# `dataset_exists` is only flipped once loading has actually succeeded. Setting
# it up front meant a failed load/import left the flag at True without a usable
# `dataset`, so the "dataset does not yet exist" cell was skipped afterwards.
if dataset_label in fo.list_datasets():
    dataset = fo.load_dataset(dataset_label)
    dataset_exists = True
    session = fo.launch_app(dataset, auto=False)

# The `not dataset_exists` guard is kept so that a flag already set by an
# earlier run of the notebook still prevents a second import.
elif not dataset_exists and os.path.isdir(path_export):
    dataset = fo.Dataset.from_dir(
        dataset_dir=path_export,
        dataset_type=fo.types.FiftyOneDataset,
        name=dataset_label,
        persistent=True
    )
    dataset_exists = True

### If dataset does not yet exist

In [None]:
if not dataset_exists:

    # Everything below is read and parsed *before* the persistent dataset is
    # created. Previously the dataset was created first, so a missing input
    # file or a malformed filename left an empty / half-filled persistent
    # dataset behind, which the "Prepare Dataset" cell then loaded as if it
    # were complete on the next run.

    # wikidata query file: links the wikidata_id used in the screenshot
    # filenames to the game's title and countries of origin.
    # Columns as used here: 0 = entity URI (the id is its last path segment),
    # 1 = title, 4 = comma-separated countries.
    # A header row, if present, is stored like any other row; its key is
    # presumably never matched by a filename.
    games = {}
    # newline='' is what the csv module requires so that quoted fields
    # containing line breaks are parsed correctly.
    with open('query.csv', newline='') as games_data:
        for game in csv.reader(games_data, delimiter=',', quotechar='"'):
            # blank lines come back as empty rows; skip them instead of
            # failing with an IndexError
            if not game:
                continue
            games[game[0].split('/')[-1]] = {
                'title': game[1],
                'countries': game[4].replace(',', '|')
            }

    # reformatted mobygames platforms file, used to get the proper platform label
    with open('platforms_reformatted.json') as platforms_data:
        platforms = json.load(platforms_data)

    # go through all screenshots in the samples folder
    samples = []
    for path, folders, files in os.walk(path_samples):
        for screenshot in files:

            # Filename fields are separated by '_': field 0 is the
            # wikidata id, field 1 the year(s) joined by '-', field 3 the key
            # into the platforms file. Split once and reuse the parts.
            parts = screenshot.split('_')
            wikidata_id = parts[0]

            # files that do not belong to a known game are ignored
            if wikidata_id not in games:
                continue

            years = parts[1].split('-')

            sample = fo.Sample(filepath=os.path.join(path, screenshot))

            sample['wikidata_id'] = wikidata_id
            sample['title'] = games[wikidata_id]['title']
            sample['platform'] = platforms[parts[3]]['platform_name']
            sample['years'] = years
            # the first listed year is used as the single numeric year
            sample['year'] = int(years[0])
            sample['countries'] = games[wikidata_id]['countries'].split('|')

            samples.append(sample)

    # Only now create the persistent dataset and insert all samples in one
    # batch instead of one insert per screenshot.
    dataset = fo.Dataset(dataset_label)
    dataset.persistent = True
    if samples:
        dataset.add_samples(samples)

In [None]:
# Start a FiftyOne App session for the dataset prepared by the cells above and
# keep a handle to it in `session`.
# NOTE(review): when the dataset was loaded by name in the "Prepare Dataset"
# cell, a session has already been launched there; this call rebinds `session`.
# NOTE(review): auto=False presumably stops the App from being shown
# automatically in the cell output (the link below is used instead) -- confirm
# against the FiftyOne session documentation.
session = fo.launch_app(dataset, auto=False)

[Open Dataset in FiftyOne](http://localhost:5151/)