# Plot and Stats

In [None]:
import fiftyone as fo
import plotly.express as px
import pandas as pd
import ipywidgets as widgets

from fiftyone import ViewField as F

# --- Dataset ----------------------------------------------------------------
# Name used to look the dataset up in FiftyOne.
dataset_label = 'video-game-screenshots'

# --- Colours ----------------------------------------------------------------
# Available colour-scale names, kept in alphabetical order and grouped by
# initial letter. NOTE(review): presumably mirrors Plotly's named continuous
# colour scales -- confirm against the installed Plotly version.
colorschemes = [
    'aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose',
    'balance', 'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg',
    'brwnyl', 'bugn', 'bupu', 'burg', 'burgyl',
    'cividis', 'curl',
    'darkmint', 'deep', 'delta', 'dense',
    'earth', 'edge', 'electric', 'emrld',
    'fall',
    'geyser', 'gnbu', 'gray', 'greens', 'greys',
    'haline', 'hot', 'hsv',
    'ice', 'icefire', 'inferno',
    'jet',
    'magenta', 'magma', 'matter', 'mint', 'mrybm', 'mygbm',
    'oranges', 'orrd', 'oryel', 'oxy',
    'peach', 'phase', 'picnic', 'pinkyl', 'piyg', 'plasma', 'plotly3',
    'portland', 'prgn', 'pubu', 'pubugn', 'puor', 'purd', 'purp', 'purples',
    'purpor',
    'rainbow', 'rdbu', 'rdgy', 'rdpu', 'rdylbu', 'rdylgn', 'redor', 'reds',
    'solar', 'spectral', 'speed', 'sunset', 'sunsetdark',
    'teal', 'tealgrn', 'tealrose', 'tempo', 'temps', 'thermal', 'tropic',
    'turbid', 'turbo', 'twilight',
    'viridis',
    'ylgn', 'ylgnbu', 'ylorbr', 'ylorrd',
]

# Colour scale handed to the plots that colour points by a continuous value.
colorscheme = 'magma'
# Single accent colour as a hex string.
color = '#c23c74'

## Prepare Dataset

In [None]:
# Load the FiftyOne dataset registered under `dataset_label`.
# Fail right here with a descriptive message when it is missing: silently
# skipping the load would leave `dataset` undefined and make the cells below
# die with an unrelated NameError.
if dataset_label not in fo.list_datasets():
    raise ValueError(
        f"FiftyOne dataset '{dataset_label}' not found; "
        f"available datasets: {fo.list_datasets()}"
    )
dataset = fo.load_dataset(dataset_label)

# Flat list of per-sample records; filled by the loop over the dataset below.
samples = []

# Per-clustering configuration, keyed by the number of clusters as a string.
# For every clustering:
#   'labels'   -- display name for each cluster id (ids are strings),
#                 presumably assigned by hand after inspecting the clusters
#   'clusters' -- filled while iterating the dataset: cluster id -> samples
#   'size_max' -- value passed as `size_max` to the bubble chart
clusterings = {
    '8': {
        'labels': {
            '0': 'mazes',
            '1': 'title screens',
            '2': 'text screens',
            '3': 'perspective projection',
            '4': 'maps',
            '5': 'HUDs',
            '6': 'running text and lists',
            '7': 'simple title screens',
        },
        'clusters': {},
        'size_max': 200
    },
    '32': {
        'labels': {
            '0': 'boxing',
            '1': 'simple lists',
            '2': 'early action',
            '3': 'gloom-ish portraits',
            '4': 'simple titles',
            '5': 'POV racing',
            '6': 'HUDs with graphics',
            '7': 'little text',
            '8': 'HUDs with text',
            '9': 'maps',
            '10': 'space action',
            '11': 'menacing title screens',
            '12': 'historic fighting',
            '13': 'pre-historic adventures',
            '14': 'abstracted gameplay',
            '15': 'low-res action',
            '16': 'sci-fi action titles',
            '17': 'weirdcore',
            '18': 'rugged nature action',
            # NOTE(review): '19' and '26' share the same label -- confirm
            # that this is intended.
            '19': 'simple text',
            '20': 'tiles and mazes',
            '21': 'vectorized POV',
            '22': 'tile-based action',
            '23': 'isometric landscapes',
            '24': 'graphic adventures',
            '25': 'sports on green',
            '26': 'simple text',
            '27': 'simple decorative titles',
            # Spelling fixed ("stearing" -> "steering"); this text is drawn
            # on the bubble chart.
            '28': 'abstract POV steering',
            '29': 'exploring nature',
            '30': 'warehouse action',
            '31': 'skeuomorphic graphic adventures',
        },
        'clusters': {},
        'size_max': 100
    }
}

# Walk the dataset once: flatten each entry into a plain dict, append it to
# `samples`, and file it under its cluster id for every configured clustering.
for fo_sample in dataset:
    umap_point = fo_sample['dinov2_umap_0001_100']
    sample = {
        'point': umap_point,
        'x': umap_point[0],
        'y': umap_point[1],
        'year': fo_sample['year'],
        'platform': fo_sample['platform'],
        # Cluster assignment per clustering, keyed by the cluster count.
        '8': fo_sample['kmeans_dinov2_embeddings_8_cluster'],
        '32': fo_sample['kmeans_dinov2_embeddings_32_cluster'],
        '128': fo_sample['kmeans_dinov2_embeddings_128_cluster']
    }
    samples.append(sample)

    # Only clusterings present in `clusterings` are grouped; a cluster entry
    # is created the first time one of its members shows up.
    for clustering, spec in clusterings.items():
        members = spec['clusters'].setdefault(sample[clustering], {'samples': []})
        members['samples'].append(sample)

Open [FiftyOne 🔗](http://localhost:5151).

## Histogram

In [None]:
# One bar chart per attribute: count how often each value occurs, order the
# bars by ascending count, display the figure and export it as SVG.
for attribute in ('year', 'platform', 'countries'):
    counts = dict(
        sorted(dataset.count_values(attribute).items(), key=lambda pair: pair[1])
    )
    # Values become the index, their frequency the single 'count' column.
    df = pd.DataFrame.from_dict(counts, orient='index', columns=['count'])

    plot = px.bar(df)
    # No legend and no outer margins.
    plot.update_layout(
        showlegend=False,
        margin={'t': 0, 'l': 0, 'b': 0, 'r': 0},
    )
    plot.show()
    plot.write_image(f'plots/histogram-{attribute}.svg', height=800, width=1100)

## Embeddings

In [None]:
# One row per collected sample; the dict keys of each record become columns.
df = pd.DataFrame(samples)

In [None]:
# Scatter plot of all samples at their 2-D embedding coordinates, coloured by
# release year on the continuous colour scale `colorscheme`.
# `labels` maps column names to display names, so the key has to be the
# plotted column 'year' (the former key 'label' matched no column and the
# rename was never applied).
plot = px.scatter(df, x="x", y="y", color="year",
                  color_continuous_scale=colorscheme,
                  labels={'year': 'Year'}, height=800, width=1100)

# Small markers so that individual points stay distinguishable
plot.update_traces(marker=dict(size=3))

# Hide the axes and remove the outer margins
plot.update_xaxes(visible=False)
plot.update_yaxes(visible=False)
plot.update_layout(
    margin={'t':0,'l':0,'b':0,'r':0}
)

# Display the figure, then save it to a file
plot.show()
plot.write_image('plots/scatter.svg', height=800, width=1100)

In [None]:
# Scatter plot of the 32-way clustering in which clusters '3' and '11' are
# drawn in blue and every other cluster in one pale background colour.
colormap = {str(cluster_id): '#f7cef7' for cluster_id in range(32)}
colormap.update({'3': '#00f', '11': '#00f'})

plot = px.scatter(
    df,
    x="x",
    y="y",
    color="32",
    color_discrete_map=colormap,
    height=800,
    width=1100,
)

# Small markers; no legend, axes or outer margins.
plot.update_traces(marker={'size': 3})
plot.update_xaxes(visible=False)
plot.update_yaxes(visible=False)
plot.update_layout(
    showlegend=False,
    margin={'t': 0, 'l': 0, 'b': 0, 'r': 0},
)

# Display the figure, then export it as SVG.
plot.show()
plot.write_image('plots/scatter-nsfw.svg', height=800, width=1100)

In [None]:
# Scatter plot of the 32-way clustering in which cluster '0' is drawn in blue
# and every other cluster in one pale background colour.
colormap = {str(cluster_id): '#f7cef7' for cluster_id in range(32)}
colormap.update({'0': '#00f'})

plot = px.scatter(
    df,
    x="x",
    y="y",
    color="32",
    color_discrete_map=colormap,
    height=800,
    width=1100,
)

# Small markers; no legend, axes or outer margins.
plot.update_traces(marker={'size': 3})
plot.update_xaxes(visible=False)
plot.update_yaxes(visible=False)
plot.update_layout(
    showlegend=False,
    margin={'t': 0, 'l': 0, 'b': 0, 'r': 0},
)

# Display the figure, then export it as SVG.
plot.show()
plot.write_image('plots/scatter-boxing.svg', height=800, width=1100)

## Bubble Charts

In [None]:
import numpy as np

# One bubble chart per clustering: each cluster becomes a bubble placed at the
# mean position of its samples, sized by its sample count, coloured by the
# rounded mean year and annotated with its label.
for clustering in clusterings:
    spec = clusterings[clustering]

    # Attach the aggregate values to every cluster entry.
    for cluster_id, cluster in spec['clusters'].items():
        members = cluster['samples']
        cluster['x'] = np.mean([member['x'] for member in members])
        cluster['y'] = np.mean([member['y'] for member in members])
        cluster['year'] = round(np.mean([member['year'] for member in members]))
        cluster['size'] = len(members)
        cluster['label'] = spec['labels'][cluster_id]

    # One row per cluster; `columns` keeps only the aggregates and leaves
    # out the raw 'samples' lists.
    df = pd.DataFrame.from_dict(
        spec['clusters'],
        orient="index",
        columns=['x', 'y', 'size', 'label', 'year'],
    )

    plot = px.scatter(
        df,
        x="x",
        y="y",
        size="size",
        color="year",
        color_continuous_scale=colorscheme,
        size_max=spec['size_max'],
        text='label',
        height=800,
        width=1100,
    )

    # Semi-transparent bubbles without outline, a minimum marker size of 15
    # and black label text.
    plot.update_traces(
        marker={
            'sizemin': 15,
            'line': {'width': 0},
            'opacity': 0.5,
        },
        textfont_color='black',
    )
    # No axes and no outer margins.
    plot.update_xaxes(visible=False)
    plot.update_yaxes(visible=False)
    plot.update_layout(margin={'t': 0, 'l': 0, 'b': 0, 'r': 0})

    # Export as SVG first, then display the figure.
    plot.write_image(f'plots/bubbles-{clustering}.svg', height=800, width=1100)
    plot.show()