## Dataset Inspection

In [1]:
import os, sys
# Extend the import search path with the parent of the current working
# directory (os.path.abspath('') resolves to the cwd). This must run before the
# project-local imports below — presumably toyDb, misc and dataset live there;
# confirm against the repository layout.
sys.path.append(os.path.join(os.path.abspath(''), '../'))

import peewee as pw
from toyDb.databases import ExperimentDb, ShaderDb

import numpy as np
import json
import hashlib

# Filters and the snapshotter used to select and export experiment records.
from misc.ComplexDatasetSnapshotter import (
    EnvironmentFilter,
    CycleTrialsFilter,
    ErrorFilter,
    ShadertoyIdFilter,
    WidthHeightFilter,
    ResourceFilter,
    TraceAvailabilityFilter,
    SpvTokenizedLengthFilter,
    TraceDuplicationPostFilter,
    ComplexDatasetSnapshotter,
    AugmentationFilter
)
from misc.Directory import (
  getIntermediateDir
)
from dataset.FragmentPerformanceTracedSnapshotDataset import FragmentPerformanceTracedSnapshotDataset

# Initialise the experiment database from its default configuration.
# NOTE(review): presumably required before any filter queries the database — confirm.
ExperimentDb.init_from_default_db()
# NOTE(review): late import; no use of pickle is visible in this part of the notebook.
import pickle

In [2]:
# Create the snapshotter and attach the basic (argument-free) filters.
# Each filter is instantiated and registered one at a time, in the order listed.
snapshotter = ComplexDatasetSnapshotter()

for basicFilterCls in (
    EnvironmentFilter,
    WidthHeightFilter,
    ResourceFilter,
    CycleTrialsFilter,
    ErrorFilter,
    TraceAvailabilityFilter,
    AugmentationFilter,
):
    snapshotter.registerFilter(basicFilterCls())

# Length filter: SpvTokenizedLengthFilter configured with a threshold of 4096.
lengthFilter = SpvTokenizedLengthFilter()
lengthFilter.setThreshold(4096)

# Build the cache location once and reuse it for the existence check, the read
# and the write, so the three uses can never drift apart.
# NOTE(review): the cache file name does not encode the threshold — confirm a
# cache written with a different threshold cannot be picked up here.
lengthFilterCachePath = os.path.join(getIntermediateDir(), "./lengthFilterCache.json")

if os.path.isfile(lengthFilterCachePath):
    # A previous run already stored the filter result: load it instead of recomputing.
    lengthFilter.readFromCache(lengthFilterCachePath)
else:
    # No cache yet: run the filter and persist its result for later runs.
    lengthFilter.process(parallel=True)
    lengthFilter.writeToCache(lengthFilterCachePath)

snapshotter.registerFilter(lengthFilter)

# NOTE(review): no dedicated train / test split filter is registered in this
# cell; only a placeholder comment stood here.

# Examine the registered groups; the recorded output of this cell lists
# "<Filter>_<group> = <count>" lines.
snapshotter.examineGroups(1)

  0%|          | 0/257106 [00:00<?, ?it/s]

EnvironmentFilter_EnvId1 = 263919
WidthHeightFilter_1024-768 = 263919
ResourceFilter_resourceNone = 6813
ResourceFilter_resource1 = 257106
CycleTrialsFilter_30cycles-10trials = 263919
ErrorFilter_error1 = 6231
ErrorFilter_error0 = 27701
ErrorFilter_error3 = 77
ErrorFilter_error2 = 504
ErrorFilter_error100 = 1
ErrorFilter_error5 = 229405
TraceAvailabilityFilter_noTrace = 34459
TraceAvailabilityFilter_haveTrace = 229460
AugmentationFilter_aug0 = 20669
AugmentationFilter_aug20000 = 13856
AugmentationFilter_aug10009 = 11940
AugmentationFilter_aug10013 = 4170
AugmentationFilter_aug10014 = 13304
AugmentationFilter_aug10015 = 13466
AugmentationFilter_aug10016 = 13275
AugmentationFilter_aug10017 = 8269
AugmentationFilter_aug10021 = 13468
AugmentationFilter_aug10035 = 5956
AugmentationFilter_aug10037 = 12568
AugmentationFilter_aug10045 = 13304
AugmentationFilter_aug10046 = 13468
AugmentationFilter_aug20002 = 13487
AugmentationFilter_aug20001 = 13856
AugmentationFilter_aug20003 = 13827
Augmentat

### Snapshot the filtered dataset

In [3]:
# Group selection handed to the snapshotter. Every inner list holds
# (filterName, groupName) pairs that all belong to one filter.
# NOTE(review): presumably groups inside one inner list are alternatives and the
# inner lists are combined with each other — confirm against doSnapshotWithOptAug.
commonFilter = [
    [('EnvironmentFilter', 'EnvId1')],
    [('CycleTrialsFilter', '30cycles-10trials')],
    [
        ('AugmentationFilter', augGroup)
        for augGroup in (
            'aug0',
            'aug20000',
            'aug20001',
            'aug10006',
            'aug10059',
            'aug10015',
            'aug10046',
        )
    ],
    [('ResourceFilter', 'resource1')],
    [('WidthHeightFilter', '1024-768')],
    [('ErrorFilter', errGroup) for errGroup in ('error0', 'error5')],
    [('TraceAvailabilityFilter', 'haveTrace')],
    [('SpvTokenizedLengthFilter', 'belowOrEqualThreshold4096')],
]

# Target file of the snapshot, located inside the intermediate directory.
destFile = os.path.join(getIntermediateDir(), "./FragPerfSnapshotTracedDataset4096-optimized-train-augmented.dat")

# The snapshot is written on every run: the "skip when destFile already exists"
# guard is intentionally left disabled here.
snapshotter.doSnapshotWithOptAug(destFile, commonFilter)

# Fingerprint the snapshot file with MD5, reading it in 8192-byte chunks so the
# whole file never has to be held in memory at once.
file_hash = hashlib.md5()
with open(destFile, "rb") as f:
    while True:
        chunk = f.read(8192)
        if not chunk:
            # An empty read marks the end of the file.
            break
        file_hash.update(chunk)

print(f"Hash for {destFile}:\n- md5sum: {file_hash.hexdigest()}")

Train samples: 9004
Test samples: 2247
Hash for f:\dev\NGPP\NGPP\vkPredict\misc\.././intermediates\./FragPerfSnapshotTracedDataset4096-optimized-train-augmented.dat:
- md5sum: 474da60f6a62222d7dbff278c23eeeb9


In [4]:
# List which filters produced results, then pull out the experiment-id sets of
# the groups compared below.
print(snapshotter.filterResults.keys())
lenSet = snapshotter.filterResults['SpvTokenizedLengthFilter'][0].elemExprIds
# NOTE(review): the AugmentationFilter groups are addressed by position. Judging
# by the recorded group listing and the matching sizes, [0] is aug0 and [1] is
# aug20000; [14] is presumably aug20001 — confirm, the order is not guaranteed here.
OSet = snapshotter.filterResults['AugmentationFilter'][1].elemExprIds
NSet = snapshotter.filterResults['AugmentationFilter'][0].elemExprIds
OSSet = snapshotter.filterResults['AugmentationFilter'][14].elemExprIds

# Raw group sizes (the raw size of OSSet is not reported at this stage).
for label, ids in (("lenSet", lenSet), ("OSet", OSet), ("NSet", NSet)):
    print(f"{label}: {len(ids)}")

# Keep only the experiments that are also in the length-filter group. New sets
# are built on purpose: the sets stored in filterResults stay untouched.
OSet, NSet, OSSet = (lenSet & ids for ids in (OSet, NSet, OSSet))

# Sizes after the intersection with lenSet.
for label, ids in (("OSet", OSet), ("NSet", NSet), ("OSSet", OSSet)):
    print(f"{label}: {len(ids)}")

dict_keys(['EnvironmentFilter', 'WidthHeightFilter', 'ResourceFilter', 'CycleTrialsFilter', 'ErrorFilter', 'TraceAvailabilityFilter', 'AugmentationFilter', 'SpvTokenizedLengthFilter'])
lenSet: 196226
OSet: 13856
NSet: 20669
OSet: 10927
NSet: 11264
OSSet: 10899
