In [7]:
import os
import numpy as np
import json
import requests
from tqdm import tqdm


## Download akb .json file

In [2]:
url = f'https://akb.ztf.snad.space/objects/'
with requests.get(url) as response:
    response.raise_for_status()
    open(f'akb.ztf.snad.space.json', 'wb').write(response.content)

def get_oids(filepath):
    file = open(filepath)
    obj_list = json.load(file)
    file.close()

    oids = []
    tags = []
    for data in obj_list:
        oids.append(data['oid'])
        tags.append(data['tags'])

    targets = [] # 1-artefact,  0-transient
    for tag_list in tags:
        if 'artefact' in tag_list:
            targets.append(1)
        else:
            targets.append(0)
    
    return oids, targets

In [4]:
oids, labels = get_oids('akb.ztf.snad.space.json')

## Download available feature files

In [5]:
dwn_feature = os.listdir('akb_features')
for oid, label in zip(oids, labels):
    if str(oid)[0] == '1':
        field = str(oid)[:4]
    else:
        field = str(oid)[:3]

    file_types = zip(['feature', 'feature', 'oid'], ['dat', 'name', 'dat'])
    for file_type in file_types:
        if f'{file_type[0]}_{field}.{file_type[1]}' not in dwn_feature:
            url = f'https://sai.snad.space/tmp/features/{file_type[0]}_{field}.{file_type[1]}'
            with requests.get(url) as response:
                response.raise_for_status()
                open(f'akb_features/{file_type[0]}_{field}.{file_type[1]}', 'wb').write(response.content)


## Construct dataset from downloaded files

In [8]:
data = {'oids':[], 'labels':[], 'features':[]}
for i, oid in tqdm(enumerate(oids)):
    if str(oid)[0] != 1:
        field = str(oid)[:3]
    else:
        field = str(oid)[:4]
    
    try:
        field_oids = np.memmap(f'akb_features/oid_{field}.dat', mode='r', dtype=np.uint64)
        ind = list(field_oids).index(oid)
            
        with open(f'akb_features/feature_{field}.name') as f:
            names = f.read().split()
        dtype = [(name, np.float32) for name in names]
        field_feature = np.memmap(f'akb_features/feature_{field}.dat', mode='r', dtype=dtype, shape=field_oids.shape)
            
        data['oids'].append(oid)
        data['features'].append(field_feature[ind].tolist())
        data['labels'].append(labels[i])
    except:
        continue

2813it [47:38,  1.02s/it]


## Save constructed dataset

In [9]:
np.save('feature_dataset_latest.npy', data)

In [12]:
len(data['oids'])

2091