In [1]:
import typing
import os
from pprint import pprint
from citrination_client import *
from citrination_client.views.data_view_builder import DataViewBuilder
from pprint import pprint
import pypif
import time
import json
import uuid

In [5]:
# Connection settings. The API key is read from the environment rather than
# being written into the notebook.
api_key = os.environ.get('CITRINATION_API_KEY')
site = 'https://citrination.com'

# Project settings are loaded from config/config.json.
config_path = os.path.join('config','config.json')
with open(config_path, 'r') as f:
    config = json.load(f)
dataset_id = config['dataset_id']
dataset_name = config['dataset_name']

# Only generate a view name when the config does not already provide one.
# Passing a function call as the default of dict.get() evaluates it on every
# run, which regenerated the name and rewrote the config file each time. The
# name is built inline so this cell does not depend on a helper that is
# defined further down the notebook (which breaks Restart & Run All).
data_view_name = config.get('data_view_name')
if not data_view_name:
    data_view_name = f"{dataset_name}_{uuid.uuid1()}"
    config['data_view_name'] = data_view_name
    # Persist immediately so later runs reuse the same name.
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2)
data_view_desc = config.get('data_view_desc', 'Binary Method')

In [4]:
def generate_data_view_name(dataset_name, config):
    """Return the data view name for ``config``, creating one if needed.

    If ``config`` already holds a non-empty ``'data_view_name'`` it is returned
    unchanged and nothing is written. Otherwise a name of the form
    ``"<dataset_name>_<uuid1>"`` is stored under ``'data_view_name'`` and the
    whole config is written to the module-level ``config_path``.

    Args:
        dataset_name: Prefix used for a newly generated name.
        config: Mutable mapping holding the notebook configuration; updated
            in place when a name is generated.

    Returns:
        The existing or newly generated data view name.
    """
    existing_name = config.get('data_view_name')
    if existing_name:
        # Never clobber a name that is already configured.
        return existing_name

    # The key was previously misspelled as 'datas_view_name', so the generated
    # name was never found again under 'data_view_name' on later runs.
    config['data_view_name'] = f"{dataset_name}_{uuid.uuid1()}"
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2)
    return config['data_view_name']

In [6]:
# Build the API client; the key is taken from the environment at this point.
client = CitrinationClient(
    api_key=os.environ.get('CITRINATION_API_KEY'),
    site=site,
)

In [7]:
# List the columns of the dataset that can be used as descriptors in a view.
available_columns = client.data_views.search_template_client.get_available_columns(dataset_id)
available_columns

['Name', 'Superalloy series', 'formula', 'Property stable']

In [8]:
class MlTemplate:
    """Plain container for the settings of a single-output data view.

    Args:
        output: Key of the column used as the model output.
        dataset_id: Identifier of the dataset the view is built on.
        name: Name given to the data view.
        desc: Description given to the data view. When empty or ``None`` a
            description is generated from ``output``, ``rt_flag`` and
            ``dataset_id``.
        output_lb: Lower bound of the output (stored as given).
        output_ub: Upper bound of the output (stored as given).
        units: Units of the output (stored as given).
        rt_flag: Truthy for room-temperature data; stored on the instance as
            the label ``'room temperature'`` or ``'non-room temperature'``.
        data_view_id: Identifier of an existing view, if any.
        ml_config: View configuration, if already built.
        desc_type: Kind of output descriptor; ``make_ml_config`` treats
            ``'cat'`` as categorical and anything else as real-valued.
        categories: Allowed values for a categorical output.
    """

    def __init__(self,
                 output,
                 dataset_id,
                 name,
                 desc,
                 output_lb=None,
                 output_ub=None,
                 units=None,
                 rt_flag=True,
                 data_view_id=None,
                 ml_config = None,
                 desc_type='real',
                 categories=None):
        self.dataset_id = dataset_id
        self.output = output
        self.output_lb = output_lb
        self.output_ub = output_ub
        self.units = units
        self.data_view_id = data_view_id
        self.name = name
        self.ml_config = ml_config
        self.desc_type = desc_type
        self.categories = categories

        # Note: the boolean argument is stored as a human-readable label.
        self.rt_flag = 'room temperature' if rt_flag else 'non-room temperature'

        # Keep the caller's description. It used to be overwritten
        # unconditionally by the generated sentence, so the `desc` argument
        # had no effect.
        if desc:
            self.desc = desc
        else:
            self.desc = 'This view maps composition to {} on the {} data from dataset {}'.format(output, self.rt_flag, dataset_id)


def make_ml_config(template):
    """Assemble the data view configuration described by ``template``.

    The configuration uses ``template.dataset_id`` as its only dataset, the
    ``'formula'`` column as the input descriptor and ``template.output`` as
    the output descriptor.

    Args:
        template: Object exposing ``dataset_id``, ``output`` and
            ``desc_type``, plus ``categories`` when ``desc_type == 'cat'`` or
            ``output_lb`` / ``output_ub`` / ``units`` otherwise.

    Returns:
        The result of ``DataViewBuilder.build()`` for these settings.
    """
    builder = DataViewBuilder()
    builder.dataset_ids([template.dataset_id])

    # Input: the chemical formula column.
    # threshold <= 1.0 (original note: becomes the default in future releases)
    builder.add_descriptor(
        descriptor=InorganicDescriptor(key='formula', threshold=1.0),
        role='input',
    )

    # Output: real-valued unless the template asks for a categorical one.
    if template.desc_type != 'cat':
        output_descriptor = RealDescriptor(
            key=template.output,
            lower_bound=template.output_lb,
            upper_bound=template.output_ub,
            units=template.units,
        )
    else:
        output_descriptor = CategoricalDescriptor(
            key=template.output,
            categories=template.categories,
        )
    builder.add_descriptor(descriptor=output_descriptor, role='output')

    return builder.build()


def make_view(client, template):
    """Create a data view from ``template`` and return its id.

    The configuration produced by ``make_ml_config`` is stored on
    ``template.ml_config`` before ``client.data_views.create`` is called with
    the template's name and description. A confirmation line is printed.

    Args:
        client: Object exposing ``data_views.create(...)``.
        template: Object accepted by ``make_ml_config`` that also has
            ``name`` and ``desc`` attributes.

    Returns:
        The value returned by ``client.data_views.create``.
    """
    ml_config = make_ml_config(template)
    template.ml_config = ml_config

    created_id = client.data_views.create(
        name=template.name,
        description=template.desc,
        configuration=ml_config,
    )

    message = 'Data view {} was successfully created.'.format(created_id)
    print(message)
    return created_id

In [9]:
# Describe the view: a categorical model predicting the 'Property stable' column.
ml_template = MlTemplate(
    output='Property stable',
    dataset_id=dataset_id,
    name=data_view_name,
    desc=data_view_desc,
    desc_type='cat',
    categories=['0','1'],
)


In [10]:
# Create the view and remember its id in the in-memory config.
# Each run of this cell calls make_view again.
new_view_id = make_view(client, ml_template)
config['data_view_id'] = new_view_id

Data view 10616 was successfully created.


In [11]:
# Write the updated configuration (now including the view id) back to disk.
with open(config_path, 'w') as config_file:
    config_file.write(json.dumps(config, indent=2))