In [7]:
import sys
sys.path.insert(0, 'utils')

import pandas as pd
import matplotlib.pyplot as plt
import ds_charts as ds
from ds_charts import HEIGHT
import numpy as np
from pandas.plotting import register_matplotlib_converters
import seaborn as sns
import json
import jstyleson
import collections
import copy
import datetime
from dateutil.relativedelta import relativedelta
from sklearn.preprocessing import StandardScaler, MinMaxScaler ,LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import timeit
from math import radians, cos, sin, asin, sqrt

# sys.stdout = open('Results/dankfe_1_timings.txt', 'a')


# Case-study identifier: it selects the data sub-folder and is the file-name prefix
# of the base CSV, the model JSON and the generated train/test CSVs.
base_name = "aq"
# Folder the base CSV is read from and the generated CSVs are written to.
data_folder = f'data/{base_name}/'
# Folder holding the model and options JSON files (currently the same path as data_folder).
model_folder = f'data/{base_name}/'
print(data_folder)


data/aq/


In [8]:
# Get dataset (only the CSV read is timed).
start_time = timeit.default_timer()
# The "covid" case study has a second date column that must be parsed as well.
date_cols = ['current_date','first_date'] if base_name == "covid" else ['current_date']
data = pd.read_csv(f'{data_folder}{base_name}_base.csv',parse_dates=date_cols, infer_datetime_format=True)
read_time = timeit.default_timer() - start_time

# Each JSON file is opened in a `with` block so its handle is closed as soon as
# it has been parsed (previously the three handles were left open).

# Getting ER Model of case study data
with open(f'{model_folder}{base_name}_model.json') as f:
    model = jstyleson.load(f)

# Getting options JSON
with open(f'{model_folder}options.json') as f:
    options = jstyleson.load(f)

# Getting variable template JSON
with open('utils/var_template.json') as f:
    template = jstyleson.load(f)

In [9]:
# Name of the class (label) column. It is passed to data_preparation (rows with a
# missing value in it are dropped) and to data_preparation_2 (the train/test split is
# stratified on it); checkBalancing also reads this module-level name.
target = 'ALARM'

# Disabled class-distribution report, kept for reference:
# target_count = data[target].value_counts()
# positive_class = target_count.idxmin()
# negative_class = target_count.idxmax()
# print('Minority class=', positive_class, ':', target_count[positive_class])
# print('Majority class=', negative_class, ':', target_count[negative_class])
# print('Proportion:', round(target_count[positive_class] / target_count[negative_class], 2), ': 1')

In [10]:
# Disabled sanity check that the model entities match the CSV columns:
# entity_name_list = [entity['name'] for entity in model['entities']]
# # Raise Error if entities do not match columns:
# if collections.Counter(entity_name_list) != collections.Counter(data.columns.to_list()):
#     raise ValueError("Entities and Columns do not have the same size.")

# Names of the entities flagged as not observed in the model; these columns are dropped below.
not_observed = [entity['name'] for entity in model['entities'] if not entity['observed']]

# Names of the variables the model's relations will create
# (not referenced again in the visible part of this notebook).
created_vars = [relation['output'] for relation in model['relations']]

# Names of the entities typed 'datetime' in the model
# (only used by the disabled conversion code below).
date_columns = [entity['name'] for entity in model['entities'] if entity['type'] == 'datetime']
# for col in date_columns:
#     data[col] = pd.to_datetime(data[col], format = "%Y-%m-%d")
# data['current_date'] = pd.to_datetime(data['current_date'], format = "%Y-%m-%d %H:%M:%S")
# data['current_date'] = pd.to_datetime(data['current_date'], format = "%d/%m/%Y")
# data['first_date'] = pd.to_datetime(data['first_date'], format = "%d/%m/%Y")
# Working frame: the raw data without the non-observed columns (drop returns a new frame).
data_observed = data.drop(columns = not_observed)
# Disabled down-sampling options for quick experiments:
# data_observed = data_observed.sample(frac = 0.1, random_state = 0).reset_index(drop = True)
# data_observed = data_observed[:1000]

In [11]:
# print(data.dtypes)
# ds.get_variable_types(data)

In [12]:
# Data Preparation

def data_preparation(dataset,options,target):
    """First preparation stage, run before feature generation.

    Steps, in order:
      1. drop the rows whose `target` value is missing (index is reset);
      2. label-encode boolean columns via `encodeLabels`;
      3. impute missing values via `checkMissingValues`, unless
         `options['checkMissingValues']` is 'none'.

    Parameters:
        dataset: pandas DataFrame to prepare.
        options: dict read for the 'checkMissingValues' key.
        target: name of the class column.

    Returns:
        The prepared DataFrame. Scaling, splitting and balancing are done
        later by `data_preparation_2`.
    """
    has_missing_target = dataset[target].isna().sum() != 0
    if has_missing_target:
        dataset = dataset.dropna(subset=[target]).reset_index(drop = True)

    dataset = encodeLabels(dataset)

    mv_method = options['checkMissingValues']
    if mv_method != 'none':
        dataset = checkMissingValues(dataset,mv_method)
    return dataset

def data_preparation_2(dataset,options,target):
    """Second preparation stage, run after feature generation.

    Optionally scales the data (`options['checkScaling']` other than 'none'),
    splits it 70/30 into train and test stratified on `target` with a fixed
    random state, and optionally balances the training part
    (`options['checkBalancing']` truthy).

    Returns:
        (train, test) DataFrames, both with a fresh 0..n-1 index.
    """
    scaling_method = options['checkScaling']
    if scaling_method != 'none':
        dataset = checkScaling(dataset,scaling_method)

    train_part, test_part = train_test_split(
        dataset,
        train_size=0.7,
        test_size=0.3,
        stratify=dataset[target],
        random_state=1,
    )

    # Only the training part is balanced; the test part keeps its distribution.
    if options['checkBalancing']:
        train_part = checkBalancing(train_part)

    return train_part.reset_index(drop=True), test_part.reset_index(drop=True)

def encodeLabels(dataset):
    """Label-encode every bool-dtype column of `dataset`.

    The columns are overwritten in place on the frame that was passed in,
    and that same frame is returned.
    """
    encoder = LabelEncoder()
    for bool_col in dataset.select_dtypes(include='bool').columns:
        dataset[bool_col] = encoder.fit_transform(dataset[bool_col])
    return dataset

def checkScaling(dataset,method):
    """Scale the numeric columns of `dataset` and return a new frame.

    Parameters:
        dataset: pandas DataFrame to scale.
        method: 'zscore' (StandardScaler) or 'minmax' (MinMaxScaler to [0, 1]).

    Returns:
        A DataFrame with the scaled numeric columns first, followed by the
        remaining variable groups unchanged, in the order
        `ds.get_variable_types` reports them.

    Raises:
        ValueError: if `method` is not one of the supported names (previously
            this surfaced as an unbound-variable error).
    """
    if method == 'zscore':
        scaler = StandardScaler(with_mean=True, with_std=True, copy=True)
    elif method == 'minmax':
        scaler = MinMaxScaler(feature_range=(0,1), copy=True)
    else:
        raise ValueError(f"Unsupported scaling method: {method!r} (expected 'zscore' or 'minmax')")

    # Only the first group is scaled; the others are passed through as-is, so
    # they are not given names here (the old names disagreed with the
    # unpacking order used in checkMissingValues).
    # NOTE(review): assumes the first group returned is the numeric one — confirm in ds_charts.
    numeric_vars, *other_groups = ds.get_variable_types(dataset).values()

    parts = []
    if len(numeric_vars) > 0:  # a scaler cannot be fitted on zero columns
        scaled = scaler.fit_transform(dataset[numeric_vars])
        parts.append(pd.DataFrame(scaled, index = dataset.index, columns = numeric_vars))
    parts.extend(dataset[group] for group in other_groups)
    return pd.concat(parts, axis = 1)

def checkBalancing(dataset, target_col=None):
    """Balance the smallest and largest class of `dataset` by resampling.

    Class sizes are now measured on `dataset` itself. Previously they were
    taken from the notebook-level `data` frame, so decisions for the training
    split were based on the sizes of the full, unsplit data (and sampling
    without replacement could ask for more rows than the split contains).

    Parameters:
        dataset: pandas DataFrame to balance.
        target_col: name of the class column; defaults to the module-level
            `target` for backward compatibility.

    Returns:
        `dataset` itself when the minority/majority ratio is above 0.66,
        otherwise a new frame holding the resampled minority rows followed by
        the (possibly undersampled) majority rows. Rows of any other class
        are not included in the balanced result.
    """
    if target_col is None:
        target_col = target  # notebook-level class column name

    target_count = dataset[target_col].value_counts()
    minority, majority = target_count.idxmin(), target_count.idxmax()
    n_min, n_max = target_count[minority], target_count[majority]

    if n_min / n_max > 0.66:
        print("Checking balancing: No balancing required.")
        return dataset

    df_min = dataset[dataset[target_col] == minority]
    df_max = dataset[dataset[target_col] == majority]
    if n_min >= 25000 and n_max >= 25000:
        print("Checking balancing: Undersampling both classes - both over 25000")
        df_min = df_min.sample(n=25000, replace = False, random_state = 1)
        df_max = df_max.sample(n=25000, replace = False, random_state = 1)
    elif n_max >= 25000:
        print("Checking balancing: Oversampling min class and undersampling max class - one over 25000")
        df_min = df_min.sample(n=25000, replace = True, random_state = 1)
        df_max = df_max.sample(n=25000, replace = False, random_state = 1)
    else:
        print("Checking balancing: Oversampling min class - none over 25000")
        df_min = df_min.sample(n=len(df_max), replace = True, random_state = 1)
    return pd.concat([df_min,df_max],axis = 0)

def checkMissingValues(dataset,method):
    """Impute missing values and return a new frame.

    With method 'auto', numeric columns are filled with the column median and
    symbolic and binary columns with the most frequent value; date columns
    are passed through untouched.

    Parameters:
        dataset: pandas DataFrame to impute.
        method: imputation policy; only 'auto' is implemented.

    Returns:
        A DataFrame with the columns regrouped as numeric, symbolic, binary,
        date.

    Raises:
        ValueError: if `method` is not 'auto' (previously this surfaced as an
            unbound-variable error).
    """
    if method != 'auto':
        raise ValueError(f"Unsupported missing-value method: {method!r} (expected 'auto')")

    numeric_vars, binary_vars, date_vars, symbolic_vars = ds.get_variable_types(dataset).values()

    imputation_plan = (
        (numeric_vars, 'median'),
        (symbolic_vars, 'most_frequent'),
        (binary_vars, 'most_frequent'),
    )
    parts = []
    for columns, strategy in imputation_plan:
        if len(columns) > 0:  # an imputer cannot be fitted on zero columns
            imp = SimpleImputer(strategy=strategy, missing_values=np.nan, copy=True)
            parts.append(pd.DataFrame(imp.fit_transform(dataset[columns]), index=dataset.index, columns=columns))
    parts.append(dataset[date_vars])
    return pd.concat(parts, axis=1)

# data_prepared_train, data_prepared_test = data_preparation(data,options,target)

In [13]:
# Automatic variable generation

def _resolve_model(model):
    """Return `model`, or the notebook-level `model` when None is given (legacy call style)."""
    return globals()['model'] if model is None else model


def generateAutoVariables(dataset,options,template,model):
    """Append automatically generated relations to `model` and return it.

    The `model` argument is now the one that gets extended; previously the
    helpers ignored it and modified the notebook-level `model` instead.

    Parameters:
        dataset: DataFrame whose date columns drive the date-derived variables.
        options: dict read for 'generateDates' (truthy enables date variables),
            'generateFiveSummary' (list of numeric columns to summarise) and
            'groupby' (list of grouping columns).
        template: dict with relation templates under 'dates' and 'five_summary'.
        model: dict with a 'relations' list; its 'relations' entry is replaced
            by the extended list.

    Returns:
        The same `model` object.
    """
    if options['generateDates']:
        model = generateDates(dataset,template['dates'],model)
    if len(options['generateFiveSummary']) != 0:
        model = generateFiveSummary(dataset,template['five_summary'], options['groupby'], options['generateFiveSummary'],model)
    return model

def generateDates(dataset,template,model=None):
    """Add one relation per (date column, date template entry) to `model`.

    Each template entry is deep-copied, its 'output' is prefixed with the
    date column name and the column is appended to its 'inputs'. The date
    columns are those listed under 'Date' by `ds.get_variable_types`.
    `model` defaults to the notebook-level `model` for backward compatibility.
    """
    model = _resolve_model(model)
    new_datevars = []
    for date_col in ds.get_variable_types(dataset)['Date']:
        # Fresh copy per column so the shared template is never modified.
        for new_var in copy.deepcopy(template):
            new_var['output'] = f"{date_col}_{new_var['output']}"
            new_var['inputs'].append(date_col)
            new_datevars.append(new_var)
    model['relations'] =  model['relations'] + new_datevars
    return model

def generateFiveSummary(dataset,template,groupby,num_vars,model=None):
    """Add one relation per (group column, numeric column, summary template entry).

    Each template entry is deep-copied, its 'output' becomes
    "<column>_<output>_per_<group>", the column is appended to its 'inputs'
    and its 'groupby' is set to the group column. `dataset` is accepted for
    signature symmetry but not read. `model` defaults to the notebook-level
    `model` for backward compatibility.
    """
    model = _resolve_model(model)
    new_numvars = []
    for group in groupby:
        for num_col in num_vars:
            # Fresh copy per combination so the shared template is never modified.
            for new_var in copy.deepcopy(template):
                new_var['output'] = f"{num_col}_{new_var['output']}_per_{group}"
                new_var['inputs'].append(num_col)
                new_var['groupby'] = group
                new_numvars.append(new_var)
    model['relations'] = model['relations'] + new_numvars
    return model

In [14]:
# Feature Generation: lookup tables and shared state used by get_operation and its helpers.

# Holiday calendar consulted by generateHoliday, which matches on its 'date' and
# 'country' columns; 'date' is parsed as YYYY-MM-DD.
holidays = pd.read_csv('holidays.csv')
holidays['date'] =  pd.to_datetime(holidays['date'],format="%Y-%m-%d")

# (code, (start, end)) time-of-day windows on the dummy date 2000-01-01: eight consecutive
# three-hour periods coded 1-8. getDayPeriod moves a timestamp onto that date and returns
# the code of the window containing it.
day_periods = [(1, (datetime.datetime(2000,1,1,0,0,0),  datetime.datetime(2000,1,1,2,59,59))),
           (2, (datetime.datetime(2000,1,1,3,0,0),  datetime.datetime(2000,1,1,5,59,59))),
           (3, (datetime.datetime(2000,1,1,6,0,0),  datetime.datetime(2000,1,1,8,59,59))),
           (4, (datetime.datetime(2000,1,1,9,0,0),  datetime.datetime(2000,1,1,11,59,59))),
           (5, (datetime.datetime(2000,1,1,12,0,0),  datetime.datetime(2000,1,1,14,59,59))),
           (6, (datetime.datetime(2000,1,1,15,0,0),  datetime.datetime(2000,1,1,17,59,59))),
           (7, (datetime.datetime(2000,1,1,18,0,0),  datetime.datetime(2000,1,1,20,59,59))),
           (8, (datetime.datetime(2000,1,1,21,0,0),  datetime.datetime(2000,1,1,23,59,59)))]

# Same layout as day_periods, looked up through getDayPeriod for the 'getEnergyPrice'
# operation. A code may cover several windows.
# NOTE(review): codes 1-3 are presumably tariff tiers — confirm their meaning.
energy_prices = [(1, (datetime.datetime(2000,1,1,22,0,0),  datetime.datetime(2000,1,1,23,59,59))),
           (1, (datetime.datetime(2000,1,1,0,0,0),  datetime.datetime(2000,1,1,7,59,59))),
           (2, (datetime.datetime(2000,1,1,8,0,0),  datetime.datetime(2000,1,1,8,59,59))),
           (2, (datetime.datetime(2000,1,1,10,30,0),  datetime.datetime(2000,1,1,17,59,59))),
           (2, (datetime.datetime(2000,1,1,20,30,0),  datetime.datetime(2000,1,1,21,59,59))),
           (3, (datetime.datetime(2000,1,1,9,0,0),  datetime.datetime(2000,1,1,10,29,59))),
           (3, (datetime.datetime(2000,1,1,18,0,0),  datetime.datetime(2000,1,1,20,29,59)))]

# (code, (first day, last day)) ranges in the dummy year 2000, used by getSeason after it
# moves the date to that year. Code 1 appears twice because its range wraps the year end.
seasons = [(1, (datetime.date(2000,  1,  1),  datetime.date(2000,  3, 20))),
           (2, (datetime.date(2000,  3, 21),  datetime.date(2000,  6, 20))),
           (3, (datetime.date(2000,  6, 21),  datetime.date(2000,  9, 22))),
           (4, (datetime.date(2000,  9, 23),  datetime.date(2000, 12, 20))),
           (1, (datetime.date(2000, 12, 21),  datetime.date(2000, 12, 31)))]

# (latitude, longitude) reference point; the 'getDistanceBaltimore' operation passes it to
# haversine as the second coordinate pair.
center_baltimore = (39.30746849825375, -76.61560625253648)

# (code, label) pairs; the 'getCrimeType' operation returns the code whose label equals the input.
crime_type = [
    (1, 'LARCENY'), (2,'LARCENY FROM AUTO'), (3,'AUTO THEFT'), (11,'ROBBERY - STREET'), (12,'ROBBERY - COMMERCIAL'), (13,'ROBBERY - CARJACKING'),(14,'ROBBERY - RESIDENCE'),(15,'BURGLARY'),(21,'COMMON ASSAULT'),(31,'ASSAULT BY THREAT'),(32,'AGG. ASSAULT'),(41,'ARSON'),(51,'SHOOTING'),(52,'RAPE'),(61,'HOMICIDE')
]
# (code, label) pairs; the 'getWeapon' operation returns the code whose label equals the input.
weapon_type = [
    (0, 'NONE'), (1,'HANDS'), (2,'OTHER'), (3,'KNIFE'), (4,'FIREARM')
]

# Rows currently in scope for group-based operations: dankfe_2 rebinds it for every row it
# processes, and the generate* helpers read it as a global.
temp_data = None


# DANKFE 2
def dankfe_2(model,loop_dataset,edit_dataset):
    """Generate every variable described by `model['relations']`, in dependency order.

    Relations are taken from a queue. A relation whose inputs are not all
    columns of `loop_dataset` yet is sent to the back of the queue; once it
    can be computed, the new column is written into `edit_dataset` (in place)
    and `loop_dataset` is refreshed from it.

    Each relation is a dict read for these keys:
        'output'      name of the column to create;
        'inputs'      names of the columns it needs;
        'constraint'  expression on the row ('' for none); rows failing it get NaN;
        'operations'  operation codes for `get_operation`; the first receives
                      the inputs, each later one the value produced so far;
        'groupby'     '' for a purely row-wise relation, else a grouping column;
        'needsRows'   only for grouped relations: 'all' for every row of the
                      group, a negative number for a window ending at the
                      current row, a positive one for a window starting there.

    For grouped relations the rows in scope are published through the global
    `temp_data` before the operations run. Rows are addressed with
    `.loc[i]` for i in 0..len-1, so both frames are expected to carry a
    default integer index.

    Changes from the previous version:
      * later operations of a relation now receive a `(name, value)` pair, as
        in dankfe_1, instead of `zip(name, value)`;
      * a relation whose inputs can never be satisfied raises instead of
        cycling through the queue forever;
      * the queue is a copy, so `model['relations']` is no longer reordered.

    Raises:
        ValueError: when none of the remaining relations can be computed.
    """
    global temp_data
    relations_queue = list(model['relations'])
    stalled = 0  # consecutive relations deferred since the last success

    while len(relations_queue) != 0:
        current_relation = relations_queue.pop(0)
        print(current_relation['output'])
        newvar_name = current_relation['output']
        inputs = current_relation['inputs']
        groupby = current_relation['groupby']

        if not set(inputs).issubset(loop_dataset.columns):
            # Inputs not available yet: send to the bottom of the queue.
            relations_queue.append(current_relation)
            stalled += 1
            if stalled >= len(relations_queue):
                # Every pending relation was tried without progress.
                pending = [relation['output'] for relation in relations_queue]
                raise ValueError(f"Cannot generate {pending}: their inputs are never available in the dataset.")
            continue
        stalled = 0

        start_time = timeit.default_timer()
        # The constraint is evaluated by pd.eval against the local name `row`.
        constraint = current_relation['constraint']
        constraint = "row." + constraint if len(constraint) != 0 else "True"

        if len(groupby) == 0: #LAMBDA: no row dependence
            for index, op in enumerate(current_relation['operations']):
                if index == 0:
                    edit_dataset[newvar_name] = edit_dataset.apply(lambda row: get_operation(op,*zip(inputs,row[inputs])) if pd.eval(constraint, target = row) else np.nan, axis = 1)
                else:
                    edit_dataset[newvar_name] = edit_dataset.apply(lambda row: get_operation(op,(newvar_name,row[newvar_name])) if pd.eval(constraint, target = row) else np.nan, axis = 1)
        else:
            needed_rows = current_relation['needsRows']
            for i in range(len(loop_dataset)):
                row = loop_dataset.loc[i]
                # Rows sharing the current row's group value, then the requested window.
                same_group = loop_dataset[loop_dataset[groupby] == row.loc[groupby]]
                if needed_rows == 'all':
                    temp_data = same_group
                elif needed_rows < 0:
                    temp_data = same_group.loc[i + needed_rows+1:i]
                else:
                    temp_data = same_group.loc[i:i + needed_rows-1]

                if pd.eval(constraint, target = row) == False:
                    edit_dataset.loc[i,newvar_name] = np.nan
                    continue
                for opIndex, op in enumerate(current_relation['operations']):
                    if opIndex == 0:
                        input_values =  [row.loc[inp] for inp in inputs]
                        edit_dataset.loc[i,newvar_name] = get_operation(op,*zip(inputs,input_values))
                    else:
                        edit_dataset.loc[i,newvar_name] = get_operation(op,(newvar_name,edit_dataset.loc[i,newvar_name]))

        # Make the new column visible to the relations still waiting.
        loop_dataset = edit_dataset.copy(deep=True)
        print(timeit.default_timer() - start_time)


# DANKFE 1
def dankfe_1(model,dataset):
    """Generate the row-wise variables of `model['relations']` into `dataset` (in place).

    Only relations with an empty 'groupby' are handled. For each one the
    first operation code receives the relation's inputs and every later code
    receives the value produced so far; rows failing the relation's
    'constraint' get NaN. A relation whose inputs are not columns of
    `dataset` yet is sent to the back of the queue.

    Changes from the previous version:
      * grouped relations are left out of the queue up front. They used to be
        re-queued on every pass, so the loop never ended when the model
        contained one;
      * when the remaining relations can never be computed (for example
        because they depend on a skipped grouped variable) they are reported
        and the loop stops, instead of cycling forever;
      * the queue is a copy, so `model['relations']` is no longer reordered.
    """
    relations_queue = [relation for relation in model['relations'] if len(relation['groupby']) == 0]
    stalled = 0  # consecutive relations deferred since the last success

    while len(relations_queue) != 0:
        current_relation = relations_queue.pop(0)
        print(current_relation['output'])
        newvar_name = current_relation['output']
        inputs = current_relation['inputs']

        if not set(inputs).issubset(dataset.columns):
            # Inputs not available yet: send to the bottom of the queue.
            relations_queue.append(current_relation)
            stalled += 1
            if stalled >= len(relations_queue):
                # Every pending relation was tried without progress.
                pending = [relation['output'] for relation in relations_queue]
                print(f"DANKFE-1: skipped {pending} - inputs not available without grouped variables")
                break
            continue
        stalled = 0

        start_time = timeit.default_timer()
        # The constraint is evaluated by pd.eval against the lambda argument `row`.
        constraint = current_relation['constraint']
        constraint = "row." + constraint if len(constraint) != 0 else "True"
        for index, op in enumerate(current_relation['operations']):
            if index == 0:
                dataset[newvar_name] = dataset.apply(lambda row: get_operation(op,*zip(inputs,row[inputs])) if pd.eval(constraint, target = row) else np.nan, axis = 1)
            else:
                dataset[newvar_name] = dataset.apply(lambda row: get_operation(op,(newvar_name,row[newvar_name])) if pd.eval(constraint, target = row) else np.nan, axis = 1)
        print(timeit.default_timer() - start_time)
    
def get_operation(code,*values):
    """Apply the operation named `code` to the given inputs and return the result.

    Parameters:
        code: operation name as written in a relation's 'operations' list.
        *values: one `(column_name, value)` pair per input, in the order the
            relation lists them. Most operations read the value
            (`pair[1]`); the group aggregates ('getAverage', 'getMax',
            'getMin', 'getStd', 'getMedian') read the column name
            (`pair[0]`) and aggregate it over the global `temp_data`.

    Returns:
        The computed value; its type depends on the operation.

    Raises:
        ValueError: for an unknown `code`. Previously an identity lambda was
            returned, which ended up stored in the generated column.
    """
    # --- arithmetic -------------------------------------------------------
    if code == '+':
        return np.sum([pair[1] for pair in values])
    elif code == 'positive_sum':
        return np.sum([pair[1] for pair in values if pair[1] >= 0])
    elif code == 'negative_sum':
        return np.sum([pair[1] for pair in values if pair[1] <= 0])
    elif code == '-':
        return values[0][1] - values[1][1]
    elif code == '*':
        return np.prod([pair[1] for pair in values])
    elif code == '/':
        return round(values[0][1] / values[1][1],2)
    elif code == '>=':
        return values[0][1] >= values[1][1]
    # --- date differences -------------------------------------------------
    elif code == 'datediff':
        return relativedelta(values[0][1],values[1][1])
    elif code == 'years':
        return values[0][1].years
    elif code == 'months':
        return values[0][1].years * 12 + values[0][1].months
    # --- date parts and calendar lookups ----------------------------------
    elif code == 'getHour':
        return values[0][1].hour
    elif code == 'getDay':
        return values[0][1].day
    elif code == 'getMonth':
        return values[0][1].month
    elif code == 'getYear':
        return values[0][1].year
    elif code == 'getWeekday':
        return values[0][1].dayofweek
    elif code == 'getSeason':
        return getSeason(values[0][1],seasons)
    elif code == 'getDayPeriod':
        return getDayPeriod(values[0][1],day_periods)
    elif code == 'getHoliday':
        return generateHoliday(values[0][1],values[1][1],holidays)
    elif code == 'getEnergyPrice':
        return getDayPeriod(values[0][1],energy_prices)
    elif code == 'divide_by_30':
        return values[0][1] / 30
    # --- aggregates over the rows in temp_data ----------------------------
    elif code == 'getAverage':
        return generateAverage(values[0][0])
    elif code == 'getMax':
        return generateMax(values[0][0])
    elif code == 'getMin':
        return generateMin(values[0][0])
    elif code == 'getStd':
        return generateStd(values[0][0])
    elif code == 'getMedian':
        return generateMedian(values[0][0])
    elif code == 'generateAvg_2weeks':
        return generateAverage2Weeks(values[0][1],values[1][1])
    elif code == 'generateAvg_2w_100k':
        return values[0][1] * 100000 / values[1][1]
    elif code == 'generateSum_2weeks':
        return generateCumulative2Weeks(values[0][1],values[1][1])
    elif code == 'generateHighRisk_2weeks':
        return generateHighRisk(values[0][1],values[1][1],14)
    elif code == 'getLastYearTemp':
        return generateLastYearTemp(values[0][1],values[1][1],12)
    elif code == 'generateSum_2w_100k':
        return values[0][1] * 100000 / values[1][1]
    # --- case-study specific lookups ----------------------------------------
    elif code == 'generateEstadio_8ed':
        # NOTE(review): `estadio` is not defined in the visible part of this
        # notebook — confirm it exists before using this operation.
        return generateFromTable(values[0][1],values[1][1],estadio)
    elif code == 'generateN_8ed':
        return generateN8ed(values[0][1])
    elif code == 'getDistanceBaltimore':
        return haversine(values[0][1],values[1][1],center_baltimore[0],center_baltimore[1])
    elif code == 'getCrimeType':
        return [x[0] for x in crime_type if x[1] == values[0][1]][0]
    elif code == 'getWeapon':
        return [x[0] for x in weapon_type if x[1] == values[0][1]][0]
    elif code == 'getCases100k':
        return values[0][1] * 100000 / values[1][1]
    # --- threshold flags ----------------------------------------------------
    elif code == 'getCurrentRisk':
        return values[0][1] > 120
    elif code == 'getAverageDiffPos':
        return values[0][1] >= 0
    elif code == 'generatePM25_safe':
        return values[0][1] >= 35
    elif code == 'generatePM10_safe':
        return values[0][1] >= 150
    elif code == 'generateSO2_safe':
        return values[0][1] >= 0.14
    else:
        raise ValueError(f"Unknown operation code: {code!r}")

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Uses the haversine formula on a sphere of radius 6372.8 (presumably
    kilometres, which would make the result kilometres — confirm).
    """
    sphere_radius = 6372.8
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2
    phi1 = radians(lat1)
    phi2 = radians(lat2)
    hav = sin(half_dlat)**2 + cos(phi1)*cos(phi2)*sin(half_dlon)**2
    central_angle = 2*asin(sqrt(hav))
    return sphere_radius * central_angle

def generateFromTable(t,n,table):
    """Look up `table[t][n]`: `t` selects the outer entry, `n` the inner one."""
    outer_entry = table[t]
    return outer_entry[n]

def generateN8ed(gg_p):
    """Map the count `gg_p` to a category code from 1 to 5.

    0 -> 1, 1..2 -> 2, 3..6 -> 3, 7..15 -> 4, anything else -> 5 (this
    includes values above 15, negative values and values that fall between
    the integer ranges).
    """
    if gg_p == 0:
        return 1
    # (category, lowest count, highest count), bounds inclusive.
    for category, low, high in ((2, 1, 2), (3, 3, 6), (4, 7, 15)):
        if low <= gg_p <= high:
            return category
    return 5

def generateAverage(column):
    """Mean of `column` over the rows currently held in the global `temp_data`."""
    group_values = temp_data[column].to_list()
    return np.mean(group_values)

def generateMax(column):
    """Maximum of `column` over the rows currently held in the global `temp_data`."""
    group_values = temp_data[column].to_list()
    return np.max(group_values)

def generateMin(column):
    """Minimum of `column` over the rows currently held in the global `temp_data`."""
    group_values = temp_data[column].to_list()
    return np.min(group_values)

def generateStd(column):
    """Standard deviation (numpy default) of `column` over the rows in the global `temp_data`."""
    group_values = temp_data[column].to_list()
    return np.std(group_values)

def generateMedian(column):
    """Median of `column` over the rows currently held in the global `temp_data`."""
    group_values = temp_data[column].to_list()
    return np.median(group_values)

def getSeason(date,season_dict):
    """Return the code of the season range that contains `date`.

    Parameters:
        date: a `datetime.date`, `datetime.datetime` or pandas Timestamp; the
            year is ignored.
        season_dict: iterable of `(code, (start, end))` entries with
            inclusive bounds in the year 2000 (see `seasons`).

    The comparison is made on calendar days only. Previously a value
    carrying a time of day was compared directly with `datetime.date` bounds,
    which raises TypeError for plain datetimes and recent pandas Timestamps,
    and which placed any time after midnight on a season's last day outside
    every range.

    Raises:
        StopIteration: if no range contains the date.
    """
    def _as_day(value):
        # datetime (and pandas Timestamp) is reduced to its calendar day.
        return value.date() if isinstance(value, datetime.datetime) else value

    day = _as_day(date).replace(year=2000)  # 2000 is a leap year, so 29 Feb is valid
    return next(season for season, (start, end) in season_dict
                if _as_day(start) <= day <= _as_day(end))
def getDayPeriod(date,season_dict):
    """Return the code of the time-of-day window that contains `date`.

    Parameters:
        date: a `datetime.datetime` or pandas Timestamp; only its hour,
            minute and second are used.
        season_dict: iterable of `(code, (start, end))` entries whose bounds
            are datetimes on 2000-01-01 with inclusive ends (see
            `day_periods` and `energy_prices`).

    The time of day is truncated to whole seconds before the lookup. The
    windows end at `…:59:59`, so a value such as 02:59:59.5 previously
    matched no window.

    Raises:
        StopIteration: if no window contains the time.
    """
    moment = datetime.datetime(2000, 1, 1, date.hour, date.minute, date.second)
    return next(season for season, (start, end) in season_dict
                if start <= moment <= end)

def generateAverage2Weeks(date,cases):
    """Mean of the 'cases' column over the rows in the global `temp_data`.

    `date` and `cases` are accepted but not read; the window is whatever the
    caller placed in `temp_data`.
    """
    window_cases = temp_data['cases'].to_list()
    window_mean = np.mean(window_cases)
    return window_mean

def generateHighRisk(date,sum_2weeks,offset):
    """Tell whether 'sum_2w_100k' reaches 120.0 on the day `offset` days after `date`.

    The row is searched in the global `temp_data` by its 'current_date'.
    Returns NaN when no such row exists; `sum_2weeks` is accepted but not read.
    """
    future_day = date + datetime.timedelta(days=offset)
    matches = temp_data[temp_data['current_date'] == future_day]
    if len(matches) != 0:
        return matches.iloc[0]['sum_2w_100k'] >= 120.0
    return np.nan

def generateLastYearTemp(date,temp,offset):
    """Return the 'temperature' recorded `offset` months before `date`.

    The row is searched in the global `temp_data` by its 'current_date'.
    Returns NaN when no such row exists; `temp` is accepted but not read.
    """
    earlier_day = date - relativedelta(months=offset)
    matches = temp_data[temp_data['current_date'] == earlier_day]
    if len(matches) != 0:
        return matches.iloc[0]['temperature']
    return np.nan

def generateCumulative2Weeks(date,cases):
    """Sum of the 'cases' column over the rows in the global `temp_data`.

    `date` and `cases` are accepted but not read; the window is whatever the
    caller placed in `temp_data`.
    """
    window_cases = temp_data['cases'].to_list()
    window_total = np.sum(window_cases)
    return window_total

def generateHoliday(date,country,holiday_data):
    """Tell whether `date` is a holiday in `country`.

    Parameters:
        date: the day to look up; a datetime or pandas Timestamp is reduced
            to its calendar day (midnight) before matching.
        country: value compared with the 'country' column.
        holiday_data: DataFrame with 'date' and 'country' columns.

    Returns:
        True when at least one row matches both the date and the country.

    The result now depends only on whether a matching row exists. The former
    `.any().all()` reduction also required every other column of the match to
    be truthy, so a real holiday with a 0, False or empty field was reported
    as a normal day; and a date carrying a time of day never equalled the
    midnight holiday dates.
    """
    if isinstance(date, datetime.datetime):
        date = pd.Timestamp(date).normalize()
    is_match = (holiday_data['date'] == date) & (holiday_data['country'] == country)
    return bool(is_match.any())

In [15]:
# Pipeline driver: every stage prints its elapsed wall-clock time in seconds.
print('read csv')
print(read_time)

# Stage 1 - first data preparation: target clean-up, boolean encoding, missing values.
start_time = timeit.default_timer()
data_observed = data_preparation(data_observed,options,target)
print(timeit.default_timer() - start_time)

# Stage 2 - add the automatically generated relations to the ER model before generation.
start_time = timeit.default_timer()
model = generateAutoVariables(data_observed,options,template,model)
print(timeit.default_timer() - start_time)

# print(model)

# Stage 3 - DANKFE 2 or 3: the new columns are written into data_observed in place,
# while data_loop is the read-only snapshot the inputs are taken from.
data_loop = data_observed.copy(deep=True)
dankfe_2(model,data_loop,data_observed)

# # DANKFE 1 (comment paragraph above if running DANKFE-1)
# dankfe_1(model,data_observed)

# Stage 4 - scaling, train/test split and balancing of the training part.
start_time = timeit.default_timer()
data_observed_train, data_observed_test = data_preparation_2(data_observed,options,target)
print(timeit.default_timer() - start_time)

# Stage 5 - write the generated train and test sets next to the base CSV.
print('save csv')
start_time = timeit.default_timer()
data_observed_train.to_csv(f'{data_folder}{base_name}_gen_train.csv',index=False)
data_observed_test.to_csv(f'{data_folder}{base_name}_gen_test.csv',index=False)
# data_observed.to_csv(f'{data_folder}{base_name}_lambda.csv',index= False)
print(timeit.default_timer() - start_time)
# sys.stdout.close()

read csv
0.4526356999995187
0.5078981000115164
0.08322460000636056
{'entities': [{'name': 'current_date', 'type': 'datetime', 'description': 'Date of reporting', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'ALARM', 'type': 'bool', 'description': 'Risk of low quality air', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'CO_Mean', 'type': 'float', 'description': 'CO Mean', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'CO_Min', 'type': 'float', 'description': 'CO Min', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'CO_Max', 'type': 'float', 'description': 'CO Max', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'CO_Std', 'type': 'float', 'description': 'CO Std', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'NO2_Mean', 'type': 'float', 'description': 'NO2 Mean', 'order': 0, 'observed': True, 'constraints': []}, {'name': 'NO2_Min', 'type': 'float', 'description': 'NO2 Min', 'order': 0, 'observed': True, 'constra