In [1]:
# from preql.compiler import compile
## Set up imports of local module code: resolve the current working
## directory (abspath("") returns it as an absolute path) and put its parent
## directory at the front of sys.path. This has to run before the `preql`
## imports that follow.
import os
nb_path = os.path.abspath("")
from sys import path
from os.path import dirname

path.insert(0,  dirname(nb_path))

# nb_path

from preql.core.models import Select, Grain
from preql.core.query_processor import process_query
from preql.parser import parse
from logging import StreamHandler, INFO
from preql.constants import logger

from pytest import fixture

from preql import Environment
from preql.constants import logger as base_logger
from preql.core.enums import (
    DataType,
    Purpose,
    FunctionType,
    ComparisonOperator,
    WindowType,
)
from preql.core.env_processor import generate_graph
from preql.core.functions import Count, CountDistinct, Max, Min
from preql.core.models import (
    Concept,
    Datasource,
    ColumnAssignment,
    Function,
    Grain,
    WindowItem,
    FilterItem,
    OrderItem,
    WhereClause,
    Comparison,
)

# Surface INFO-level messages from the imported preql logger.
logger.setLevel(INFO)
# Re-running this cell would otherwise stack one more handler per execution
# and print every log record several times, so attach the stream handler only
# when none is present. The exact-type check is deliberate: subclasses such as
# FileHandler write elsewhere and must not suppress the stream handler.
if not any(type(handler) is StreamHandler for handler in logger.handlers):
    logger.addHandler(StreamHandler())
def test_environment():
    """Assemble the sample order / product / category ``Environment``.

    Declares key, property, metric, window-ranked and filtered concepts,
    binds the physical ones to three datasources (``tblRevenue``,
    ``tblProducts`` and ``tblCategory``) and registers everything on a
    freshly created ``Environment``.

    Returns:
        The populated ``Environment``.
    """
    environment = Environment()

    def integer_key(name):
        """Create an INTEGER concept with KEY purpose."""
        return Concept(name=name, datatype=DataType.INTEGER, purpose=Purpose.KEY)

    def integer_metric(name, lineage):
        """Create an INTEGER concept with METRIC purpose derived via *lineage*."""
        return Concept(
            name=name,
            datatype=DataType.INTEGER,
            purpose=Purpose.METRIC,
            lineage=lineage,
        )

    def revenue_descending():
        """Return a fresh ordering on total revenue, highest first."""
        return [OrderItem(expr=revenue_total, order="desc")]

    def column_list(*pairs):
        """Turn ``(alias, concept)`` pairs into ``ColumnAssignment`` objects."""
        return [
            ColumnAssignment(alias=alias, concept=concept) for alias, concept in pairs
        ]

    # ---- order concepts -------------------------------------------------
    order_key = integer_key("order_id")
    order_ts = Concept(
        name="order_timestamp",
        datatype=DataType.TIMESTAMP,
        purpose=Purpose.PROPERTY,
    )
    orders_counted = integer_metric("order_count", Count([order_key]))
    orders_counted_distinct = integer_metric(
        "distinct_order_count", CountDistinct([order_key])
    )
    order_key_max = integer_metric("max_order_id", Max([order_key]))
    order_key_min = integer_metric("min_order_id", Min([order_key]))

    # ---- revenue concepts -----------------------------------------------
    revenue_amount = Concept(
        name="revenue",
        datatype=DataType.FLOAT,
        purpose=Purpose.PROPERTY,
    )
    revenue_sum = Function(
        arguments=[revenue_amount],
        output_datatype=DataType.FLOAT,
        output_purpose=Purpose.METRIC,
        operator=FunctionType.SUM,
    )
    revenue_total = Concept(
        name="total_revenue",
        datatype=DataType.FLOAT,
        purpose=Purpose.METRIC,
        lineage=revenue_sum,
    )

    # ---- product / category concepts ------------------------------------
    product_key = integer_key("product_id")

    # Sanity check on the grain the key concept reports for itself.
    assert product_key.grain.components[0].name == "product_id"

    category_key = integer_key("category_id")
    category_label = Concept(
        name="category_name",
        datatype=DataType.STRING,
        purpose=Purpose.PROPERTY,
        grain=category_key,
    )
    label_length_fn = Function(
        arguments=[category_label],
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        operator=FunctionType.LENGTH,
    )
    category_label_length = Concept(
        name="category_name_length",
        datatype=DataType.INTEGER,
        purpose=Purpose.PROPERTY,
        grain=category_key,
        lineage=label_length_fn,
    )

    # ---- window-ranked and filtered concepts ----------------------------
    overall_rank = WindowItem(
        type=WindowType.RANK,
        content=product_key,
        order_by=revenue_descending(),
    )
    rank_overall = Concept(
        name="product_revenue_rank",
        datatype=DataType.INTEGER,
        purpose=Purpose.PROPERTY,
        lineage=overall_rank,
    )
    per_category_rank = WindowItem(
        type=WindowType.RANK,
        content=product_key,
        over=[category_key],
        order_by=revenue_descending(),
    )
    rank_within_category = Concept(
        name="product_revenue_rank_by_category",
        datatype=DataType.INTEGER,
        purpose=Purpose.PROPERTY,
        lineage=per_category_rank,
    )

    revenue_above_50 = WhereClause(
        conditional=Comparison(
            left=revenue_total,
            operator=ComparisonOperator.GT,
            right=50,
        )
    )
    high_revenue_products = Concept(
        name="products_with_revenue_over_50",
        datatype=DataType.INTEGER,
        purpose=Purpose.PROPERTY,
        lineage=FilterItem(content=product_key, where=revenue_above_50),
    )

    # ---- datasources ----------------------------------------------------
    revenue_source = Datasource(
        identifier="revenue",
        columns=column_list(
            ("revenue", revenue_amount),
            ("order_id", order_key),
            ("product_id", product_key),
            ("order_timestamp", order_ts),
        ),
        address="tblRevenue",
        grain=Grain(components=[order_key]),
    )
    product_source = Datasource(
        identifier="products",
        columns=column_list(
            ("product_id", product_key),
            ("category_id", category_key),
        ),
        address="tblProducts",
        grain=Grain(components=[product_key]),
    )
    category_source = Datasource(
        identifier="category",
        columns=column_list(
            ("category_id", category_key),
            ("category_name", category_label),
        ),
        address="tblCategory",
        grain=Grain(components=[category_key]),
    )

    # ---- registration (datasources first, then concepts, in fixed order) -
    for source in (product_source, category_source, revenue_source):
        environment.add_datasource(source)

    registration_order = (
        category_key,
        category_label,
        category_label_length,
        revenue_total,
        revenue_amount,
        product_key,
        order_key,
        orders_counted,
        order_ts,
        orders_counted_distinct,
        order_key_min,
        order_key_max,
        rank_overall,
        rank_within_category,
        high_revenue_products,
    )
    for concept in registration_order:
        environment.add_concept(concept)

    return environment

# Build the sample environment (concepts + datasources) defined above.
env = test_environment()

# Script text handed to the parser below. It declares a few extra key
# concepts, derives `test_upper_case_2` from `category_name` with a CASE
# expression, and contains a `persist` statement followed by a plain `select`.
TEST_SETUP = r"""

key item string;
key value float;
key count int;
key store_id int;

key test_upper_case_2 <- CASE WHEN category_name = upper(category_name) then True else False END;

persist bool_is_upper_name into upper_name from
select
    test_upper_case_2
;

select 
test_upper_case_2;
"""


# Parse the script against the existing environment; `parse` hands back the
# environment together with the parsed statements, and `env` is rebound to
# the returned environment.
env, parsed = parse(TEST_SETUP, environment=env)
# Take the last parsed statement; it is annotated as a Select, presumably the
# trailing `select` in TEST_SETUP -- confirm against the parser's output order.
select: Select = parsed[-1]

# Left as a bare expression so the notebook displays the returned value.
process_query(statement=select, environment=env)


[QUERY BUILD] getting source datasource for query with output ['local.test_upper_case_2<>']
[CONCEPT DETAIL] Beginning sourcing loop for ['local.test_upper_case_2<>']
	[CONCEPT DETAIL] found direct select node with all 1 concepts, returning static selection
[CONCEPT DETAIL] finished a loop iteration looking for ['local.test_upper_case_2'] from [SelectNode<local.test_upper_case_2>], have ['local.test_upper_case_2']
[CONCEPT DETAIL] have all concepts, have ['local.test_upper_case_2'] from [SelectNode<local.test_upper_case_2>] checking for single connected graph
[CONCEPT DETAIL] Graph analysis: 1 subgraphs found
[CONCEPT DETAIL] One fully connected subgraph returned, sourcing ['local.test_upper_case_2'] successful.
[CONCEPT DETAIL - MERGE NODE] Merge node has only one parent with the same outputs as this merge node, dropping merge node 


ProcessedQuery(output_columns=[Concept(name='test_upper_case_2', datatype=<DataType.BOOL: 'bool'>, purpose=<Purpose.KEY: 'key'>, metadata=Metadata(description=None, line_number=4, concept_source=<ConceptSource.MANUAL: 'manual'>), lineage=Function(operator=<FunctionType.CASE: 'case'>, arg_count=-1, output_datatype=<DataType.BOOL: 'bool'>, output_purpose=<Purpose.PROPERTY: 'property'>, valid_inputs=None, arguments=[CaseWhen(comparison=local.category_name<local.category_id> = upper(local.category_name<local.category_id>), expr=True), CaseElse(expr=False)]), namespace='local', keys=None, grain=Grain(components=[], nested=False))], ctes=[CTE(name='cte_category_at_local_category_id_4330417665674466', source=QueryDatasource(input_concepts=[Concept(name='category_name', datatype=<DataType.STRING: 'string'>, purpose=<Purpose.PROPERTY: 'property'>, metadata=Metadata(description=None, line_number=None, concept_source=<ConceptSource.MANUAL: 'manual'>), lineage=None, namespace='local', keys=None, g