In [1]:
# Autoreloading
# Load IPython's autoreload extension and switch it to mode 2, so edits made to
# the imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
# Imports
import os

from cluster_client.manager import ClusterManager
from cluster_client.model.field_expressions import *
from cluster_client.model.aggregate_expressions import *
from cluster_client.upload.cassandra import *
from cluster_client.connector.cassandra import CassandraConnector
from cluster_client.test_data.generate_test_loan_data import loan_amortisation_generator, loan_amortisation_metadata, loan_origination_generator, loan_origination_metadata

In [None]:
# Upload a CSV file into Cassandra.
#
# The CSV location is machine-specific, so it can be overridden by setting the
# FOREX_CSV_PATH environment variable before starting the kernel. When the
# variable is unset or empty, the original hard-coded path is used.
FOREX_CSV_PATH = os.environ.get("FOREX_CSV_PATH") or r"D:\uni\Y4S1\Project\forex.csv"

connector = CassandraConnector("localhost", 9042)
upload_handler = CassandraUploadHandler(connector)
# NOTE(review): the arguments after the path are presumably keyspace, table,
# partition-key columns and clustering-key columns - confirm against
# CassandraUploadHandler.create_from_csv.
upload_handler.create_from_csv(FOREX_CSV_PATH, "test", "forex_table", ["slug"], ["date"])

In [None]:
# Upload from iterator
# Calls drop_table for "origination"/"data" and then, on the object it returns,
# create_from_iterator with rows produced by loan_origination_generator.
connector = CassandraConnector("localhost", 9042)
upload_handler = CassandraUploadHandler(connector)
column_names, column_types = loan_origination_metadata()
(
    upload_handler
    .drop_table("origination", "data")
    .create_from_iterator(
        loan_origination_generator(1000),  # presumably the number of generated rows - TODO confirm
        "origination",
        "data",
        column_names,
        column_types,
        ["loan_id"],
    )
)

In [2]:
# Setup cluster manager
# CLUSTER_ADDRESS is the address handed to ClusterManager. The resulting
# `manager` object is what the query cells further down call
# `.cassandra_table(...)` on, so this cell must run before them.
CLUSTER_ADDRESS = "localhost"
manager = ClusterManager(CLUSTER_ADDRESS)

In [None]:
# Original Table
# Evaluate the "origination"/"data" table with no select/filter/group_by
# applied, then display the result as a dataframe.
result = (
    manager
    .cassandra_table("origination", "data")
    .evaluate()
)
result.get_dataframe()

In [None]:
# Select Expression
# Syntax: .select(list, of, expressions)
#
# Building blocks for expressions:
# - F("my_field") refers to the field "my_field"
# - V(x) wraps a literal value, e.g. V("value") or V(2)
# - String operations, e.g.
#       Function.Left(Function.ToString(F("origination_date")), 10).as_name("date_string")
#       (Function.ToString(F("interest_rate") * 100) + "%").as_name("%_interest_rate")
# - Arithmetic operations, e.g.
#       F("duration") + V(2)
#       F("interest_rate") * V(2)
# - .as_name("alias") sets the name of the resulting column

origination_table = manager.cassandra_table("origination", "data")

# The three projected columns, built one at a time for readability.
loan_id_column = F("Loan_ID")
origination_date_text = Function.Left(
    Function.ToString(F("origination_date")), 10
).as_name("date_string")
interest_rate_percent = (
    Function.ToString(F("interest_rate") * 100) + "%"
).as_name("%_interest_rate")

select = origination_table.select(
    loan_id_column,
    origination_date_text,
    interest_rate_percent,
).evaluate()
select.get_dataframe()

In [None]:
# Filter Expression
# Syntax: .filter(single_expression)
#
# Comparisons:
# - Single comparisons:
#       Numeric: < > >= <=
#       Equality: ==, !=
#       String operations: .contains(), .starts_with(), .ends_with() (and case insensitive versions)
#       Null checks: .is_null(), .is_not_null()
# - Combining comparisons (with AND/OR):
#       (Comparison) & (Comparison) (NOTE: brackets are required)
#       (Comparison) | (Comparison)
#
# The result is bound to `filter_result` rather than `filter`, so Python's
# built-in filter() is not shadowed for the rest of the kernel session.

filter_result = (
    manager
    .cassandra_table("origination", "data")
    .filter(
        (Function.Left(Function.ToString(F("origination_date")), 10).contains("2021-11-"))
        & (F("duration") > 26)
    )
    .evaluate()
)
filter_result.get_dataframe()

In [None]:
# Group By Expression
# Syntax: .group_by([group, by, expressions])
#     or: .group_by([group, by, expressions], [aggregate, expressions])
#
# Available aggregate expressions:
#   Max, Min, Sum, Avg, Count, Distinct Count, String Concat, Distinct String Concat

# Group on "duration" and take Max of "origination_date" within each group.
group_by = (
    manager
    .cassandra_table("origination", "data")
    .group_by(
        [F("duration")],
        [Max(F("origination_date"))],
    )
    .evaluate()
)
group_by.get_dataframe()