In [None]:
import great_expectations as gx
import pandas as pd

In [None]:
# Obtain the Great Expectations data context that the rest of the notebook
# registers its data source and expectation suite against.
# NOTE(review): called with no arguments, so the kind of context returned
# (in-memory vs. file-backed) depends on the environment — confirm which is intended.
context = gx.get_context()

In [None]:
# Register a pandas data source on the context, then attach a dataframe asset
# to it; the asset is what the batch definition below is created from.
data_source = context.data_sources.add_pandas(
    name="raw_stock_data",
)
data_asset = data_source.add_dataframe_asset(
    name="raw_stock_data_asset",
)

In [None]:
# A "whole dataframe" batch definition: the dataframe supplied at get_batch()
# time is treated as a single batch.
batch_definition_name = "raw_stock_data_batch"
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    name=batch_definition_name,
)

In [None]:
# Load the raw stock data and hand it to the batch definition as the
# runtime "dataframe" parameter.
batch_parameters = dict(
    dataframe=pd.read_parquet("../data/raw/stock_data.parquet"),
)
batch = batch_definition.get_batch(
    batch_parameters=batch_parameters,
)

In [None]:
# Preview the exact frame that was handed to the batch instead of reading the
# parquet file a second time: this avoids redundant I/O and guarantees the rows
# shown here are the rows that get validated.
df = batch_parameters["dataframe"]
df.head()

In [None]:
# Create the (initially empty) expectation suite.
# Expectations are registered in the next cell, which already includes the
# `symbol` not-null check and the table column-set check; adding them here as
# well only duplicated those checks, so this cell is limited to suite creation.
expectation_suite_name = "stock_data_expectation_suite"
suite = gx.ExpectationSuite(name=expectation_suite_name)

In [None]:
# --- Schema: exact columns present (order-insensitive) ---
# The table must contain exactly this set of columns.
expected_columns = ["date", "symbol", "open", "high", "low", "close", "volume", "adj_close"]
suite.add_expectation(
    gx.expectations.ExpectTableColumnsToMatchSet(column_set=expected_columns)
)

# --- Basic completeness ---
# These columns must never contain nulls. Note that `high`, `low` and
# `adj_close` are not covered by this check.
for required_column in ("date", "symbol", "open", "close", "volume"):
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToNotBeNull(column=required_column)
    )

# --- Data types (Pandas dtypes shown; adjust if your backend differs) ---
# Column name -> expected dtype name; one type expectation is registered per
# entry, in the order listed.
expected_dtypes = {
    "date": "datetime64[ns]",
    "symbol": "object",
    "open": "float64",
    "high": "float64",
    "low": "float64",
    "close": "float64",
    "adj_close": "float64",
    "volume": "int64",
}
for column_name, dtype_name in expected_dtypes.items():
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToBeOfType(column=column_name, type_=dtype_name)
    )

# --- Logical price relationships ---
# A period's high can never be below its low, but the two are legitimately
# equal when every trade in the period printed at a single price.
# or_equal=True accepts that case, so only genuinely inverted rows
# (high < low) are reported as failures.
suite.add_expectation(
    gx.expectations.ExpectColumnPairValuesAToBeGreaterThanB(
        column_A="high",
        column_B="low",
        or_equal=True,
    )
)

# --- Uniqueness & row count ---
# Each (date, symbol) combination may appear at most once.
key_columns = ["date", "symbol"]
suite.add_expectation(
    gx.expectations.ExpectCompoundColumnsToBeUnique(column_list=key_columns)
)
# The table must contain at least one row.
suite.add_expectation(
    gx.expectations.ExpectTableRowCountToBeBetween(min_value=1)
)

# --- Allowed symbols ---
# Every value in `symbol` must be one of these tickers.
allowed_symbols = ["AAPL", "MSFT", "AMZN", "GOOGL", "META", "NVDA", "TSLA"]
suite.add_expectation(
    gx.expectations.ExpectColumnValuesToBeInSet(column="symbol", value_set=allowed_symbols)
)

In [None]:
# Register the assembled suite with the data context.
context.suites.add(suite=suite)

In [None]:
# Run every expectation in the suite against the batch and print the
# resulting validation report.
validation_result = batch.validate(expect=suite)
print(validation_result)