In [0]:
# Load IPython's autoreload extension and turn it on in mode 2, so edits to
# imported modules (e.g. the local `logger` module) are picked up live.
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0
# Re-running this cell prints an "already loaded" notice from %load_ext.

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [0]:
import json
import os

from logger import log_message

In [0]:
# Root workspace folder (written as a file: URI) and the raw-data folder beneath it.
base_path = "file:/Workspace/Users/masa.cirkovic@abo.fi/"
raw_data_path = f"{base_path}rawdata/"

In [0]:
# Bronze Contracts
def _bronze_contract(file_name, file_format, columns):
    """Build a bronze-layer contract for one raw file located under ``raw_data_path``.

    ``columns`` maps each raw column name to its declared type and is stored
    under the ``"schema"`` key unchanged.
    """
    return {
        "version": "1.0",
        "layer": "bronze",
        "source": f"{raw_data_path}{file_name}",
        "format": file_format,
        "ingestion_process": "batch_ingestion",
        "schema": columns,
    }


bronze_car_sales_contract = _bronze_contract(
    "car_sales_data.csv",
    "csv",
    {
        "_c0": "int",
        "car": "string",
        "date": "string",
        "dealer_name": "string",
        # NOTE(review): "dealer_tegion" looks like a typo of "dealer_region"
        # (the silver contract uses "dealer_region") — confirm against the raw
        # CSV header before renaming.
        "dealer_tegion": "string",
        "company": "string",
        "model": "string",
        "engine": "string",
        "transmission": "string",
        "color_car": "string",
        "body_style": "string",
        "price_$": "string",
    },
)

bronze_companies_contract = _bronze_contract(
    "companies_data.json",
    "json",
    {
        "company_id": "string",
        "company_name": "string",
        "company_type": "string",
        "founding_year": "int",
        "headquarters": "string",
    },
)

bronze_customers_contract = _bronze_contract(
    "customers_data.csv",
    "csv",
    {
        "car_id": "string",
        "name": "string",
        "gender": "string",
        "annual_income": "int",
    },
)

In [0]:
# Silver Contracts
def _field(dtype, *rules):
    """Return one schema entry: the column type plus its validation rules, in order."""
    return {"type": dtype, "rules": list(rules)}


def _silver_contract(bronze_source, schema):
    """Wrap a column schema in the metadata shared by every silver contract."""
    return {
        "version": "1.0",
        "layer": "silver",
        "source": bronze_source,
        "format": "dataframe",
        "schema": schema,
    }


silver_car_sales_contract = _silver_contract(
    "bronze_car_sales_data",
    {
        "car_id": _field("string", "not_null"),
        "date": _field("date", "not_null"),
        "dealer_name": _field("string", "not_null"),
        "dealer_region": _field("string", "not_null"),
        "company_id": _field("string", "not_null"),
        "model": _field("string", "not_null"),
        "engine": _field("string", "not_null"),
        "transmission": _field("string", "not_null"),
        "color": _field("string", "not_null"),
        "body_style": _field("string", "not_null"),
        "price_$": _field("int", "greater_than_0", "not_null"),
    },
)

silver_companies_contract = _silver_contract(
    "bronze_companies_data",
    {
        "company_id": _field("string", "not_null"),
        "company_name": _field("string", "not_null"),
        "company_type": _field("string", "not_null"),
        "founding_year": _field("int", "not_null"),
        "headquarters": _field("string", "not_null"),
    },
)

silver_customers_contract = _silver_contract(
    "bronze_customers_data",
    {
        "car_id": _field("string", "not_null"),
        "name": _field("string", "not_null"),
        "gender": _field("string", "not_null"),
        "annual_income_$": _field("int", "greater_than_0", "not_null"),
    },
)

In [0]:
# Golden Layer Contract
# Ordered (column, type, rules) table; the schema below is generated from it
# so the column order of the emitted contract follows this listing.
_GOLDEN_COLUMNS = (
    ("car_id", "string", ("not_null",)),
    ("date", "date", ("not_null",)),
    ("dealer_name", "string", ("not_null",)),
    ("dealer_region", "string", ("not_null",)),
    ("company_id", "string", ("not_null",)),
    ("model", "string", ("not_null",)),
    ("engine", "string", ("not_null",)),
    ("transmission", "string", ("not_null",)),
    ("color", "string", ("not_null",)),
    ("body_style", "string", ("not_null",)),
    ("price_$", "int", ("greater_than_0", "not_null")),
    ("company_name", "string", ("not_null",)),
    ("company_type", "string", ("not_null",)),
    ("founding_year", "int", ("not_null",)),
    ("headquarters", "string", ("not_null",)),
    ("name", "string", ("not_null",)),
    ("gender", "string", ("not_null",)),
    ("annual_income_$", "int", ("greater_than_0", "not_null")),
)

golden_contract = {
    "version": "1.0",
    "layer": "golden",
    "source": ["silver_car_sales_data", "silver_companies_data", "silver_customers_data"],
    "format": "dataframe",
    # Each column gets its own dict and its own rules list.
    "schema": {
        column: {"type": dtype, "rules": list(rules)}
        for column, dtype, rules in _GOLDEN_COLUMNS
    },
}

In [0]:
# Output file name -> contract dict, for every contract defined in this notebook.
contracts = {
    "bronze_car_sales_contract.json": bronze_car_sales_contract,
    "bronze_companies_contract.json": bronze_companies_contract,
    "bronze_customers_contract.json": bronze_customers_contract,
    "silver_car_sales_contract.json": silver_car_sales_contract,
    "silver_companies_contract.json": silver_companies_contract,
    "silver_customers_contract.json": silver_customers_contract,
    "golden_contract.json": golden_contract
}

# open()/os.makedirs() need a plain filesystem path, so strip only a leading
# "file:" scheme from base_path (and tolerate a base_path that has none).
_scheme = "file:"
_local_base = base_path[len(_scheme):] if base_path.startswith(_scheme) else base_path
contract_path = _local_base + "contracts/"
os.makedirs(contract_path, exist_ok=True)

# Write each contract as pretty-printed JSON into the contracts folder.
# A failure on one file is logged and remembered, and the loop carries on with
# the rest, so the summary below reflects what actually happened.
failed_files = []
for filename, contract in contracts.items():
    try:
        with open(f"{contract_path}{filename}", "w") as file:
            json.dump(contract, file, indent=4)
        log_message("info", "data_contracts", "saving contracts", f"Saved at {contract_path}{filename}")
    except Exception as e:
        failed_files.append(filename)
        log_message("error", "data_contracts", "saving contracts", str(e))

if failed_files:
    saved_count = len(contracts) - len(failed_files)
    print(
        f"{saved_count} of {len(contracts)} contracts saved in {contract_path}; "
        f"failed: {', '.join(failed_files)}"
    )
else:
    print(f"All {len(contracts)} contracts successfully saved in", contract_path)

All 7 contracts successfully saved in /Workspace/Users/masa.cirkovic@abo.fi/contracts/


In [0]:
# List the contracts folder to check which contract files are present.
path = f"{base_path}contracts/"
contract_listing = dbutils.fs.ls(path)
display(contract_listing)

path,name,size,modificationTime
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/bronze_car_sales_contract.json,bronze_car_sales_contract.json,572,1742469590237
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/bronze_companies_contract.json,bronze_companies_contract.json,389,1742469590850
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/bronze_customers_contract.json,bronze_customers_contract.json,335,1742469591347
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/silver_car_sales_contract.json,silver_car_sales_contract.json,1562,1742469591800
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/silver_companies_contract.json,silver_companies_contract.json,786,1742469592148
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/silver_customers_contract.json,silver_customers_contract.json,673,1742469592477
file:/Workspace/Users/masa.cirkovic@abo.fi/contracts/golden_contract.json,golden_contract.json,2579,1742469592786
