In [None]:
from cassandra.cluster import Cluster
import csv
import uuid


def fetch_data_from_cassandra():
    
    cluster = Cluster(["localhost"])
    session = cluster.connect()
    session.set_keyspace('customer_contract_management')

    
    rows_query = "SELECT COUNT(*) FROM clients"
    rows_count = session.execute(rows_query).one()[0]

    
    if rows_count > 100:
        fetch_limit = 50
    elif rows_count > 50:
        fetch_limit = 20
    else:
        fetch_limit = rows_count

    
    query = """
        SELECT client_id, name, address, contact_number, 
               area_type_id, classification_id, client_status_id
        FROM clients
        LIMIT %s
    """
    clients = session.execute(query, [fetch_limit])
    clients = list(clients)  

    
    area_types_query = """
        SELECT area_type_id, area_type, description FROM area_types
    """
    area_types = session.execute(area_types_query)
    area_types = list(area_types)  

    
    classifications_query = """
        SELECT classification_id, classification, description FROM classifications
    """
    classifications = session.execute(classifications_query)
    classifications = list(classifications)  
    client_statuses_query = """
        SELECT client_status_id, account_status, description FROM client_statuses
    """
    client_statuses = session.execute(client_statuses_query)
    client_statuses = list(client_statuses)  
    contracts_query = """
        SELECT contract_id, client_id, area_type_id, contract_type_id, start_date, end_date,
               status_id, base_fee, last_updated, reason_for_status
        FROM contracts
        LIMIT %s
    """
    contracts = session.execute(contracts_query, [fetch_limit])
    contracts = list(contracts)  
    contract_statuses_query = """
        SELECT status_id, status, description FROM contract_statuses
    """
    contract_statuses = session.execute(contract_statuses_query)
    contract_statuses = list(contract_statuses)  
    contract_policies_query = """
        SELECT contract_id, policy_id FROM contract_policies
        LIMIT %s
    """
    contract_policies = session.execute(contract_policies_query, [fetch_limit])
    contract_policies = list(contract_policies)  
    policies_query = """
        SELECT policy_id, policy_name, policy_details, effective_date
        FROM regulatory_policies
    """
    policies = session.execute(policies_query)
    policies = list(policies)  

    return clients, area_types, classifications, client_statuses, contracts, contract_statuses, contract_policies, policies



def merge_and_export_to_csv():
    # Fetch data from Cassandra
    clients, area_types, classifications, client_statuses, contracts, contract_statuses, contract_policies, policies = fetch_data_from_cassandra()

    # Create dictionaries for quick lookups
    area_types_dict = {at.area_type_id: at for at in area_types}
    classifications_dict = {cl.classification_id: cl for cl in classifications}
    client_statuses_dict = {cs.client_status_id: cs for cs in client_statuses}
    contract_statuses_dict = {cs.status_id: cs for cs in contract_statuses}
    policies_dict = {policy.policy_id: policy for policy in policies}

    # Create a list for merged rows
    merged_rows = []

    # Merge data from Clients, Contracts, Contract Policies, and Policies
    for client in clients:
        # Get the related area type, classification, and client status for this client
        area_type = area_types_dict.get(client.area_type_id)
        classification = classifications_dict.get(client.classification_id)
        client_status = client_statuses_dict.get(client.client_status_id)

        # Check if the related records exist
        if area_type and classification and client_status:
            print(f"Merging client: {client.client_id}")
        else:
            print(f"Skipping client: {client.client_id} - Missing related data")
        # Find the related contracts for this client
        client_contracts = [
            contract for contract in contracts if contract.client_id == client.client_id]

        for contract in client_contracts:
            # Get the related contract status for this contract
            contract_status = contract_statuses_dict.get(contract.status_id)

            # Find related contract policies for this contract
            related_policies = [
                cp for cp in contract_policies if cp.contract_id == contract.contract_id]

            # Merge each contract with the related policies
            for cp in related_policies:
                policy = policies_dict.get(cp.policy_id)

                # Merge client, contract, and policy data
                merged_rows.append([
                    client.client_id, client.name, client.address, client.contact_number,
                    area_type.area_type if area_type else "Unknown", area_type.description if area_type else "Unknown",
                    classification.classification if classification else "Unknown", classification.description if classification else "Unknown",
                    client_status.account_status if client_status else "Unknown", client_status.description if client_status else "Unknown",
                    contract.contract_id, contract.start_date, contract.end_date, contract.base_fee, contract.last_updated,
                    contract_status.status if contract_status else "Unknown", contract_status.description if contract_status else "Unknown",
                    policy.policy_name if policy else "Unknown", policy.policy_details if policy else "Unknown", policy.effective_date if policy else "Unknown"
                ])

    # Write merged data to a CSV file
    with open('merged_cassandra.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the header row
        writer.writerow([
            "ClientID", "ClientName", "Address", "ContactNumber", "AreaType", "AreaDescription",
            "Classification", "ClassificationDescription", "AccountStatus", "StatusDescription",
            "ContractID", "StartDate", "EndDate", "BaseFee", "LastUpdated", "ContractStatus",
            "ContractStatusDescription", "PolicyName", "PolicyDetails", "EffectiveDate"
        ])

        # Write merged rows
        for row in merged_rows:
            writer.writerow(row)

    print("CSV file 'merged_cassandra.csv' has been created successfully!")


if __name__ == "__main__":
    merge_and_export_to_csv()

In [18]:
from cassandra.cluster import Cluster
import csv
import uuid


def fetch_data_from_cassandra():
    # Connect to Cassandra
    cluster = Cluster(["localhost"])
    session = cluster.connect()
    session.set_keyspace('customer_contract_management')

    # Fetch the number of rows in the clients table
    rows_query = "SELECT COUNT(*) FROM clients"
    rows_count = session.execute(rows_query).one()[0]

    # Determine fetch limit
    if rows_count > 100:
        fetch_limit = 50
    elif rows_count > 50:
        fetch_limit = 20
    else:
        fetch_limit = rows_count

    # Fetch data from the clients table (without aliases)
    query = """
        SELECT client_id, name, address, contact_number, 
               area_type_id, classification_id, client_status_id
        FROM clients
        LIMIT %s
    """
    clients = session.execute(query, [fetch_limit])
    clients = list(clients)  # Convert to list

    # Fetch data from the area_types table (without aliases)
    area_types_query = """
        SELECT area_type_id, area_type, description FROM area_types
    """
    area_types = session.execute(area_types_query)
    area_types = list(area_types)  # Convert to list

    # Fetch data from the classifications table (without aliases)
    classifications_query = """
        SELECT classification_id, classification, description FROM classifications
    """
    classifications = session.execute(classifications_query)
    classifications = list(classifications)  # Convert to list

    # Fetch data from the client_statuses table (without aliases)
    client_statuses_query = """
        SELECT client_status_id, account_status, description FROM client_statuses
    """
    client_statuses = session.execute(client_statuses_query)
    client_statuses = list(client_statuses)  # Convert to list

    # Fetch Contracts data (without aliases)
    contracts_query = """
        SELECT contract_id, client_id, area_type_id, contract_type_id, start_date, end_date,
               status_id, base_fee, last_updated, reason_for_status
        FROM contracts
        LIMIT %s
    """
    contracts = session.execute(contracts_query, [fetch_limit])
    contracts = list(contracts)  # Convert to list

    # Fetch Contract Statuses (without aliases)
    contract_statuses_query = """
        SELECT status_id, status, description FROM contract_statuses
    """
    contract_statuses = session.execute(contract_statuses_query)
    contract_statuses = list(contract_statuses)  # Convert to list

    # Fetch Contract Policies data separately (without aliases)
    contract_policies_query = """
        SELECT contract_id, policy_id FROM contract_policies
        LIMIT %s
    """
    contract_policies = session.execute(contract_policies_query, [fetch_limit])
    contract_policies = list(contract_policies)  # Convert to list

    # Fetch Regulatory Policies (without aliases)
    policies_query = """
        SELECT policy_id, policy_name, policy_details, effective_date
        FROM regulatory_policies
    """
    policies = session.execute(policies_query)
    policies = list(policies)  # Convert to list

    return clients, area_types, classifications, client_statuses, contracts, contract_statuses, contract_policies, policies


def merge_and_export_to_csv():
    # Fetch data from Cassandra
    clients, area_types, classifications, client_statuses, contracts, contract_statuses, contract_policies, policies = fetch_data_from_cassandra()

    # Debugging: print number of clients and contracts fetched
    print(f"Fetched {len(clients)} clients from the database.")
    print(f"Fetched {len(contracts)} contracts from the database.")

    # Create dictionaries for quick lookups
    area_types_dict = {at.area_type_id: at for at in area_types}
    classifications_dict = {cl.classification_id: cl for cl in classifications}
    client_statuses_dict = {cs.client_status_id: cs for cs in client_statuses}
    contract_statuses_dict = {cs.status_id: cs for cs in contract_statuses}
    policies_dict = {policy.policy_id: policy for policy in policies}

    # Create a list for merged rows
    merged_rows = []

    # Merge data from Clients, Contracts, Contract Policies, and Policies
    for client in clients:
        print(f"Merging client: {client.client_id}")

        # Get the related area type, classification, and client status for this client
        area_type = area_types_dict.get(client.area_type_id)
        classification = classifications_dict.get(client.classification_id)
        client_status = client_statuses_dict.get(client.client_status_id)

        # Check if the related records exist
        if area_type and classification and client_status:
            print(f"Found related data for client: {client.client_id}")
        else:
            print(f"Skipping client {client.client_id} - Missing related data")

        # Find the related contracts for this client
        client_contracts = [
            contract for contract in contracts if contract.client_id == client.client_id
        ]

        # Debugging: Check the number of contracts found for the client
        print(f"Client {client.client_id} has {len(client_contracts)} contracts")

        # If no contracts were found, skip to the next client
        if len(client_contracts) == 0:
            print(f"No contracts found for client {client.client_id}. Skipping.")
            continue

        for contract in client_contracts:
            # Debugging: Check the contract data
            print(f"Found contract {contract.contract_id} for client {client.client_id}")

            # Get the related contract status for this contract
            contract_status = contract_statuses_dict.get(contract.status_id)

            # Find related contract policies for this contract
            related_policies = [
                cp for cp in contract_policies if cp.contract_id == contract.contract_id
            ]

            # Debugging: Check number of policies for this contract
            print(f"Found {len(related_policies)} policies for contract {contract.contract_id}")

            # Merge each contract with the related policies
            for cp in related_policies:
                policy = policies_dict.get(cp.policy_id)

                # Merge client, contract, and policy data
                merged_rows.append([
                    client.client_id, client.name, client.address, client.contact_number,
                    area_type.area_type if area_type else "Unknown", area_type.description if area_type else "Unknown",
                    classification.classification if classification else "Unknown", classification.description if classification else "Unknown",
                    client_status.account_status if client_status else "Unknown", client_status.description if client_status else "Unknown",
                    contract.contract_id, contract.start_date, contract.end_date, contract.base_fee, contract.last_updated,
                    contract_status.status if contract_status else "Unknown", contract_status.description if contract_status else "Unknown",
                    policy.policy_name if policy else "Unknown", policy.policy_details if policy else "Unknown", policy.effective_date if policy else "Unknown"
                ])

    # Write merged data to a CSV file
    with open('merged_cassandra.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the header row
        writer.writerow([
            "ClientID", "ClientName", "Address", "ContactNumber", "AreaType", "AreaDescription",
            "Classification", "ClassificationDescription", "AccountStatus", "StatusDescription",
            "ContractID", "StartDate", "EndDate", "BaseFee", "LastUpdated", "ContractStatus",
            "ContractStatusDescription", "PolicyName", "PolicyDetails", "EffectiveDate"
        ])

        # Write merged rows
        for row in merged_rows:
            writer.writerow(row)

    print(f"CSV file 'merged_cassandra.csv' has been created successfully with {len(merged_rows)} rows!")



if __name__ == "__main__":
    merge_and_export_to_csv()

Fetched 50 clients from the database.
Fetched 50 contracts from the database.
Merging client: 7f8b83a9-d6c2-4309-b6c7-8b4ae8cd40ec
Found related data for client: 7f8b83a9-d6c2-4309-b6c7-8b4ae8cd40ec
Client 7f8b83a9-d6c2-4309-b6c7-8b4ae8cd40ec has 0 contracts
No contracts found for client 7f8b83a9-d6c2-4309-b6c7-8b4ae8cd40ec. Skipping.
Merging client: 75ad2f97-c1bc-49e5-ad6b-8681a0b143c9
Found related data for client: 75ad2f97-c1bc-49e5-ad6b-8681a0b143c9
Client 75ad2f97-c1bc-49e5-ad6b-8681a0b143c9 has 0 contracts
No contracts found for client 75ad2f97-c1bc-49e5-ad6b-8681a0b143c9. Skipping.
Merging client: dbe08fa5-9281-4a14-b2a6-48abf1a6d7d3
Found related data for client: dbe08fa5-9281-4a14-b2a6-48abf1a6d7d3
Client dbe08fa5-9281-4a14-b2a6-48abf1a6d7d3 has 0 contracts
No contracts found for client dbe08fa5-9281-4a14-b2a6-48abf1a6d7d3. Skipping.
Merging client: ea614ade-9cee-43ba-bb90-319f7079f8dc
Found related data for client: ea614ade-9cee-43ba-bb90-319f7079f8dc
Client ea614ade-9cee-43b