<a href="https://colab.research.google.com/github/samer-glitch/Federated-Governance-and-Provenance-Scoring-for-Trustworthy-AI-A-Metadata-Ledger-Approach/blob/main/Branch_D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Standard library ---
import datetime
import io
import json
import os
import random
import re
import shutil
import uuid

# --- Third-party ---
import numpy as np
import pandas as pd

# --- Colab runtime helpers ---
from google.colab import drive, files

# Mount Google Drive; force_remount=True re-mounts even if already mounted.
drive.mount("/content/drive", force_remount=True)

# Folder on Drive where ledger CSVs and bundle JSON files are copied.
# The trailing slash matters wherever this value is concatenated with a file name.
LEDGER_DIR = "/content/drive/MyDrive/TADP_Ledgers/"
os.makedirs(LEDGER_DIR, exist_ok=True)

# Relative weight of each dimension's average in the overall score.
WEIGHTS = {
    "dim1": 0.20,
    "dim2": 0.20,
    "dim3": 0.20,
    "dim4": 0.15,
    "dim5": 0.25,
}


def policy(s):
    """Map a score to a ``(trust_category, rationale)`` pair.

    Scores of 3.75 and above are accepted, scores of 3.0 and above are sent
    for review, and everything else is quarantined.
    """
    if s >= 3.75:
        return ("High", "ACCEPT")
    if s >= 3.0:
        return ("Moderate", "REVIEW")
    return ("Low‑Moderate", "QUARANTINE")

def push_bundle(client_id, version_id,
                dim1, dim2, dim3, dim4, dim5, licence_note="research"):
    """Upload one CSV through the Colab file picker, score it, and log the result.

    Each dimension callable is evaluated exactly once. The attribute scores of
    every dimension are averaged, the averages are combined using ``WEIGHTS``,
    and the total (rounded to two decimals) is mapped to a trust band by
    ``policy``. The resulting bundle is appended to ``<client_id>_ledger.csv``
    and written to a JSON file named after the upload; both files end up under
    ``LEDGER_DIR`` and in the current working directory.

    Args:
        client_id: Identifier stored in the bundle; also the ledger file-name
            prefix.
        version_id: Version label stored in the bundle.
        dim1, dim3, dim4, dim5: Zero-argument callables returning a dict that
            maps an attribute name to either a number or a dict carrying a
            ``"score"`` key.
        dim2: Like the other dimensions, but called with the uploaded
            DataFrame as its only argument.
        licence_note: Free-text note stored under ``Context -> Licence``.

    Raises:
        ValueError: If the upload dialog returns no file.
    """
    up = files.upload()
    if not up:
        # Previously surfaced as a bare StopIteration from next(iter(...)).
        raise ValueError("No file was uploaded; nothing to score.")
    # Only the first uploaded file is processed.
    fname = next(iter(up))
    df = pd.read_csv(io.BytesIO(up[fname]))
    print(f"Rows in {fname}: {len(df):,}")

    stem = os.path.splitext(fname)[0]
    dataset_id = re.sub(r"[()\s]", "_", stem)

    # Evaluate every dimension once so the stored attributes are guaranteed to
    # be the same values the averages were computed from.
    attrs = {"dim1": dim1(), "dim2": dim2(df),
             "dim3": dim3(), "dim4": dim4(), "dim5": dim5()}

    def mean(d):
        # An attribute is either a plain number or a dict with a "score" entry.
        return np.mean([v if isinstance(v, (int, float)) else v["score"]
                        for v in d.values()])

    av = {k: mean(d) for k, d in attrs.items()}
    p = round(sum(WEIGHTS[k] * v for k, v in av.items()), 2)
    band, msg = policy(p)

    # Same naive-UTC ISO string utcnow() produced, without the deprecated call.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    bundle = {
        "tx_id": str(uuid.uuid4()),
        "timestamp": now_utc.isoformat(),
        "client_id": client_id,
        "dataset_id": dataset_id,
        "dataset_name": fname,
        "pscore": p,
        "trust_category": band,
        "rationale": msg,
        "version_id": version_id,
        "record_count": len(df),
        "dimension_details": {
            "Dimension1": {"attrs": attrs["dim1"], "avg": av["dim1"]},
            "Dimension2": {"attrs": attrs["dim2"], "avg": av["dim2"]},
            "Dimension3": {"attrs": attrs["dim3"], "avg": av["dim3"]},
            "Dimension4": {"attrs": attrs["dim4"], "avg": av["dim4"]},
            "Dimension5": {"attrs": attrs["dim5"], "avg": av["dim5"]},
            "Context": {"Licence": licence_note}}
    }

    # append / create ledger
    # The Drive copy is the persistent one: append to it directly so a fresh
    # runtime (empty working directory) cannot overwrite earlier history with a
    # one-row file, then mirror it into the working directory.
    # NOTE(review): the nested "dimension_details" dict is written to the CSV
    # as its Python repr, not as JSON.
    ledger = f"{client_id}_ledger.csv"
    drive_ledger = os.path.join(LEDGER_DIR, ledger)
    needs_header = (not os.path.exists(drive_ledger)
                    or os.path.getsize(drive_ledger) == 0)
    pd.DataFrame([bundle]).to_csv(
        drive_ledger, mode="a", header=needs_header, index=False)
    shutil.copy(drive_ledger, ledger)

    # Build the JSON name from the stem: a plain ".csv" -> ".json" replace
    # leaves names such as "X.CSV" unchanged and would overwrite the upload.
    json_name = stem + ".json"
    with open(json_name, "w") as fp:
        json.dump(bundle, fp, indent=2)
    shutil.copy(json_name, os.path.join(LEDGER_DIR, json_name))
    print(f"🏁  {fname} uploaded | PScore {p} ({band})\n")


Mounted at /content/drive


In [None]:
# ---------- Branch D  ·  Batch #1  (Low‑Moderate) ----------
CLIENT_ID = "BranchD"
VERSION_ID = "v1.0"


def dim1():
    """Attribute scores for dimension 1 of this batch."""
    return {
        "SourceReputation": 2,
        "DataController": 2,
        "AuditsCerts": 1,
        "DataObjective": 2,
        "CollectionMethod": 2,
    }


def dim2(df):
    """Attribute scores for dimension 2; the DataFrame argument is not used."""
    return {
        "Completeness": {"score": 2},
        "DuplicationRate": {"score": 3},
        "ErrorRates": {"score": 3},
        "Consistency": {"score": 2},
    }


def dim3():
    """Attribute scores for dimension 3 of this batch."""
    return {
        "DataDictionary": 2,
        "VersionLogs": 1,
        "CollectionProtocol": 2,
        "UpdatesOnDefinitions": 1,
    }


def dim4():
    """Attribute scores for dimension 4 of this batch."""
    return {
        "DataFreshness": 2,
        "ScheduledRefresh": 1,
        "RetentionClarity": 1,
    }


def dim5():
    """Attribute scores for dimension 5 of this batch."""
    return {
        "RegCoverage": 2,
        "ConsentEthics": 2,
        "GeoLocation": 2,
        "Sensitivity": 2,
        "Audits": 1,
    }


# Opens the upload dialog, scores the chosen CSV, and records the bundle.
push_bundle(
    CLIENT_ID, VERSION_ID,
    dim1, dim2, dim3, dim4, dim5,
    licence_note="testing‑only",
)


Saving D1.csv to D1.csv
Rows in D1.csv: 2,279
🏁  D1.csv uploaded | PScore 1.81 (Low‑Moderate)



In [None]:
# ---------- Branch D  ·  Batch #2  (High) ----------
CLIENT_ID = "BranchD"
VERSION_ID = "v1.1"


def dim1():
    """Attribute scores for dimension 1 of this batch."""
    return {
        "SourceReputation": 5,
        "DataController": 4,
        "AuditsCerts": 4,
        "DataObjective": 5,
        "CollectionMethod": 5,
    }


def dim2(df):
    """Attribute scores for dimension 2; the DataFrame argument is not used."""
    return {
        "Completeness": {"score": 5},
        "DuplicationRate": {"score": 5},
        "ErrorRates": {"score": 5},
        "Consistency": {"score": 5},
    }


def dim3():
    """Attribute scores for dimension 3 of this batch."""
    return {
        "DataDictionary": 5,
        "VersionLogs": 4,
        "CollectionProtocol": 5,
        "UpdatesOnDefinitions": 5,
    }


def dim4():
    """Attribute scores for dimension 4 of this batch."""
    return {
        "DataFreshness": 5,
        "ScheduledRefresh": 4,
        "RetentionClarity": 4,
    }


def dim5():
    """Attribute scores for dimension 5 of this batch."""
    return {
        "RegCoverage": 5,
        "ConsentEthics": 5,
        "GeoLocation": 5,
        "Sensitivity": 5,
        "Audits": 4,
    }


# Opens the upload dialog, scores the chosen CSV, and records the bundle.
push_bundle(
    CLIENT_ID, VERSION_ID,
    dim1, dim2, dim3, dim4, dim5,
    licence_note="production",
)


Saving D2.csv to D2.csv
Rows in D2.csv: 1,631
🏁  D2.csv uploaded | PScore 4.72 (High)

