<a href="https://colab.research.google.com/github/samer-glitch/Federated-Governance-and-Provenance-Scoring-for-Trustworthy-AI-A-Metadata-Ledger-Approach/blob/main/Branch_A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only helpers: `drive` mounts Google Drive, `files` opens the browser
# upload dialog used by push_bundle.
from google.colab import drive, files
# Mount Drive at /content/drive; force_remount=True remounts even if a mount
# is already present.
drive.mount("/content/drive", force_remount=True)

import os, uuid, datetime, json, shutil, io, re, random
import pandas as pd, numpy as np

# Drive folder that receives a copy of every ledger CSV and per-dataset JSON
# bundle. The trailing slash matters wherever file names are concatenated
# directly onto this string.
LEDGER_DIR = "/content/drive/MyDrive/TADP_Ledgers/"
# Create the folder on first use; exist_ok=True makes re-running the cell safe.
os.makedirs(LEDGER_DIR, exist_ok=True)

# Per-dimension weights applied to the dimension averages when computing the
# overall PScore.
WEIGHTS = {
    "dim1": 0.20,
    "dim2": 0.20,
    "dim3": 0.20,
    "dim4": 0.15,
    "dim5": 0.25,
}


def policy(s):
    """Map a PScore to a ``(trust_category, rationale)`` pair.

    Scores of 3.75 and above are accepted, scores of 3.0 and above are sent
    for review, and every other value is quarantined.
    """
    if s >= 3.75:
        return ("High", "ACCEPT")
    if s >= 3.0:
        return ("Moderate", "REVIEW")
    return ("Low‑Moderate", "QUARANTINE")

def push_bundle(client_id, version_id,
                dim1, dim2, dim3, dim4, dim5, licence_note="research"):
    """Upload a CSV, score it, and record the result in the client's ledger.

    Opens the Colab upload dialog, reads the first uploaded file as CSV,
    averages the attribute scores of each dimension, combines the averages
    with ``WEIGHTS`` into a PScore, classifies it with ``policy`` and then
    writes:

    * one row appended to ``<client_id>_ledger.csv`` (local working dir),
    * a ``<upload stem>.json`` file holding the full bundle,
    * copies of both files in ``LEDGER_DIR``.

    Parameters
    ----------
    client_id : str
        Used to name the ledger file and stored in the bundle.
    version_id : str
        Stored in the bundle as-is.
    dim1, dim3, dim4, dim5 : callable
        Zero-argument callables returning a dict of attribute scores. Each
        value is either a number or a dict with a ``"score"`` key.
    dim2 : callable
        Same contract, but called with the uploaded DataFrame.
    licence_note : str, optional
        Stored under ``dimension_details["Context"]["Licence"]``.

    Raises
    ------
    ValueError
        If the upload dialog returns no file.
    """
    up = files.upload()
    if not up:
        # Previously surfaced as a bare StopIteration from next(iter(up)).
        raise ValueError("No file was uploaded; nothing to score.")
    fname = next(iter(up))  # only the first uploaded file is processed
    df = pd.read_csv(io.BytesIO(up[fname]))
    print(f"Rows in {fname}: {len(df):,}")

    stem = os.path.splitext(fname)[0]
    dataset_id = re.sub(r"[()\s]", "_", stem)

    def mean(d):
        # Attribute values are either plain numbers or {"score": n} dicts.
        return np.mean([v if isinstance(v, (int, float)) else v["score"]
                        for v in d.values()])

    # Evaluate every dimension callable exactly once. The callables may be
    # non-deterministic, so calling them a second time for the stored
    # "attrs" could record values that disagree with the averages and PScore.
    attrs = {"dim1": dim1(), "dim2": dim2(df),
             "dim3": dim3(), "dim4": dim4(), "dim5": dim5()}
    av = {k: mean(v) for k, v in attrs.items()}
    p = round(sum(WEIGHTS[k] * v for k, v in av.items()), 2)
    band, msg = policy(p)

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the stored string keeps the same naive ISO format as before.
    timestamp = (datetime.datetime.now(datetime.timezone.utc)
                 .replace(tzinfo=None).isoformat())

    bundle = {
        "tx_id": str(uuid.uuid4()),
        "timestamp": timestamp,
        "client_id": client_id,
        "dataset_id": dataset_id,
        "dataset_name": fname,
        "pscore": p,
        "trust_category": band,
        "rationale": msg,
        "version_id": version_id,
        "record_count": len(df),
        "dimension_details": {
            "Dimension1": {"attrs": attrs["dim1"], "avg": av["dim1"]},
            "Dimension2": {"attrs": attrs["dim2"], "avg": av["dim2"]},
            "Dimension3": {"attrs": attrs["dim3"], "avg": av["dim3"]},
            "Dimension4": {"attrs": attrs["dim4"], "avg": av["dim4"]},
            "Dimension5": {"attrs": attrs["dim5"], "avg": av["dim5"]},
            "Context": {"Licence": licence_note}}
    }

    # append / create ledger
    ledger = f"{client_id}_ledger.csv"
    drive_ledger = os.path.join(LEDGER_DIR, ledger)
    # If the local copy is missing (e.g. on a fresh runtime) but Drive already
    # holds a ledger, start from the Drive copy. Otherwise the copy below
    # would replace the full history on Drive with a single-row file.
    if not os.path.exists(ledger) and os.path.exists(drive_ledger):
        shutil.copy(drive_ledger, ledger)
    pd.DataFrame([bundle]).to_csv(
        ledger, mode="a", header=not os.path.exists(ledger), index=False)

    # Derive the JSON name from the stem: str.replace(".csv", ".json") left
    # names such as "X.CSV" unchanged, so the JSON overwrote the upload.
    json_name = stem + ".json"
    with open(json_name, "w", encoding="utf-8") as fp:
        json.dump(bundle, fp, indent=2)

    shutil.copy(ledger, drive_ledger)
    shutil.copy(json_name, os.path.join(LEDGER_DIR, json_name))
    print(f"🏁  {fname} uploaded | PScore {p} ({band})\n")


Mounted at /content/drive


In [2]:
CLIENT_ID = "BranchA"
VERSION_ID = "v1.0"


def dim1():
    """Return the Dimension 1 attribute scores for this submission."""
    return {
        "SourceReputation": 5,
        "DataController": 5,
        "AuditsCerts": 5,
        "DataObjective": 5,
        "CollectionMethod": 4,
    }


def dim3():
    """Return the Dimension 3 attribute scores for this submission."""
    return {
        "DataDictionary": 5,
        "VersionLogs": 4,
        "CollectionProtocol": 5,
        "UpdatesOnDefinitions": 4,
    }


def dim4():
    """Return the Dimension 4 attribute scores for this submission."""
    return {
        "DataFreshness": 4,
        "ScheduledRefresh": 4,
        "RetentionClarity": 4,
    }


def dim5():
    """Return the Dimension 5 attribute scores for this submission."""
    return {
        "RegCoverage": 5,
        "ConsentEthics": 5,
        "GeoLocation": 4,
        "Sensitivity": 4,
        "Audits": 4,
    }


def dim2(df):
    """Return the Dimension 2 attribute scores, each wrapped as {"score": n}.

    The uploaded DataFrame is accepted but not inspected; the scores are fixed.
    """
    return {
        "Completeness": {"score": 5},
        "DuplicationRate": {"score": 5},
        "ErrorRates": {"score": 5},
        "Consistency": {"score": 4},
    }


# Opens the upload dialog and records the scored bundle for this version.
push_bundle(
    CLIENT_ID, VERSION_ID,
    dim1, dim2, dim3, dim4, dim5,
    "clinical‑research",
)


Saving A1.csv to A1.csv
Rows in A1.csv: 8,140
🏁  A1.csv uploaded | PScore 4.51 (High)



In [3]:
CLIENT_ID, VERSION_ID = "BranchA", "v1.1"

# Seed the generator so the randomly assigned 4/5 scores, and therefore the
# PScore written to the ledger, are the same on every Restart & Run All.
random.seed(42)
rand45 = lambda: random.choice([4,5])

# Draw the random attribute scores once, here. If the draws lived inside the
# callables, every dimN() call would produce new values, and the attributes
# recorded for a bundle could differ from the ones its score was based on.
dim1_scores = {"SourceReputation":5,"DataController":rand45(),
               "AuditsCerts":5,"DataObjective":5,"CollectionMethod":rand45()}
dim3_scores = {"DataDictionary":5,"VersionLogs":rand45(),
               "CollectionProtocol":5,"UpdatesOnDefinitions":rand45()}
dim5_scores = {"RegCoverage":5,"ConsentEthics":rand45(),"GeoLocation":5,
               "Sensitivity":4,"Audits":5}

# Each callable hands back a fresh copy of the pre-drawn scores, so repeated
# calls agree with each other and callers cannot mutate the shared dict.
dim1 = lambda: dict(dim1_scores)
dim3 = lambda: dict(dim3_scores)
dim4 = lambda: {"DataFreshness":5,"ScheduledRefresh":4,"RetentionClarity":4}
dim5 = lambda: dict(dim5_scores)
# The uploaded DataFrame is accepted but not inspected; the scores are fixed.
dim2 = lambda df: {"Completeness":{"score":5},"DuplicationRate":{"score":4},
                   "ErrorRates":{"score":5},"Consistency":{"score":4}}

push_bundle(CLIENT_ID, VERSION_ID, dim1, dim2, dim3, dim4, dim5, "clinical‑research")


Saving A2.csv to A2.csv
Rows in A2.csv: 2,604
🏁  A2.csv uploaded | PScore 4.62 (High)



In [4]:
CLIENT_ID = "BranchA"
VERSION_ID = "v1.2"


def dim1():
    """Return the Dimension 1 attribute scores for this submission."""
    return {
        "SourceReputation": 3,
        "DataController": 3,
        "AuditsCerts": 2,
        "DataObjective": 3,
        "CollectionMethod": 3,
    }


def dim3():
    """Return the Dimension 3 attribute scores for this submission."""
    return {
        "DataDictionary": 3,
        "VersionLogs": 2,
        "CollectionProtocol": 3,
        "UpdatesOnDefinitions": 2,
    }


def dim4():
    """Return the Dimension 4 attribute scores for this submission."""
    return {
        "DataFreshness": 3,
        "ScheduledRefresh": 2,
        "RetentionClarity": 2,
    }


def dim5():
    """Return the Dimension 5 attribute scores for this submission."""
    return {
        "RegCoverage": 3,
        "ConsentEthics": 3,
        "GeoLocation": 3,
        "Sensitivity": 3,
        "Audits": 2,
    }


def dim2(df):
    """Return the Dimension 2 attribute scores, each wrapped as {"score": n}.

    The uploaded DataFrame is accepted but not inspected; the scores are fixed.
    """
    return {
        "Completeness": {"score": 4},
        "DuplicationRate": {"score": 3},
        "ErrorRates": {"score": 4},
        "Consistency": {"score": 3},
    }


# Opens the upload dialog and records the scored bundle for this version.
push_bundle(
    CLIENT_ID, VERSION_ID,
    dim1, dim2, dim3, dim4, dim5,
    "exploratory",
)


Saving A3.csv to A3.csv
Rows in A3.csv: 3,256
🏁  A3.csv uploaded | PScore 2.81 (Low‑Moderate)

