In [1]:
import os
import json
import markdown
import pandas as pd

In [None]:
def read_markdown(file_path):
    """Read a Markdown file as UTF-8 and pass its text through ``markdown.markdown``.

    Args:
        file_path: Path of the Markdown file to open.

    Returns:
        Whatever ``markdown.markdown`` returns for the file's complete text.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return markdown.markdown(raw_text)

In [4]:
def read_csv(file_path):
    """Load a CSV file with pandas and return the frame rendered as plain text.

    Args:
        file_path: Path of the CSV file, handed to ``pd.read_csv``.

    Returns:
        The string produced by ``DataFrame.to_string()`` for the loaded frame.
    """
    frame = pd.read_csv(file_path)
    rendered = frame.to_string()
    return rendered

In [2]:
def clean_text(text):
    """Collapse all whitespace in ``text`` down to single spaces.

    Newlines are first turned into spaces, the result is split on runs of
    whitespace, and the pieces are rejoined with one space between them, so
    the returned string has no leading or trailing whitespace.

    Args:
        text: The string to normalise.

    Returns:
        The whitespace-normalised string (empty if ``text`` held only whitespace).
    """
    flattened = text.replace("\n", " ")
    words = flattened.split()
    return " ".join(words)

In [5]:
def load_documents(folder="files"):
    """Walk ``folder`` recursively and load every Markdown and CSV file in it.

    Files are picked by extension, compared case-insensitively so that names
    such as ``README.MD`` or ``DATA.CSV`` are not silently skipped. ``.md``
    files go through ``read_markdown`` and ``.csv`` files through ``read_csv``;
    any other file is ignored. The loaded text is then normalised with
    ``clean_text``.

    Directories and files are visited in a sorted (case-insensitive) order so
    that repeated runs return the documents in the same sequence.

    Args:
        folder: Root directory to scan. Defaults to ``"files"``.

    Returns:
        A list of dicts, one per loaded file, with the keys:
        ``"source"`` (path of the file), ``"folder"`` (lower-cased name of the
        directory that directly contains the file) and ``"content"`` (the
        cleaned text).
    """
    def _order(name):
        # Case-insensitive first; the raw name breaks ties deterministically.
        return (name.lower(), name)

    docs = []
    for root, dirs, files in os.walk(folder):
        # Sorting ``dirs`` in place steers os.walk's top-down traversal.
        dirs.sort(key=_order)

        for filename in sorted(files, key=_order):
            lowered = filename.lower()
            if lowered.endswith(".md"):
                reader = read_markdown
            elif lowered.endswith(".csv"):
                reader = read_csv
            else:
                continue

            path = os.path.join(root, filename)
            cleaned = clean_text(reader(path))

            docs.append({
                "source": path,
                "folder": os.path.basename(os.path.dirname(path)).lower(),
                "content": cleaned
            })
    return docs

In [6]:
# Role -> folder-keyword mapping read by assign_role and build_access_map.
# Decode explicitly as UTF-8 (the standard JSON encoding, RFC 8259) instead of
# relying on the platform's default text encoding.
with open("src/role_mapping.json", encoding="utf-8") as f:
    tag_map = json.load(f)

def assign_role(doc):
    """Pick the role that owns a document, based on the folder it came from.

    The document's folder name is compared case-insensitively against every
    keyword listed for each role in the module-level ``tag_map``. Roles whose
    keyword list is exactly ``["*"]`` are skipped during this matching.

    Args:
        doc: A dict with a ``"folder"`` key holding the folder name as a string.

    Returns:
        The first role in ``tag_map`` with a keyword equal to the folder name.
        If none matches, ``"C-Level"`` when that role exists in ``tag_map``,
        otherwise ``"Employees"``.

    Raises:
        KeyError: If ``doc`` has no ``"folder"`` key.
    """
    # Normalise here as well: keywords are lower-cased below, so an
    # un-normalised folder such as "Finance" would otherwise never match.
    folder = doc["folder"].lower()

    for role, keywords in tag_map.items():
        if keywords == ["*"]:
            # Wildcard role: not tied to a specific folder.
            continue
        if any(keyword.lower() == folder for keyword in keywords):
            return role

    # Unmatched folders go to "C-Level" when it is defined.
    if "C-Level" in tag_map:
        return "C-Level"

    return "Employees"

In [7]:
def build_access_map(data):
    """Group document sources by the role allowed to read them.

    Every role in the module-level ``tag_map`` starts with an empty list. Each
    document is filed under the role returned by ``assign_role``. When a
    ``"C-Level"`` entry exists, each document is also listed there, exactly
    once, even if ``"C-Level"`` is the document's own assigned role.

    Args:
        data: Iterable of document dicts; each needs a ``"source"`` key plus
            whatever ``assign_role`` reads.

    Returns:
        A dict mapping role name to a list of document sources, in the order
        the documents appear in ``data``.
    """
    access = {role: [] for role in tag_map}

    for doc in data:
        role = assign_role(doc)
        source = doc["source"]

        # setdefault: assign_role may return a fallback role that is not a key
        # of tag_map; indexing directly would raise KeyError.
        access.setdefault(role, []).append(source)

        # Skip the extra append when the document is already filed under
        # "C-Level", otherwise it would appear twice.
        if role != "C-Level" and "C-Level" in access:
            access["C-Level"].append(source)

    return access

In [8]:
# Load every supported document under "files", then group the document
# sources by role. Both names are notebook globals read by later cells.
data = load_documents("files")
access_map = build_access_map(data)

In [9]:
# Print each role as a heading, followed by one bullet per source listed for it.
for role in access_map:
    print(f"\nRole: {role}")
    for source in access_map[role]:
        print("  -", source)


Role: Finance
  - files\Finance\financial_summary.md
  - files\Finance\quarterly_financial_report.md

Role: Marketing
  - files\marketing\marketing_report_2024.md
  - files\marketing\marketing_report_q1_2024.md
  - files\marketing\marketing_report_q2_2024.md
  - files\marketing\marketing_report_q3_2024.md
  - files\marketing\market_report_q4_2024.md

Role: HR
  - files\HR\hr_data.csv

Role: Engineering
  - files\engineering\engineering_master_doc.md

Role: Employees
  - files\general\employee_handbook.md

Role: C-Level
  - files\engineering\engineering_master_doc.md
  - files\Finance\financial_summary.md
  - files\Finance\quarterly_financial_report.md
  - files\general\employee_handbook.md
  - files\HR\hr_data.csv
  - files\marketing\marketing_report_2024.md
  - files\marketing\marketing_report_q1_2024.md
  - files\marketing\marketing_report_q2_2024.md
  - files\marketing\marketing_report_q3_2024.md
  - files\marketing\market_report_q4_2024.md
