## Uptime

* [#85](https://github.com/salgo60/Svenskaforsamlingar/issues/85)
* [RiksarkivetUptime.ipynb](https://github.com/salgo60/DIGG-skuggbacklog/blob/master/Notebook/RiksarkivetUptime.ipynb)
   * körs under https://github.com/salgo60/DIGG-skuggbacklog/
      * [Notebook/RiksarkivetUptime.ipynb](https://github.com/salgo60/DIGG-skuggbacklog/blob/master/Notebook/RiksarkivetUptime.ipynb)

In [1]:
import time

from datetime import datetime

# Wall-clock snapshot taken when the notebook starts.
now = datetime.now()
timestamp = now.timestamp()

# Reference point for the elapsed-time report printed by the final cell.
start_time = time.time()
print("Start:", format(datetime.now(), "%Y-%m-%d %H:%M:%S"))


Start: 2026-01-06 13:26:53


In [2]:
#list(git_commits_for_file(Path("history/riksarkivet-nad-se-ula-12231.yml")))[:5]


In [3]:
from pathlib import Path

# Output directory for the published CSV/JSON artefacts: the sibling "docs"
# folder of the current working directory, created on first use.
DOCS_DIR = (Path("..") / "docs").resolve()
DOCS_DIR.mkdir(exist_ok=True)
DOCS_DIR

PosixPath('/Users/salgo/Documents/GitHub/DIGG-skuggbacklog/docs')

In [4]:
import subprocess
import yaml
import pandas as pd
from datetime import datetime, timezone
from dateutil import parser
from pathlib import Path


In [5]:

from pathlib import Path
import subprocess

# Root of the git repository whose history is analysed.
# Discovered by walking up from the current working directory until a ".git"
# entry is found, so the notebook runs from any checkout location. The
# original author's absolute path is kept only as a last-resort fallback.
_LEGACY_REPO_ROOT = Path("/Users/salgo/Documents/GitHub/DIGG-skuggbacklog")
_start_dir = Path.cwd().resolve()
REPO_ROOT = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / ".git").exists()
    ),
    _LEGACY_REPO_ROOT,
).resolve()
assert (REPO_ROOT / ".git").exists(), "Not a git repository"


In [6]:
# =========================
# CONFIGURATION
# =========================

# Directory, relative to the repository root, that holds the status files.
HISTORY_DIR = Path("history")

# Endpoint key -> status file name inside HISTORY_DIR. Each key is the file
# stem with dashes replaced by underscores.
FILES = {
    Path(name).stem.replace("-", "_"): name
    for name in (
        "riksarkivet-nad-se-ula-12231.yml",
        "riksarkivet-forskarsalen.yml",
        "riksarkivet-kyrkbok-c0005418.yml",
    )
}


In [7]:
from dateutil import parser
from datetime import timezone

def git_commits_for_file(path: Path):
    """Yield ``(sha, commit_time_utc)`` for each commit touching ``path``, oldest first.

    Parameters
    ----------
    path : Path
        File path relative to ``REPO_ROOT`` (e.g. ``history/file.yml``).

    Yields
    ------
    tuple
        The full commit hash (``%H``) and the committer date (``%cI``)
        parsed and converted to UTC.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git log`` exits with a non-zero status.
    """
    rel_path = path.as_posix()  # e.g. history/file.yml

    cmd = [
        "git", "log",
        "--follow",
        "--format=%H|%cI",
        "--",                 # everything after this is a path, never a revision
        rel_path,
    ]

    out = subprocess.check_output(
        cmd,
        cwd=REPO_ROOT,        # the path above is relative to the repository root
        text=True,
    )

    # git prints newest first. The order is flipped here rather than with
    # `git log --reverse`, because --follow and --reverse are known not to
    # combine reliably when the file has been renamed.
    for line in reversed(out.splitlines()):
        if not line.strip():
            continue
        sha, ts = line.split("|", 1)
        yield sha, parser.isoparse(ts).astimezone(timezone.utc)

import yaml

def load_file_at_commit(commit: str, path: Path) -> dict:
    """Return the parsed YAML content of ``path`` as it was at ``commit``.

    Parameters
    ----------
    commit : str
        Commit hash (or any revision ``git show`` accepts).
    path : Path
        File path relative to ``REPO_ROOT``.

    Returns
    -------
    dict
        The YAML mapping stored in the file. An empty dict is returned when
        the file is empty or its top-level YAML value is not a mapping, so
        callers can always use ``.get``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git show`` fails, e.g. the path does not exist at that commit.
    """
    rel_path = path.as_posix()

    content = subprocess.check_output(
        ["git", "show", f"{commit}:{rel_path}"],
        cwd=REPO_ROOT,
        text=True,
        encoding="utf-8",     # decode independently of the local locale
    )

    data = yaml.safe_load(content)
    # safe_load yields None for an empty document and may yield a list or
    # scalar; normalise those to an empty mapping to honour the return type.
    return data if isinstance(data, dict) else {}


In [8]:
def extract_status_timeseries(endpoint: str, filename: str) -> pd.DataFrame:
    """Build the status history of one endpoint from the git log of its file.

    Parameters
    ----------
    endpoint : str
        Label stored in the ``endpoint`` column of every row.
    filename : str
        Name of the status file inside ``HISTORY_DIR``.

    Returns
    -------
    pd.DataFrame
        One row per commit with the columns ``endpoint``, ``commit``,
        ``time`` and ``status``. The columns are present even when the file
        has no commits, so downstream sorting by ``time`` never fails.
    """
    path = HISTORY_DIR / filename
    rows = []

    for commit, ts in git_commits_for_file(path):
        data = load_file_at_commit(commit, path)
        # An empty or non-mapping YAML document carries no status.
        status = data.get("status") if isinstance(data, dict) else None
        rows.append({
            "endpoint": endpoint,
            "commit": commit,
            "time": ts,
            "status": status,
        })

    return pd.DataFrame(rows, columns=["endpoint", "commit", "time", "status"])


In [9]:
def build_incidents(ts_df: pd.DataFrame) -> pd.DataFrame:
    """Collapse a status time series into a table of downtime incidents.

    An incident opens at the first ``"down"`` row after an ``"up"`` period and
    closes at the next ``"up"`` row. Rows with any other status neither open
    nor close an incident. If the series ends while an incident is open, that
    incident is reported with ``ongoing=True`` and the current UTC time as end.

    Parameters
    ----------
    ts_df : pd.DataFrame
        Frame with the columns ``endpoint``, ``time`` and ``status``. It may
        be empty, with or without those columns.

    Returns
    -------
    pd.DataFrame
        Columns ``endpoint``, ``start``, ``end``, ``duration_min`` (minutes,
        rounded to 2 decimals) and ``ongoing``. The columns are always
        present, even when no incident was found.
    """
    if ts_df.empty:
        return _empty_incidents()

    ordered = ts_df.sort_values("time").reset_index(drop=True)

    incidents = []
    down_since = None  # start of the currently open incident, if any

    for endpoint, when, status in zip(
        ordered["endpoint"], ordered["time"], ordered["status"]
    ):
        if status == "down":
            if down_since is None:
                # up -> down
                down_since = when
        elif status == "up" and down_since is not None:
            # down -> up
            incidents.append(_incident(endpoint, down_since, when, ongoing=False))
            down_since = None

    # Series ends while down -> ongoing incident.
    if down_since is not None:
        incidents.append(
            _incident(
                ordered["endpoint"].iloc[-1],
                down_since,
                datetime.now(timezone.utc),
                ongoing=True,
            )
        )

    if not incidents:
        return _empty_incidents()
    return pd.DataFrame(incidents, columns=_INCIDENT_COLUMNS)


_INCIDENT_COLUMNS = ["endpoint", "start", "end", "duration_min", "ongoing"]


def _incident(endpoint, start, end, ongoing):
    """Return one incident record with its duration in minutes."""
    minutes = (end - start).total_seconds() / 60
    return {
        "endpoint": endpoint,
        "start": start,
        "end": end,
        "duration_min": round(minutes, 2),
        "ongoing": ongoing,
    }


def _empty_incidents() -> pd.DataFrame:
    """Return an incident table with zero rows and typed columns."""
    return pd.DataFrame({
        "endpoint": pd.Series(dtype="object"),
        "start": pd.Series(dtype="datetime64[ns, UTC]"),
        "end": pd.Series(dtype="datetime64[ns, UTC]"),
        "duration_min": pd.Series(dtype="float64"),
        "ongoing": pd.Series(dtype="bool"),
    })


In [None]:
# One incident table per configured endpoint, built from its git history.
all_incidents = [
    build_incidents(extract_status_timeseries(endpoint, filename))
    for endpoint, filename in FILES.items()
]

# Stack the per-endpoint tables and order the incidents chronologically.
incidents_df = pd.concat(all_incidents, ignore_index=True).sort_values("start")

incidents_df


In [None]:
# Express each incident's duration in days (minutes -> hours -> days).
incidents_df["downtime_days"] = incidents_df["duration_min"].div(60).div(24).round(2)

incidents_df


In [None]:

#df = pd.read_csv("test.csv")
#df = incidents_df.drop(columns=["Unnamed: 0"])
# Write the incident table twice: a working copy in the current directory
# and the published copy in the docs folder.
for target in ("incidents_clean.csv", DOCS_DIR / "incidents_clean_2.csv"):
    incidents_df.to_csv(target, index=False)


In [None]:
# Per-endpoint incident statistics.
# Computed from the in-memory `incidents_df`: the name `df` is only assigned
# in a later cell, so using it here fails on Restart & Run All.
summary = (
    incidents_df.groupby("endpoint")
      .agg(
          incidents=("duration_min", "count"),
          total_downtime_min=("duration_min", "sum"),
          max_incident_min=("duration_min", "max"),
          median_incident_min=("duration_min", "median"),
      )
      .reset_index()
)

summary["total_downtime_hours"] = (summary["total_downtime_min"] / 60).round(2)
summary


In [None]:
import pandas as pd

# Reload the incident table from the CSV written earlier.
# The timestamps are parsed value by value instead of via `parse_dates`:
# an ongoing incident's `end` carries microseconds while the other values do
# not, and recent pandas versions leave such mixed-format columns as plain
# strings, which would break the `.date()` calls in the next cell.
df = pd.read_csv("incidents_clean.csv")
for _col in ("start", "end"):
    df[_col] = pd.to_datetime(df[_col].map(pd.Timestamp), utc=True)


In [None]:
from datetime import timedelta

# Expand every incident into one row per calendar day it touches
# (start date through end date, inclusive).
# NOTE(review): every day row carries the incident's full `duration_min`, so
# a multi-day incident is counted in full on each of its days when the rows
# are summed per date. Confirm this is intended.
rows = []

for _, r in df.iterrows():
    first_day = r["start"].date()
    span_days = (r["end"].date() - first_day).days
    rows.extend(
        {
            "endpoint": r["endpoint"],
            "date": first_day + timedelta(days=offset),
            "duration_min": r["duration_min"],
        }
        for offset in range(span_days + 1)
    )

# Explicit columns keep the frame usable by the following groupby even when
# there are no incidents at all.
calendar_df = pd.DataFrame(rows, columns=["endpoint", "date", "duration_min"])


In [None]:
# Per endpoint and date: number of incident rows and their summed duration.
per_day = calendar_df.groupby(["endpoint", "date"])
calendar_daily = per_day.agg(
    incidents=pd.NamedAgg(column="duration_min", aggfunc="count"),
    downtime_min=pd.NamedAgg(column="duration_min", aggfunc="sum"),
).reset_index()

# Working copy in the current directory plus the published copy in docs.
for destination in ("calendar_daily_2.csv", DOCS_DIR / "calendar_daily_2.csv"):
    calendar_daily.to_csv(destination, index=False)


In [None]:
import json
import subprocess
from datetime import datetime, timezone

# Short hash of the commit the analysed repository is currently on.
# Run inside REPO_ROOT, like every other git call in this notebook, so the
# recorded commit belongs to the repository the history was read from and
# not to whichever directory the kernel happens to be in.
commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"],
    cwd=REPO_ROOT,
    text=True
).strip()

# Provenance record published next to the CSV files.
meta = {
    "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
    "commit": commit,
}

with open(DOCS_DIR / "meta_2.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)
    f.write("\n")  # end the file with a newline


In [None]:
 # End timer and calculate duration
end_time = time.time()
elapsed_time = end_time - start_time

# Report the finish time and the total run time of the notebook.
print("Date:", format(datetime.now(), "%Y-%m-%d %H:%M:%S"))
minutes = elapsed_time // 60
seconds = elapsed_time % 60
print("Total time elapsed: {:02.0f} minutes {:05.2f} seconds".format(minutes, seconds))
