## Uptime

* [#85](https://github.com/salgo60/Svenskaforsamlingar/issues/85)
* [RiksarkivetUptime.ipynb](https://github.com/salgo60/Svenskaforsamlingar/tree/master/Notebook/RiksarkivetUptime.ipynb)
   * körs under https://github.com/salgo60/DIGG-skuggbacklog/
      * [Notebook/RiksarkivetUptime.ipynb](https://github.com/salgo60/DIGG-skuggbacklog/blob/master/Notebook/RiksarkivetUptime.ipynb)

In [1]:
import time

from datetime import datetime

# Moment the run began, kept both as a datetime and as epoch seconds.
now = datetime.now()
timestamp = now.timestamp()

# Stopwatch origin; the closing cell subtracts it to report the total run time.
start_time = time.time()
print("Start:", format(datetime.now(), "%Y-%m-%d %H:%M:%S"))


Start: 2025-12-27 23:45:55


In [2]:
#list(git_commits_for_file(Path("history/riksarkivet-nad-se-ula-12231.yml")))[:5]


In [18]:
from pathlib import Path

# Folder that receives the generated CSV/JSON files. It is resolved against
# the notebook's current working directory, so it lands next to that directory.
DOCS_DIR = Path("..", "docs").resolve()
DOCS_DIR.mkdir(exist_ok=True)
DOCS_DIR

PosixPath('/Users/salgo/Documents/GitHub/DIGG-skuggbacklog/docs')

In [3]:
import subprocess
import yaml
import pandas as pd
from datetime import datetime, timezone
from dateutil import parser
from pathlib import Path


In [4]:

from pathlib import Path
import subprocess

import os

# Root of the git checkout whose history the git commands below inspect.
# The location can be overridden with the DIGG_SKUGGBACKLOG_ROOT environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the previously hard-coded location is used unchanged.
REPO_ROOT = Path(
    os.environ.get(
        "DIGG_SKUGGBACKLOG_ROOT",
        "/Users/salgo/Documents/GitHub/DIGG-skuggbacklog",
    )
).resolve()

# Fail fast with a readable message instead of a confusing git error later on.
assert (REPO_ROOT / ".git").exists(), "Not a git repository"


In [5]:
# =========================
# CONFIGURATION
# =========================

# Directory (relative to the repository root) that holds the status files.
HISTORY_DIR = Path("history")

# Endpoint key -> status file name. Each key is the file name without its
# ".yml" suffix and with dashes turned into underscores.
FILES = {
    Path(name).stem.replace("-", "_"): name
    for name in (
        "riksarkivet-nad-se-ula-12231.yml",
        "riksarkivet-forskarsalen.yml",
        "riksarkivet-kyrkbok-c0005418.yml",
    )
}


In [6]:
from dateutil import parser
from datetime import timezone

def git_commits_for_file(path: Path):
    """Yield ``(sha, commit_time)`` for every commit that touched ``path``.

    Runs ``git log --follow --reverse`` inside ``REPO_ROOT``, so commits are
    produced oldest first. ``commit_time`` is the committer date (``%cI``)
    converted to UTC.

    Args:
        path: File path relative to the repository root, e.g. ``history/file.yml``.

    Yields:
        Tuples of the full commit hash and a timezone-aware UTC datetime.

    Raises:
        subprocess.CalledProcessError: If the git command exits with an error.
    """
    log_cmd = [
        "git", "log", "--follow", "--reverse", "--format=%H|%cI",
        "--",  # essential: everything after this is treated as a path
        path.as_posix(),
    ]
    # cwd is essential too: the relative path only makes sense inside the repo.
    log_text = subprocess.check_output(log_cmd, cwd=REPO_ROOT, text=True)

    for entry in log_text.strip().splitlines():
        commit_sha, committed_at = entry.split("|", 1)
        yield commit_sha, parser.isoparse(committed_at).astimezone(timezone.utc)

import yaml

def load_file_at_commit(commit: str, path: Path) -> dict:
    """Return the parsed YAML content of ``path`` as it was at ``commit``.

    The file is read with ``git show <commit>:<path>`` executed inside
    ``REPO_ROOT`` and parsed with ``yaml.safe_load``.

    Args:
        commit: Commit hash (or any revision git accepts) to read from.
        path: File path relative to the repository root.

    Returns:
        The parsed document. An empty document, for which ``yaml.safe_load``
        returns ``None``, is reported as an empty dict so callers can always
        use ``.get`` on the result.

    Raises:
        subprocess.CalledProcessError: If git cannot show the file at that commit.
    """
    rel_path = path.as_posix()

    cmd = [
        "git", "show",
        f"{commit}:{rel_path}"
    ]

    content = subprocess.check_output(
        cmd,
        cwd=REPO_ROOT,
        text=True
    )

    # safe_load gives None for an empty file; normalise that to {} to honour
    # the declared return type.
    return yaml.safe_load(content) or {}


In [7]:
def extract_status_timeseries(endpoint: str, filename: str) -> pd.DataFrame:
    """Build the status history of one endpoint from the git log of its file.

    For every commit returned by ``git_commits_for_file`` the file is loaded as
    it was at that commit and its ``status`` value is recorded.

    Args:
        endpoint: Label stored in the ``endpoint`` column of every row.
        filename: Name of the status file inside ``HISTORY_DIR``.

    Returns:
        A DataFrame with the columns ``endpoint``, ``commit``, ``time`` and
        ``status``, one row per commit.
    """
    history_file = HISTORY_DIR / filename

    records = [
        {
            "endpoint": endpoint,
            "commit": sha,
            "time": committed_at,
            "status": load_file_at_commit(sha, history_file).get("status"),
        }
        for sha, committed_at in git_commits_for_file(history_file)
    ]

    return pd.DataFrame(records)


In [8]:
def build_incidents(ts_df: pd.DataFrame) -> pd.DataFrame:
    """Collapse a status time series into downtime incidents.

    Rows are processed in chronological order. An incident opens at the first
    ``"down"`` row seen while no incident is open and closes at the next
    ``"up"`` row. Rows with any other status neither open nor close an
    incident. If the series ends while an incident is open, that incident is
    reported with the current UTC time as its end and ``ongoing=True``.

    Args:
        ts_df: Frame with at least the columns ``endpoint``, ``time`` and
            ``status``. A completely empty frame is accepted.

    Returns:
        A DataFrame with one row per incident and the columns ``endpoint``,
        ``start``, ``end``, ``duration_min`` (minutes, rounded to two
        decimals) and ``ongoing``. It is empty when there are no incidents.
    """
    # A frame built from zero rows has no columns at all; sorting it by "time"
    # would raise KeyError, so report "no incidents" right away.
    if ts_df.empty:
        return pd.DataFrame([])

    incidents = []

    ts_df = ts_df.sort_values("time").reset_index(drop=True)

    current_start = None

    for _, row in ts_df.iterrows():
        status = row["status"]
        moment = row["time"]

        if status == "down" and current_start is None:
            # up -> down: an incident begins
            current_start = moment

        elif status == "up" and current_start is not None:
            # down -> up: the open incident ends
            duration = (moment - current_start).total_seconds() / 60
            incidents.append({
                "endpoint": row["endpoint"],
                "start": current_start,
                "end": moment,
                "duration_min": round(duration, 2),
                "ongoing": False,
            })
            current_start = None

    # The series ended while still down -> the incident is still ongoing.
    if current_start is not None:
        end = datetime.now(timezone.utc)
        duration = (end - current_start).total_seconds() / 60
        incidents.append({
            "endpoint": ts_df.iloc[-1]["endpoint"],
            "start": current_start,
            "end": end,
            "duration_min": round(duration, 2),
            "ongoing": True,
        })

    return pd.DataFrame(incidents)


In [9]:
# Collect the incident table of every configured endpoint.
all_incidents = []
for endpoint, filename in FILES.items():
    ts_df = extract_status_timeseries(endpoint, filename)
    incidents = build_incidents(ts_df)
    all_incidents += [incidents]

# One combined table, ordered by the moment each incident began.
incidents_df = pd.concat(all_incidents, ignore_index=True).sort_values("start")

incidents_df


Unnamed: 0,endpoint,start,end,duration_min,ongoing
0,riksarkivet_nad_se_ula_12231,2023-05-09 05:13:45+00:00,2023-05-09 05:31:58+00:00,18.22,False
1,riksarkivet_nad_se_ula_12231,2023-05-09 05:57:01+00:00,2023-05-09 06:16:55+00:00,19.90,False
2,riksarkivet_nad_se_ula_12231,2023-05-09 06:47:53+00:00,2023-05-09 06:56:13+00:00,8.33,False
172,riksarkivet_forskarsalen,2023-05-11 15:45:24+00:00,2023-05-11 16:06:57+00:00,21.55,False
173,riksarkivet_forskarsalen,2023-05-11 16:28:07+00:00,2023-05-11 17:09:35+00:00,41.47,False
...,...,...,...,...,...
280,riksarkivet_forskarsalen,2025-12-18 18:20:52+00:00,2025-12-18 18:49:54+00:00,29.03,False
399,riksarkivet_kyrkbok_c0005418,2025-12-18 18:21:23+00:00,2025-12-18 18:49:57+00:00,28.57,False
171,riksarkivet_nad_se_ula_12231,2025-12-20 14:44:15+00:00,2025-12-20 14:52:05+00:00,7.83,False
281,riksarkivet_forskarsalen,2025-12-20 14:44:46+00:00,2025-12-20 14:52:08+00:00,7.37,False


In [10]:
# Express each incident's length in days as well (minutes -> hours -> days).
incidents_df["downtime_days"] = incidents_df["duration_min"].div(60).div(24).round(2)

incidents_df


Unnamed: 0,endpoint,start,end,duration_min,ongoing,downtime_days
0,riksarkivet_nad_se_ula_12231,2023-05-09 05:13:45+00:00,2023-05-09 05:31:58+00:00,18.22,False,0.01
1,riksarkivet_nad_se_ula_12231,2023-05-09 05:57:01+00:00,2023-05-09 06:16:55+00:00,19.90,False,0.01
2,riksarkivet_nad_se_ula_12231,2023-05-09 06:47:53+00:00,2023-05-09 06:56:13+00:00,8.33,False,0.01
172,riksarkivet_forskarsalen,2023-05-11 15:45:24+00:00,2023-05-11 16:06:57+00:00,21.55,False,0.01
173,riksarkivet_forskarsalen,2023-05-11 16:28:07+00:00,2023-05-11 17:09:35+00:00,41.47,False,0.03
...,...,...,...,...,...,...
280,riksarkivet_forskarsalen,2025-12-18 18:20:52+00:00,2025-12-18 18:49:54+00:00,29.03,False,0.02
399,riksarkivet_kyrkbok_c0005418,2025-12-18 18:21:23+00:00,2025-12-18 18:49:57+00:00,28.57,False,0.02
171,riksarkivet_nad_se_ula_12231,2025-12-20 14:44:15+00:00,2025-12-20 14:52:05+00:00,7.83,False,0.01
281,riksarkivet_forskarsalen,2025-12-20 14:44:46+00:00,2025-12-20 14:52:08+00:00,7.37,False,0.01


In [20]:

# Export the incident table computed in this notebook. This cell used to read
# "test.csv", a file no cell writes, so a fresh "Restart & Run All" either
# failed or silently published stale data. Using incidents_df directly removes
# that hidden dependency; resetting the index replaces the old step of
# dropping the "Unnamed: 0" column left behind by the CSV round trip.
df = incidents_df.reset_index(drop=True)

# Written twice: next to the notebook (reloaded further down) and into the
# docs folder.
df.to_csv("incidents_clean.csv", index=False)
df.to_csv(
    DOCS_DIR / "incidents_clean.csv",
    index=False
)


In [14]:
# Per-endpoint overview: number of incidents plus total, longest and median
# incident length in minutes, and the total expressed in hours.
summary = (
    df.groupby("endpoint")
      .agg(**{
          "incidents": ("duration_min", "count"),
          "total_downtime_min": ("duration_min", "sum"),
          "max_incident_min": ("duration_min", "max"),
          "median_incident_min": ("duration_min", "median"),
      })
      .reset_index()
      .assign(
          total_downtime_hours=lambda stats: (stats["total_downtime_min"] / 60).round(2)
      )
)
summary


Unnamed: 0,endpoint,incidents,total_downtime_min,max_incident_min,median_incident_min,total_downtime_hours
0,riksarkivet_forskarsalen,110,12180.07,4344.15,16.53,203.0
1,riksarkivet_kyrkbok_c0005418,119,13037.03,4262.85,22.35,217.28
2,riksarkivet_nad_se_ula_12231,172,11948.58,2430.03,20.525,199.14


In [15]:
import pandas as pd

# Reload the exported incidents so that "start" and "end" are parsed as dates.
df = pd.read_csv(
    "incidents_clean.csv",
    parse_dates=["start", "end"],
)


In [16]:
from datetime import timedelta

# Spread every incident over the calendar days it touches. Each day receives
# only the minutes of the incident that fall inside that day. Previously every
# touched day was given the incident's full duration, so a multi-day outage
# was counted several times and a single day could exceed 1440 minutes.
rows = []

for _, r in df.iterrows():
    incident_start = r["start"]
    incident_end = r["end"]
    day = incident_start.date()
    end_day = incident_end.date()

    while day <= end_day:
        # Midnight opening `day`, in the same timezone as the incident timestamps.
        day_open = pd.Timestamp(day).tz_localize(incident_start.tzinfo)
        day_close = day_open + timedelta(days=1)
        overlap_min = (
            min(incident_end, day_close) - max(incident_start, day_open)
        ).total_seconds() / 60

        # Drop a trailing day the incident only reaches at exactly 00:00; the
        # start day is always kept so zero-length incidents are counted once.
        if overlap_min > 0 or day == incident_start.date():
            rows.append({
                "endpoint": r["endpoint"],
                "date": day,
                "duration_min": round(overlap_min, 2),
            })
        day += timedelta(days=1)

calendar_df = pd.DataFrame(rows)


In [21]:
# Per endpoint and day: how many incident rows there are and their summed minutes.
calendar_daily = calendar_df.groupby(["endpoint", "date"], as_index=False).agg(
    incidents=("duration_min", "count"),
    downtime_min=("duration_min", "sum"),
)

# Same table written next to the notebook and into the docs folder.
calendar_daily.to_csv("calendar_daily.csv", index=False)
calendar_daily.to_csv(DOCS_DIR / "calendar_daily.csv", index=False)


In [22]:
import json
import subprocess
from datetime import datetime, timezone

# Short hash of the commit checked out in the repository the data was read
# from. cwd=REPO_ROOT matches the other git calls in this notebook, so the
# recorded commit does not depend on the directory the kernel was started in.
commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"],
    cwd=REPO_ROOT,
    text=True
).strip()

# Provenance record stored alongside the exported data.
meta = {
    "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
    "commit": commit,
}

with open(DOCS_DIR / "meta.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)


In [11]:
 # End timer and calculate duration
# Stop the stopwatch and work out how long the whole run took.
end_time = time.time()
elapsed_time = end_time - start_time

# Report the finishing time and the elapsed time split into minutes and seconds.
print("Date:", format(datetime.now(), "%Y-%m-%d %H:%M:%S"))
minutes, seconds = divmod(elapsed_time, 60)
print(f"Total time elapsed: {minutes:02.0f} minutes {seconds:05.2f} seconds")


Date: 2025-12-27 23:46:35
Total time elapsed: 00 minutes 40.30 seconds
