## Uptime

* [#85](https://github.com/salgo60/Svenskaforsamlingar/issues/85)
* [RiksarkivetUptime.ipynb](https://github.com/salgo60/DIGG-skuggbacklog/blob/master/Notebook/RiksarkivetUptime.ipynb)
   * körs under https://github.com/salgo60/DIGG-skuggbacklog/
      * [Notebook/RiksarkivetUptime.ipynb](https://github.com/salgo60/DIGG-skuggbacklog/blob/master/Notebook/RiksarkivetUptime.ipynb)

In [1]:
import time

from datetime import datetime

now = datetime.now()
timestamp = now.timestamp()

start_time = time.time()
print("Start:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


Start: 2026-01-06 13:56:23


In [2]:
#list(git_commits_for_file(Path("history/riksarkivet-nad-se-ula-12231.yml")))[:5]


In [3]:
from pathlib import Path

DOCS_DIR = Path("../docs").resolve()
DOCS_DIR.mkdir(exist_ok=True)
DOCS_DIR

PosixPath('/Users/salgo/Documents/GitHub/DIGG-skuggbacklog/docs')

In [4]:
import subprocess
import yaml
import pandas as pd
from datetime import datetime, timezone
from dateutil import parser
from pathlib import Path


In [5]:

from pathlib import Path
import subprocess

REPO_ROOT = Path("/Users/salgo/Documents/GitHub/DIGG-skuggbacklog").resolve()
assert (REPO_ROOT / ".git").exists(), "Not a git repository"


In [6]:
# =========================
# KONFIGURATION
# =========================

HISTORY_DIR = Path("history")

FILES = {
    "riksarkivet_nad_se_ula_12231": "riksarkivet-nad-se-ula-12231.yml",
    "riksarkivet_forskarsalen": "riksarkivet-forskarsalen.yml",
    "riksarkivet_kyrkbok_c0005418": "riksarkivet-kyrkbok-c0005418.yml",
}


In [7]:
import subprocess
import re
from dateutil import parser
from datetime import timezone
from pathlib import Path

def git_commits_for_file(path: Path):
    rel_path = path.as_posix()

    cmd = [
        "git", "log",
        "--follow",
        "--reverse",
        "--format=%H|%cI|%s",
        "--",
        rel_path
    ]

    out = subprocess.check_output(
        cmd,
        cwd=REPO_ROOT,
        text=True
    )

    ISSUE_RE = re.compile(r"#(\d+)")

    for line in out.strip().splitlines():
        sha, ts, subject = line.split("|", 2)
        issues = ISSUE_RE.findall(subject)

        yield (
            sha,
            parser.isoparse(ts).astimezone(timezone.utc),
            issues,
        )


import yaml

def load_file_at_commit(commit: str, path: Path) -> dict:
    rel_path = path.as_posix()

    cmd = [
        "git", "show",
        f"{commit}:{rel_path}"
    ]

    content = subprocess.check_output(
        cmd,
        cwd=REPO_ROOT,
        text=True
    )

    return yaml.safe_load(content)


In [8]:
def extract_status_timeseries(endpoint: str, filename: str) -> pd.DataFrame:
    path = HISTORY_DIR / filename
    rows = []

    for commit, ts, issues in git_commits_for_file(path):
        data = load_file_at_commit(commit, path)
        rows.append({
            "endpoint": endpoint,
            "commit": commit,
            "time": ts,
            "status": data.get("status"),
            "issues": issues,
        })

    return pd.DataFrame(rows)



In [9]:
from datetime import datetime, timezone
import pandas as pd

def build_incidents(ts_df: pd.DataFrame) -> pd.DataFrame:
    """
    Bygger incidenter från en tidsserie av statusändringar.

    Förväntade kolumner i ts_df:
      - endpoint : str
      - time     : datetime (timezone-aware, UTC)
      - status   : 'up' | 'down'
      - issues   : list[str]

    Returnerar DataFrame med:
      - endpoint
      - start
      - end
      - duration_min
      - ongoing
      - issues
    """

    incidents = []

    ts_df = ts_df.sort_values("time").reset_index(drop=True)

    current_start = None
    collected_issues = []

    for _, row in ts_df.iterrows():
        status = row["status"]
        time = row["time"]
        issues = row.get("issues") or []

        # up → down : starta incident
        if status == "down" and current_start is None:
            current_start = time
            collected_issues = list(issues)

        # down → down : fortsätt samla issues
        elif status == "down" and current_start is not None:
            collected_issues.extend(issues)

        # down → up : avsluta incident
        elif status == "up" and current_start is not None:
            duration = (time - current_start).total_seconds() / 60

            incidents.append({
                "endpoint": row["endpoint"],
                "start": current_start,
                "end": time,
                "duration_min": round(duration, 2),
                "ongoing": False,
                "issues": sorted(set(collected_issues)),
            })

            current_start = None
            collected_issues = []

        # up → up : inget att göra

    # Slutar i down → pågående incident
    if current_start is not None:
        end = datetime.now(timezone.utc)
        duration = (end - current_start).total_seconds() / 60

        incidents.append({
            "endpoint": ts_df.iloc[-1]["endpoint"],
            "start": current_start,
            "end": end,
            "duration_min": round(duration, 2),
            "ongoing": True,
            "issues": sorted(set(collected_issues)),
        })

    return pd.DataFrame(incidents)


In [10]:
all_incidents = []

for endpoint, filename in FILES.items():
    ts_df = extract_status_timeseries(endpoint, filename)
    incidents = build_incidents(ts_df)
    all_incidents.append(incidents)

incidents_df = pd.concat(all_incidents, ignore_index=True)
incidents_df.sort_values("start", inplace=True)

incidents_df



Unnamed: 0,endpoint,start,end,duration_min,ongoing,issues
0,riksarkivet_nad_se_ula_12231,2023-05-09 05:13:45+00:00,2023-05-09 05:31:58+00:00,18.22,False,[]
1,riksarkivet_nad_se_ula_12231,2023-05-09 05:57:01+00:00,2023-05-09 06:16:55+00:00,19.90,False,[]
2,riksarkivet_nad_se_ula_12231,2023-05-09 06:47:53+00:00,2023-05-09 06:56:13+00:00,8.33,False,[]
176,riksarkivet_forskarsalen,2023-05-11 15:45:24+00:00,2023-05-11 16:06:57+00:00,21.55,False,[]
177,riksarkivet_forskarsalen,2023-05-11 16:28:07+00:00,2023-05-11 17:09:35+00:00,41.47,False,[]
...,...,...,...,...,...,...
288,riksarkivet_forskarsalen,2025-12-23 08:49:44+00:00,2025-12-23 09:01:25+00:00,11.68,False,[]
411,riksarkivet_kyrkbok_c0005418,2025-12-23 08:51:16+00:00,2025-12-23 09:01:27+00:00,10.18,False,[]
175,riksarkivet_nad_se_ula_12231,2025-12-23 09:58:15+00:00,2026-01-06 12:56:41.699262+00:00,20338.44,True,[]
289,riksarkivet_forskarsalen,2025-12-23 09:58:46+00:00,2026-01-06 12:56:55.862023+00:00,20338.16,True,[]


In [11]:
incidents_df["downtime_days"] = (
    incidents_df["duration_min"] / 60 / 24
).round(2)

incidents_df


Unnamed: 0,endpoint,start,end,duration_min,ongoing,issues,downtime_days
0,riksarkivet_nad_se_ula_12231,2023-05-09 05:13:45+00:00,2023-05-09 05:31:58+00:00,18.22,False,[],0.01
1,riksarkivet_nad_se_ula_12231,2023-05-09 05:57:01+00:00,2023-05-09 06:16:55+00:00,19.90,False,[],0.01
2,riksarkivet_nad_se_ula_12231,2023-05-09 06:47:53+00:00,2023-05-09 06:56:13+00:00,8.33,False,[],0.01
176,riksarkivet_forskarsalen,2023-05-11 15:45:24+00:00,2023-05-11 16:06:57+00:00,21.55,False,[],0.01
177,riksarkivet_forskarsalen,2023-05-11 16:28:07+00:00,2023-05-11 17:09:35+00:00,41.47,False,[],0.03
...,...,...,...,...,...,...,...
288,riksarkivet_forskarsalen,2025-12-23 08:49:44+00:00,2025-12-23 09:01:25+00:00,11.68,False,[],0.01
411,riksarkivet_kyrkbok_c0005418,2025-12-23 08:51:16+00:00,2025-12-23 09:01:27+00:00,10.18,False,[],0.01
175,riksarkivet_nad_se_ula_12231,2025-12-23 09:58:15+00:00,2026-01-06 12:56:41.699262+00:00,20338.44,True,[],14.12
289,riksarkivet_forskarsalen,2025-12-23 09:58:46+00:00,2026-01-06 12:56:55.862023+00:00,20338.16,True,[],14.12


In [12]:
incidents_df.to_csv("incidents_clean.csv", index=False)
incidents_df.to_csv(
    DOCS_DIR / "incidents_clean_2.csv",
    index=False
)


In [13]:
summary = (
    incidents_df.groupby("endpoint")
      .agg(
          incidents=("duration_min", "count"),
          total_downtime_min=("duration_min", "sum"),
          max_incident_min=("duration_min", "max"),
          median_incident_min=("duration_min", "median"),
      )
      .reset_index()
)

summary["total_downtime_hours"] = (summary["total_downtime_min"] / 60).round(2)
summary


Unnamed: 0,endpoint,incidents,total_downtime_min,max_incident_min,median_incident_min,total_downtime_hours
0,riksarkivet_forskarsalen,114,32550.46,20338.16,16.525,542.51
1,riksarkivet_kyrkbok_c0005418,123,33402.67,20337.84,22.32,556.71
2,riksarkivet_nad_se_ula_12231,176,32346.58,20338.44,20.525,539.11


In [14]:
import pandas as pd

df = pd.read_csv("incidents_clean.csv", parse_dates=["start", "end"]) 

df["start"] = pd.to_datetime(df["start"], utc=True)
df["end"] = pd.to_datetime(df["end"], utc=True)


ValueError: time data "2026-01-06 12:56:41.699262+00:00" doesn't match format "%Y-%m-%d %H:%M:%S%z", at position 410. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [None]:
from datetime import timedelta

rows = []

for _, r in df.iterrows():
    day = r["start"].date()
    end_day = r["end"].date()

    while day <= end_day:
        rows.append({
            "endpoint": r["endpoint"],
            "date": day,
            "duration_min": r["duration_min"],
        })
        day += timedelta(days=1)

calendar_df = pd.DataFrame(rows)


In [None]:
calendar_daily = (
    calendar_df
    .groupby(["endpoint", "date"])
    .agg(
        incidents=("duration_min", "count"),
        downtime_min=("duration_min", "sum"),
    )
    .reset_index()
)

calendar_daily.to_csv("calendar_daily_2.csv", index=False)
calendar_daily.to_csv(
    DOCS_DIR / "calendar_daily_2.csv",
    index=False
)


In [None]:
import json
import subprocess
from datetime import datetime, timezone

commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"],
    text=True
).strip()

meta = {
    "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
    "commit": commit,
}

with open(DOCS_DIR / "meta_2.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)


In [None]:
 # End timer and calculate duration
end_time = time.time()
elapsed_time = end_time - start_time# Bygg audit-lager för den här etappen

# Print current date and total time
print("Date:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
minutes, seconds = divmod(elapsed_time, 60)
print("Total time elapsed: {:02.0f} minutes {:05.2f} seconds".format(minutes, seconds))
