# Analyse av metadata som finnes i JSON-data

In [None]:
from rich import print

In [None]:
import pathlib

from rich.prompt import Prompt

# Use ./index.json when it is present; otherwise keep asking until the user
# supplies a path that points at an existing regular file, so a typo is
# caught here instead of surfacing later when the file is opened.
index_file = pathlib.Path("index.json")
while not index_file.is_file():
    raw_path = Prompt.ask("Velg index fil")
    # Tolerate surrounding whitespace and a leading "~" in the typed path.
    index_file = pathlib.Path(raw_path.strip()).expanduser()

In [None]:
import json

# Decode the index explicitly as UTF-8 rather than with the platform's
# locale default, so non-ASCII text (e.g. "æøå") is read the same way on
# every operating system.
with open(index_file, mode="r", encoding="utf-8") as fil:
    index: list[dict] = json.load(fil)

# Report which file was loaded and how many entries it contains.
print(f"Lastet index ('{index_file}') med {len(index)} sider")

In [None]:
import httpx
from rich.progress import track

# Fetch the JSON document of every indexed page and collect one metadata
# record per page: the original index entry plus a few fields taken from
# the page's content object.
site_metadata = []
with httpx.Client() as client:  # a single client is shared by all requests
    for site in track(index, description="Laster ned metadata"):
        resp = client.get(site["json_url"])
        # Called as its own statement: raise_for_status() only returns the
        # response on httpx >= 0.25; older releases return None, which would
        # break the .json() call below if the two were chained.
        resp.raise_for_status()
        data = resp.json()
        content = data["pageProps"]["content"]
        # Start from the index entry so its fields are kept next to the
        # values extracted from the page itself.
        metadata = {
            **site,
            "path": content["_path"],
            "type": content["type"],
            "language": content["language"],
            "title": content["displayName"],
        }
        site_metadata.append(metadata)

In [None]:
import polars as pl

# Build a DataFrame from the collected records and parse the
# "last_modified" strings into datetime values.
last_modified = pl.col("last_modified").str.to_datetime()
df = pl.from_dicts(site_metadata).with_columns(last_modified)

In [None]:
# Show summary statistics for the columns of the DataFrame.
df.describe()

In [None]:
# Count the rows for each value of "type" and draw the counts as a bar chart.
type_counts = df.select("type").group_by("type").len()
type_counts.plot.bar("type", "len")

In [None]:
# Count the rows for each value of "language" and draw the counts as a bar chart.
language_counts = df.select("language").group_by("language").len()
language_counts.plot.bar("language", "len")