# Analyse 

In [5]:
import json
from collections import Counter
import pandas as pd

In [7]:
# JSON-Datei laden
with open("data/mak_metadata_with_orcid.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Initialisierung
total_publications = len(data)
unique_authors = set()
authors_with_orcid = set()
years = []

# Durchlauf durch alle Publikationen
for entry in data:
    year = entry.get("publicationYear")
    if year:
        years.append(str(year))

    for author in entry.get("authors", []):
        name = author.get("name")
        orcid = author.get("orcid")

        if name:
            unique_authors.add(name)
        if orcid:
            authors_with_orcid.add(orcid)

# Zählen
num_unique_authors = len(unique_authors)
num_authors_with_orcid = len(authors_with_orcid)
year_distribution = Counter(years)

# Ausgabe vorbereiten
df_summary = pd.DataFrame({
    "Metric": [
        "Total publications",
        "Unique authors",
        "Authors with ORCID",
        "Years with publications"
    ],
    "Value": [
        total_publications,
        num_unique_authors,
        num_authors_with_orcid,
        len(year_distribution)
    ]
})

df_years = pd.DataFrame(year_distribution.items(), columns=["Year", "Count"]).sort_values(by="Year")

# Anzeigen
print("\nSummary Statistics:")
print(df_summary)

print("\nPublication Count by Year:")
print(df_years)




Summary Statistics:
                    Metric  Value
0       Total publications   3975
1           Unique authors    327
2       Authors with ORCID     59
3  Years with publications      0

Publication Count by Year:
Empty DataFrame
Columns: [Year, Count]
Index: []
