In [1]:
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

import requests
import urllib.parse
import json

In [2]:
suche = ['"university frankfurt"', '"universität frankfurt"', '"goethe universität"', '"goethe university"']
searchmode = 'OR'.join(suche)
query = urllib.parse.quote(searchmode)  # URL-kodiert den Suchbegriff

url = f"https://zenodo.org/api/records?q={query}&type=software&size=200"

result = requests.get(url)

In [3]:
#http-Abfrage in python-Objekt umwandelnd
data = result.json()

In [4]:
data

{'hits': {'hits': [{'created': '2023-08-30T12:32:03.781845+00:00',
    'modified': '2023-08-30T14:26:56.423360+00:00',
    'id': 8301158,
    'conceptrecid': '8301157',
    'doi': '10.5281/zenodo.8301158',
    'conceptdoi': '10.5281/zenodo.8301157',
    'doi_url': 'https://doi.org/10.5281/zenodo.8301158',
    'metadata': {'title': 'Supplement to the paper: Supporting the analysis of a large coin hoard with AI-based methods.',
     'doi': '10.5281/zenodo.8301158',
     'publication_date': '2023-08-30',
     'description': '<p><strong>ClaReNet is a joint project of the R&ouml;misch-Germanische Kommission (German Archaeological Institute) and the Big Data Lab (Goethe University Frankfurt), funded by the German Federal Ministry of Education and Research (BMBF). It tests the possibilities and limits of new digital methods of classification and representation. This supplement provides a snapshot of the methods used in the project, which will be published in the paper &quot;Supporting the ana

In [5]:
#eigentliche Datensätze abrufen
records = data["hits"]["hits"]

In [6]:
df = pd.DataFrame(records)

In [7]:
#metadata-Feld aufdröseln
df_normalized = pd.json_normalize(records)

In [31]:
# Spalten filtern und umbenennen
df_columns = (df_normalized[["doi", "title", 
                               "metadata.creators", "metadata.resource_type.title", 
                               "metadata.description", "metadata.license.id", 
                               "metadata.contributors", "metadata.keywords", 
                               "metadata.notes", "metadata.custom.code:programmingLanguage", 
                               "metadata.publication_date", "links.self_html"]]
                                .rename(columns={"metadata.resource_type.title":"dc.type", 
                                "metadata.description":"dc.description.abstract", 
                                "metadata.license.id":"dc.licence", 
                                "metadata.keywords":"dc.relation", 
                                "metadata.notes":"dc.notes", 
                                "title":"dc.title",
                                "links.self_html":"dc.identifyer.uri",
                                "creators":"dc.contributor.author",
                                "contributors":"dc.contributor.other", 
                                "metadata.publication_date":"dc.date.issued", 
                                "doi":"dc.identifier.doi"}))

In [32]:
# Spalte Quelle hinzufügen
df_columns["monitoring.source"] = "Zenodo"

In [33]:
# dc.relation säubern
df_columns['dc.relation'] = df_columns['dc.relation'].apply(lambda x: ", ".join(x) if isinstance(x, list) else x)
df_columns

Unnamed: 0,dc.identifier.doi,dc.title,metadata.creators,dc.type,dc.description.abstract,dc.licence,metadata.contributors,dc.relation,dc.notes,metadata.custom.code:programmingLanguage,dc.date.issued,dc.identifyer.uri,monitoring.source
0,10.5281/zenodo.8301158,Supplement to the paper: Supporting the analys...,"[{'name': 'Chrisowalandis Deligio', 'affiliati...",Software,<p><strong>ClaReNet is a joint project of the ...,cc-by-4.0,,"celtic coins, unsupervised learning, machine l...",,,2023-08-30,https://zenodo.org/records/8301158,Zenodo
1,10.5281/zenodo.7688961,CindyRicoCarmona/Expand_abbreviations_with_reg...,"[{'name': 'Cindy Rico Carmona', 'affiliation':...",Software,Abbreviation expansion with regular expression...,other-open,,,,,2023-03-01,https://zenodo.org/records/7688961,Zenodo
2,10.5281/zenodo.7670254,CindyRicoCarmona/svsal-teiedit: The School of ...,"[{'name': 'Cindy Rico Carmona', 'affiliation':...",Software,<p>The text editing workflow of the project <a...,other-open,,,,,2023-02-23,https://zenodo.org/records/7670254,Zenodo
3,10.5281/zenodo.6941258,hennylab/Neuronal-Convex-hull-intersection: N...,"[{'name': 'hennylab', 'affiliation': None}]",Software,"<p>Raw data, MATLAB scripts and data analysis ...",other-open,,,,,2022-07-29,https://zenodo.org/records/6941258,Zenodo
4,10.5281/zenodo.15301756,GRACE Ontology,"[{'name': 'Sander, Christoph', 'affiliation': ...",Software,The official GRACE ontology,mit-license,,"ontology, RDF, OWL","If you use this dataset, please cite it using ...",,2025-04-29,https://zenodo.org/records/15301756,Zenodo
5,10.5281/zenodo.14013121,Plastid and peroxisome movement tracks in the ...,"[{'name': 'Plomer, Solveig', 'affiliation': 'G...",Software,"<p>In movement analysis, correlated random wal...",mit-license,,"Cell organelles, Movement analysis, Arabidopsi...",<p>Funding provided by: Frankfurt Institute fo...,,2024-11-04,https://zenodo.org/records/14013121,Zenodo
6,10.5281/zenodo.6602946,BreakingTheCycle,"[{'name': 'Holger Dell', 'affiliation': 'Goeth...",Software,<p>This repository contains exact and heuristi...,gpl-2.0,,,This work was partially funded by the Deutsche...,,2022-06-01,https://zenodo.org/records/6602946,Zenodo
7,10.5281/zenodo.7318118,Parallel and I/O-Efficient Algorithms for Non-...,"[{'name': 'Allendorf, Daniel', 'affiliation': ...",Software,<p>This upload contains the frozen source code...,mit-license,,"Random graphs, Graph generator, Algorithm Engi...",This work was supported by the Deutsche Forsch...,,2022-11-14,https://zenodo.org/records/7318118,Zenodo
8,10.5281/zenodo.5121917,CL2QCD,"[{'name': 'Sciarra, Alessandro*', 'affiliation...",Software,<p>CL2QCD is a Lattice QCD application based o...,,"[{'name': 'Philipsen, Owe', 'affiliation': 'Go...",Lattice QCD,,,2021-02-12,https://zenodo.org/records/5121917,Zenodo
9,10.5281/zenodo.15196735,Altermagnetic splitting of magnons in hematite...,"[{'name': 'Hoyer, Rhea', 'affiliation': 'Johan...",Software,,cc-by-4.0,"[{'name': 'Stavropoulos, Panagiotis Peter', 'a...",,,"[{'id': 'python', 'title': {'en': 'Python'}}, ...",2025-06-18,https://zenodo.org/records/15196735,Zenodo


In [10]:
# metadata.creators aufteilen

def creators_split(eintrag):
    if not eintrag or not isinstance(eintrag, list):
        return pd.Series(["", ""])
    
    creators_liste = []
    affiliations_liste = []
    orcid_liste =[]

    for d in eintrag:
        if not isinstance(d, dict):
            continue
     
        name = (d.get("name") or "").strip()
        aff = (d.get("affiliation") or "").strip()
        orcid = (d.get("orcid") or "").strip()
        
        creators_liste.append(f"{name} ({aff})")
        affiliations_liste.append(f"{aff} ({name})")
        orcid_liste.append(f"{name} {orcid}")

    return pd.Series(["; ".join(creators_liste), "; ".join(affiliations_liste), "; ".join(orcid_liste)])

# Anwenden auf die Spalte 'creators' und 'contributors':
df_columns[["creators", "affiliation", "orcid"]] = df_columns["metadata.creators"].apply(creators_split)
df_columns[["contributors", "contributors_affiliation", "contributors_orcid"]] = df_columns["metadata.contributors"].apply(creators_split)

In [11]:
# Spalte affilliation.name und orcid hinzufügen
df_columns["contributor.affiliation.name"] = df_columns["affiliation"].fillna("") + "; " + df_columns["contributors_affiliation"].fillna("")
df_columns["person.identifier.orcid"] = df_columns["orcid"].fillna("") + ";" + df_columns["contributors_orcid"].fillna("")

In [12]:
# metadata.custom.code:programmingLanguage aufteilen

def language_split(eintrag):
    if not eintrag or not isinstance(eintrag, list):
        return ""

    language_liste = []

    for d in eintrag:
        if not isinstance(d, dict):
            continue

        language = d.get("title", {}).get("en")
                    
        language_liste.append(language)

    return "; ".join(language_liste)

df_columns["dc.programming.language"] = df_normalized["metadata.custom.code:programmingLanguage"].apply(language_split)

In [13]:
#Jahreszahl aus Publication_date extrahieren
def extract_year(value):
    try:
        date = pd.to_datetime(value)  # konvertiert in Datumsstandard
        return date.year  # Gibt nur das Jahr zurück
    except ValueError:
        return value # gibt alten Wert bei Error zurück (ist dann schon JJJJ)

df_columns["dc.date.issued"] = df_columns["dc.date.issued"].apply(extract_year)

In [29]:
#altes metadata.creators-Feld löschen
df_clean = df_columns.drop(columns = ["metadata.creators", "metadata.contributors", 
                                      "metadata.custom.code:programmingLanguage", 
                                      "contributors_orcid", "orcid",  "contributors_affiliation", "affiliation"])
df_clean

Unnamed: 0,dc.identifier.doi,dc.title,dc.type,dc.description.abstract,dc.licence,dc.relation,dc.notes,dc.date.issued,dc.identifyer.uri,monitoring.source,creators,contributors,contributor.affiliation.name,person.identifier.orcid,dc.programming.language
0,10.5281/zenodo.8301158,Supplement to the paper: Supporting the analys...,Software,<p><strong>ClaReNet is a joint project of the ...,cc-by-4.0,"[celtic coins, unsupervised learning, machine ...",,2023,https://zenodo.org/records/8301158,Zenodo,Chrisowalandis Deligio (Goethe Universität Fra...,,Goethe Universität Frankfurt am Main - Germany...,Chrisowalandis Deligio 0000-0002-5708-4271; Ka...,
1,10.5281/zenodo.7688961,CindyRicoCarmona/Expand_abbreviations_with_reg...,Software,Abbreviation expansion with regular expression...,other-open,,,2023,https://zenodo.org/records/7688961,Zenodo,Cindy Rico Carmona (Goethe Universität Frankfu...,,Goethe Universität Frankfurt am Main (Cindy Ri...,Cindy Rico Carmona ;,
2,10.5281/zenodo.7670254,CindyRicoCarmona/svsal-teiedit: The School of ...,Software,<p>The text editing workflow of the project <a...,other-open,,,2023,https://zenodo.org/records/7670254,Zenodo,Cindy Rico Carmona (Goethe Universität Frankfu...,,Goethe Universität Frankfurt am Main (Cindy Ri...,Cindy Rico Carmona ;,
3,10.5281/zenodo.6941258,hennylab/Neuronal-Convex-hull-intersection: N...,Software,"<p>Raw data, MATLAB scripts and data analysis ...",other-open,,,2022,https://zenodo.org/records/6941258,Zenodo,hennylab (),,(hennylab);,hennylab ;,
4,10.5281/zenodo.15301756,GRACE Ontology,Software,The official GRACE ontology,mit-license,"[ontology, RDF, OWL]","If you use this dataset, please cite it using ...",2025,https://zenodo.org/records/15301756,Zenodo,"Sander, Christoph (Deutsches Historisches Inst...",,Deutsches Historisches Institut in Rom (Sander...,"Sander, Christoph 0000-0003-4452-0107; Boute, ...",
5,10.5281/zenodo.14013121,Plastid and peroxisome movement tracks in the ...,Software,"<p>In movement analysis, correlated random wal...",mit-license,"[Cell organelles, Movement analysis, Arabidops...",<p>Funding provided by: Frankfurt Institute fo...,2024,https://zenodo.org/records/14013121,Zenodo,"Plomer, Solveig (Goethe University Frankfurt);...",,"Goethe University Frankfurt (Plomer, Solveig);...","Plomer, Solveig 0009-0004-3655-3392; Ernst, Th...",
6,10.5281/zenodo.6602946,BreakingTheCycle,Software,<p>This repository contains exact and heuristi...,gpl-2.0,,This work was partially funded by the Deutsche...,2022,https://zenodo.org/records/6602946,Zenodo,Holger Dell (Goethe University Frankfurt); Hen...,,Goethe University Frankfurt (Holger Dell); Goe...,Holger Dell 0000-0001-8955-0786; Henri Froese ...,
7,10.5281/zenodo.7318118,Parallel and I/O-Efficient Algorithms for Non-...,Software,<p>This upload contains the frozen source code...,mit-license,"[Random graphs, Graph generator, Algorithm Eng...",This work was supported by the Deutsche Forsch...,2022,https://zenodo.org/records/7318118,Zenodo,"Allendorf, Daniel (Goethe University Frankfurt...",,"Goethe University Frankfurt (Allendorf, Daniel...","Allendorf, Daniel 0000-0002-0549-7576; Meyer, ...",
8,10.5281/zenodo.5121917,CL2QCD,Software,<p>CL2QCD is a Lattice QCD application based o...,,[Lattice QCD],,2021,https://zenodo.org/records/5121917,Zenodo,"Sciarra, Alessandro* (Goethe University, Frank...","Philipsen, Owe (Goethe University, Frankfurt a...","Goethe University, Frankfurt am Main (Sciarra,...","Sciarra, Alessandro* 0000-0002-4608-1905; Pink...",
9,10.5281/zenodo.15196735,Altermagnetic splitting of magnons in hematite...,Software,,cc-by-4.0,,,2025,https://zenodo.org/records/15196735,Zenodo,"Hoyer, Rhea (Johannes Gutenberg University Mainz)","Stavropoulos, Panagiotis Peter (Goethe Univers...","Johannes Gutenberg University Mainz (Hoyer, Rh...","Hoyer, Rhea 0000-0003-2285-435X;Stavropoulos, ...",Python; Mathematica


In [15]:
df_clean.to_csv("data_zenodo.csv")