After exporting your CSV from OpenRefine, use this code to turn your data into text that QuickStatements can process.

In [None]:
import pandas as pd

# Path of the CSV exported from OpenRefine (read with pandas below).
INPUT_CSV   = "exported_openrefine.csv"
# Path of the QuickStatements command file this script writes (overwritten on each run).
OUTPUT_FILE = "quickstatements_file.txt"

# Your CSV must include a "Full Name" column (used for the Len label) and should include a
# "Description" column; the script uses it for the Den (description) on each item.
# If Description is empty, the fallback "Contributor to Print from WPHP" is used.

# --- Wikidata property / item IDs written into the output statements ---
P_INSTANCE   = "P31"    # instance of
Q_HUMAN      = "Q5"     # human (the value written for P31)
P_DATE_BIRTH = "P569"   # date of birth
P_DATE_DEATH = "P570"   # date of death
P_FLORUIT    = "P1317"  # floruit
P_VIAF       = "P214"   # VIAF ID
P_WPHP_ID    = "P9780"  # The WPHP (Women's Print History Project) Person ID

# --- PROPERTIES: set a flag to False to leave that property out of the output ---
# Do not comment a line out: every flag is read by name in the export loop below,
# so a missing flag raises NameError instead of disabling the property.
INCLUDE_INSTANCE_OF   = True   # P31 (Human: Q5)
INCLUDE_DATE_OF_BIRTH = True   # P569
INCLUDE_DATE_OF_DEATH = True   # P570
INCLUDE_FLORUIT       = True   # P1317
INCLUDE_VIAF          = True   # P214
INCLUDE_WPHP_ID       = True   # P9780

# --- CSV column names for each property (change if your CSV uses different headers) ---
# A property whose column is absent from the CSV is simply skipped.
COL_DATE_OF_BIRTH = "dob"
COL_DATE_OF_DEATH = "dod"
COL_FLORUIT       = "Floruit"   # add this column to your CSV if you use floruit
COL_VIAF          = "viaf_id_clean"
COL_WPHP_ID       = "id"

def _clean_str(val):
    s = str(val).replace(".0", "").strip()
    return s if s and s.lower() != "nan" else ""

def _format_date(val):
    """Return a date cell as signed text for a QuickStatements time value.

    The cell is cleaned with ``_clean_str``; an empty cell yields "". A value
    that already starts with "+" or "-" is passed through unchanged, so BCE
    dates keep their "-" sign instead of becoming "+-...". Any other value
    gets a leading "+".

    NOTE(review): this function only adds the sign. QuickStatements documents
    time values in the form ``+1967-01-17T00:00:00Z/11`` (timestamp plus
    precision), so the CSV is presumably expected to hold dates already in
    that shape. Confirm against your export.
    """
    s = _clean_str(val)
    if not s:
        return ""
    return s if s.startswith(("+", "-")) else "+" + s

# Read every column as text so identifiers keep their exact digits. Letting
# pandas infer types turns an ID column that contains blanks into float64,
# which appends ".0" and silently rounds long IDs (e.g. VIAF) beyond 2**53.
df = pd.read_csv(INPUT_CSV, dtype=str)

if "Full Name" not in df.columns:
    raise KeyError(f"'Full Name' column not found in {INPUT_CSV!r}")

# Tabs and line breaks delimit QuickStatements commands, so they must never
# appear inside a label or description; each one is replaced by a space.
_COMMAND_BREAKERS = str.maketrans("\t\r\n", "   ")


def _cell_text(val):
    """Return a CSV cell as single-line trimmed text, or "" if it is missing."""
    if pd.isna(val):
        return ""
    return str(val).translate(_COMMAND_BREAKERS).strip()


# (enabled flag, property ID, CSV column) for statements whose value is a date.
_DATE_STATEMENTS = (
    (INCLUDE_DATE_OF_BIRTH, P_DATE_BIRTH, COL_DATE_OF_BIRTH),
    (INCLUDE_DATE_OF_DEATH, P_DATE_DEATH, COL_DATE_OF_DEATH),
    (INCLUDE_FLORUIT, P_FLORUIT, COL_FLORUIT),
)

# (enabled flag, property ID, CSV column) for external IDs, written as quoted strings.
_ID_STATEMENTS = (
    (INCLUDE_VIAF, P_VIAF, COL_VIAF),
    (INCLUDE_WPHP_ID, P_WPHP_ID, COL_WPHP_ID),
)

skipped_rows = []

with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    for index, row in df.iterrows():
        name = _cell_text(row["Full Name"])
        if not name:
            # Without a name the item would be created with the label "nan".
            skipped_rows.append(index)
            continue

        # A missing Description column is treated like an empty cell.
        desc = _cell_text(row.get("Description")) or "Contributor to Print from WPHP"

        # NOTE(review): double quotes inside a name/description are written
        # as-is; confirm how QuickStatements treats them for your data.
        lines = [
            "CREATE",
            "\t".join(["LAST", "Len", f'"{name}"']),
            "\t".join(["LAST", "Den", f'"{desc}"']),
        ]

        if INCLUDE_INSTANCE_OF:
            lines.append("\t".join(["LAST", P_INSTANCE, Q_HUMAN]))

        for enabled, prop, column in _DATE_STATEMENTS:
            if enabled and column in row.index:
                date_value = _format_date(row[column])
                if date_value:
                    lines.append("\t".join(["LAST", prop, date_value]))

        for enabled, prop, column in _ID_STATEMENTS:
            if enabled and column in row.index:
                id_value = _clean_str(row[column])
                if id_value:
                    lines.append("\t".join(["LAST", prop, f'"{id_value}"']))

        f.write("\n".join(lines) + "\n")

if skipped_rows:
    print(
        f"WARNING: skipped {len(skipped_rows)} row(s) with no 'Full Name' "
        f"(DataFrame index: {skipped_rows})"
    )
print(f"DONE! File saved as '{OUTPUT_FILE}'")

DONE! File saved as 'fixed_batch.txt'
