In [1]:
!pip install pywikibot



In [9]:
import pywikibot
import json
from pywikibot.family import Family

class RijksmuseumFamily(Family):
    name = 'rijksmuseum'
    
    @classmethod
    def __post_init__(cls):
        cls.langs = {
            'en': 'rijksmuseum-kg.wikibase.cloud'
        }

    def protocol(self, code):
        return 'https'
    
    def scriptpath(self, code):
        return '/w'
        
    def interface(self, code):
        return 'DataSite'

def setup_connection():
    """Setup connection with proper authentication"""
    try:
        # Add the family to pywikibot's families
        pywikibot.config.family_files['rijksmuseum'] = 'rijksmuseum_family.py'
        pywikibot.Family._families['rijksmuseum'] = RijksmuseumFamily()

        # Configure pywikibot
        pywikibot.config.usernames['rijksmuseum']['en'] = 'Le a'  # Replace with actual username
        pywikibot.config.password_file = "user-password.py"
        
        # Initialize site and login
        site = pywikibot.Site('en', 'rijksmuseum')
        site.login()
        
        return site
    except Exception as e:
        print(f"Error setting up connection: {e}")
        raise

# Define property datatypes that are definitely supported by Wikibase
LIDO_MAPPINGS = {
    "lido:objectID": {
        "label": "inventory number",
        "description": "Unique identifier for the artwork (equivalent to Wikidata P217)",
        "datatype": "external-id",  # Changed from string
        "wikidata_equiv": "P217",
        "extract_path": ["lido:objectIdentificationWrap", "lido:repositoryWrap", "lido:repositorySet", "lido:workID", "#text"]
    },
    "lido:titleSet": {
        "label": "title",
        "description": "Title of the artwork (equivalent to Wikidata P1476)",
        "datatype": "text",  # Changed from monolingualtext
        "wikidata_equiv": "P1476",
        "extract_path": ["lido:objectIdentificationWrap", "lido:titleWrap", "lido:titleSet", 0, "lido:appellationValue", 0, "#text"]
    },
    "lido:eventActor": {
        "label": "creator",
        "description": "The creator of the artwork (equivalent to Wikidata P170)",
        "datatype": "wikibase-item",
        "wikidata_equiv": "P170",
        "extract_path": ["lido:eventWrap", "lido:eventSet", 0, "lido:event", "lido:eventActor", "lido:actorInRole", "lido:actor", "lido:nameActorSet", "lido:appellationValue", 0, "#text"]
    },
    "lido:eventDate": {
        "label": "date of creation",
        "description": "The date when the artwork was created (equivalent to Wikidata P571)",
        "datatype": "string",  # Changed from time
        "wikidata_equiv": "P571",
        "extract_path": ["lido:eventWrap", "lido:eventSet", 0, "lido:event", "lido:eventDate", "lido:date", "lido:earliestDate"]
    },
    "lido:eventPlace": {
        "label": "place of creation",
        "description": "The place where the artwork was created (equivalent to Wikidata P1071)",
        "datatype": "wikibase-item",
        "wikidata_equiv": "P1071",
        "extract_path": ["lido:eventWrap", "lido:eventSet", 0, "lido:event", "lido:eventPlace", "lido:place", "lido:namePlaceSet", "lido:appellationValue", 0, "#text"]
    }
}

def get_nested_value(data, path):
    """Safely navigate nested dictionary using a path list"""
    for key in path:
        if isinstance(key, int) and isinstance(data, list):
            if len(data) > key:
                data = data[key]
            else:
                return None
        elif isinstance(data, dict) and key in data:
            data = data[key]
        else:
            return None
    return data

def create_property_if_missing(repo, lido_class):
    """Create a property based on LIDO class mapping if it doesn't exist"""
    mapping = LIDO_MAPPINGS[lido_class]
    
    try:
        # Create new property with explicit datatype
        new_property = pywikibot.PropertyPage(repo)
        data = {
            "datatype": mapping['datatype'],
            "labels": {
                "en": {"language": "en", "value": mapping['label']}
            },
            "descriptions": {
                "en": {"language": "en", "value": mapping['description']}
            }
        }
        
        summary = f"Creating new property for {lido_class}"
        new_property.editEntity(data, summary=summary)
        print(f"Created property: {mapping['label']} ({new_property.id})")
        return new_property.id
        
    except Exception as e:
        print(f"Error creating property: {e}")
        raise

def create_item(repo, label, description=""):
    """Create a new item"""
    try:
        new_item = pywikibot.ItemPage(repo)
        data = {
            "labels": {
                "en": {"language": "en", "value": label}
            }
        }
        if description:
            data["descriptions"] = {
                "en": {"language": "en", "value": description}
            }
        summary = f"Creating new item for {label}"
        new_item.editEntity(data, summary=summary)
        return new_item
    except Exception as e:
        print(f"Error creating item: {e}")
        raise

def process_lido_records(repo, records, properties):
    """Process LIDO records and create Wikibase items"""
    for record in records:
        try:
            # Get descriptive metadata
            desc_metadata = record.get("lido:descriptiveMetadata", {})
            
            # Extract title
            title = get_nested_value(desc_metadata, LIDO_MAPPINGS["lido:titleSet"]["extract_path"]) or "Untitled artwork"
            
            # Create artwork item
            artwork = create_item(repo, title, "Artwork from LIDO dataset")

            # Add claims based on available data
            for lido_class, mapping in LIDO_MAPPINGS.items():
                if lido_class in properties:
                    value = get_nested_value(desc_metadata, mapping["extract_path"])
                    if value:
                        claim = pywikibot.Claim(repo, properties[lido_class])
                        
                        if mapping["datatype"] == "text":
                            claim.setTarget(str(value))
                        elif mapping["datatype"] == "external-id":
                            claim.setTarget(str(value))
                        elif mapping["datatype"] == "wikibase-item":
                            value_item = create_item(repo, value, f"Created for {mapping['label']}")
                            claim.setTarget(value_item)
                        else:  # string
                            claim.setTarget(str(value))
                            
                        artwork.addClaim(claim)

            print(f"Successfully uploaded record: {title}")

        except Exception as e:
            print(f"Error uploading record: {e}")

def main():
    try:
        # Initialize connection with authentication
        print("Initializing connection to Wikibase...")
        site = setup_connection()
        repo = site.data_repository()
        
        print("Starting data upload...")
        
        # Create properties
        print("Creating/verifying properties...")
        properties = {}
        for lido_class in LIDO_MAPPINGS.keys():
            try:
                prop_id = create_property_if_missing(repo, lido_class)
                if prop_id:
                    properties[lido_class] = prop_id
                    print(f"Successfully mapped {lido_class} to property {prop_id}")
            except Exception as e:
                print(f"Failed to create/verify property for {lido_class}: {e}")
                continue

        # Load data
        print("Loading data file...")
        with open("s1_data.json", "r", encoding='utf-8') as file:
            data = json.load(file)

        # Process records
        print("Processing records...")
        records = data.get("records", {}).get("lido:lido", [])
        if not records:
            print("No records found in the data")
            return

        process_lido_records(repo, records, properties)

    except Exception as e:
        print(f"Fatal error: {e}")
        raise e

if __name__ == "__main__":
    main()

Initializing connection to Wikibase...


Logging in to rijksmuseum:en as Le a


Starting data upload...
Creating/verifying properties...
Error creating property: "datatype" is required for new property.
Failed to create/verify property for lido:objectID: "datatype" is required for new property.
Error creating property: "datatype" is required for new property.
Failed to create/verify property for lido:titleSet: "datatype" is required for new property.
Error creating property: "datatype" is required for new property.
Failed to create/verify property for lido:eventActor: "datatype" is required for new property.
Error creating property: "datatype" is required for new property.
Failed to create/verify property for lido:eventDate: "datatype" is required for new property.
Error creating property: "datatype" is required for new property.
Failed to create/verify property for lido:eventPlace: "datatype" is required for new property.
Loading data file...
Processing records...


Sleeping for 9.2 seconds, 2025-01-12 20:17:13


Successfully uploaded record: Prescott H. Butler


Sleeping for 9.1 seconds, 2025-01-12 20:17:23


Successfully uploaded record: Rt. Hon. W. E. Gladstone
