In [1]:
from wikibaseintegrator.models import Qualifiers, References, Reference
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator import wbi_login
from wikibaseintegrator import datatypes
from wikibaseintegrator.wbi_config import config as wbi_config
from os.path import join, isfile, exists, isdir, abspath, pardir
from os import listdir, getcwd
import pandas as pd
from wikibaseintegrator.datatypes import (URL, CommonsMedia, ExternalID, Form, GeoShape, GlobeCoordinate, Item, Lexeme, Math, MonolingualText, MusicalNotation, Property, Quantity, Sense, String, TabularData, Time)
from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatePrecision, WikibaseRank, WikibaseSnakType
from wikibaseintegrator.wbi_exceptions import MissingEntityException, ModificationFailed, MWApiError

In [2]:
# Wikibase credentials come from the environment so that no secret is stored in the notebook.
# Export WDUSER / WDPASS before starting the kernel. When WDPASS is unset the password is
# empty, which the login step is expected to reject rather than fall back to a saved secret.
# NOTE: passwords that were previously committed in this cell should be rotated.
from os import environ

WDUSER = environ.get("WDUSER", "Admin")
WDPASS = environ.get("WDPASS", "")

In [3]:
# The mapping CSVs live in the "data" folder of the parent of the current working directory.
parent_dir = abspath(join(getcwd(), pardir))
data_dir = join(parent_dir, "data")
column_mapping, project_mapping, team_mapping = (
    join(data_dir, csv_name)
    for csv_name in ("column_mapping.csv", "project_mapping.csv", "team_mapping.csv")
)

In [4]:
def setup_config():
    """
    Point the WikibaseIntegrator configuration at the project's Wikibase instance.

    Overwrites three keys of the shared ``wbi_config`` mapping: the MediaWiki API
    URL, the SPARQL endpoint URL and the Wikibase base URL. Returns nothing.
    """
    wbi_config['MEDIAWIKI_API_URL'] = 'http://139.144.66.193:8181/api.php'
    # Keep this a bare URL: quote characters inside the literal would become part
    # of the value and produce an invalid endpoint address.
    wbi_config['SPARQL_ENDPOINT_URL'] = 'http://139.144.66.193:8282/proxy/wdqs/bigdata/namespace/wdq/sparql'
    wbi_config['WIKIBASE_URL'] = 'http://wikibase.svc'

In [5]:
# Write the endpoint URLs into wbi_config before the login object is created.
setup_config()

In [6]:
# Build the login object from the WDUSER / WDPASS values set earlier in the notebook.
login = wbi_login.Login(user=WDUSER, password=WDPASS)



In [7]:
# Shared client bound to the login above; create_mapping() uses it to create properties.
wbi = WikibaseIntegrator(login=login)

In [13]:
# Load the three mapping tables, then inner-join them on their shared "column" key.
# NOTE(review): the saved output of this cell shows only a header row, which suggests
# the inner joins leave no matching rows — confirm that how="inner" is what is wanted.
df, p_df, t_df = (
    pd.read_csv(csv_path)
    for csv_path in (column_mapping, project_mapping, team_mapping)
)
df = df.merge(p_df, on="column", how="inner").merge(t_df, on="column", how="inner")
df.head()

Unnamed: 0,column,prop,data_type_x,type_x,mapping_code_x,description_x,alias_x,data_type_y,type_y,mapping_code_y,description_y,alias_y


In [9]:
# Column labels of the merged mapping frame as a plain Python list.
column_names = df.columns.to_list()

In [10]:
def create_mapping(file_name):
    """
    Create one Wikibase property per row of a mapping CSV and print the assigned ids.

    The CSV must provide the columns ``column`` (used as the English label) and
    ``data_type`` (passed to ``wbi.property.new`` as ``datatype``). The columns
    ``description`` and ``alias`` are optional; a missing column or an empty cell
    is skipped instead of being sent to the API.

    Args:
        file_name: Path of the mapping CSV to read.

    Returns:
        None. One line is printed per row: either the property id the column was
        mapped to, or the reason the property could not be written.

    Raises:
        RuntimeError: If a write succeeds but the returned entity carries no id.
    """
    mapping_df = pd.read_csv(file_name)

    if mapping_df.empty:
        return

    # One plain dict per CSV row, keyed by the CSV header names.
    for row in mapping_df.to_dict(orient="records"):
        col_name = row['column']

        try:
            prop = wbi.property.new(datatype=row['data_type'])
            prop.labels.set('en', col_name)

            # pandas reads empty cells as NaN; only real text is forwarded.
            description = row.get('description')
            if not pd.isna(description):
                prop.descriptions.set('en', description)

            alias = row.get('alias')
            if not pd.isna(alias):
                prop.aliases.set('en', alias)

            written = prop.write()
        except ModificationFailed as e:
            # Not every rejected write is a duplicate, so the actual error is shown.
            print("Property '{}' could not be created (it may already exist): {}".format(col_name, e))
            continue

        # Take the id from the returned entity rather than parsing its repr text.
        prop_code = written.id
        if not prop_code:
            raise RuntimeError("No property id was returned for column '{}'".format(col_name))
        print("Column '{}' is mapped to '{}'".format(col_name, prop_code))

In [11]:
# Create the properties listed in the team mapping CSV.
# The path is the same one assigned earlier in the notebook, recomputed before use.
team_mapping = join(data_dir, "team_mapping.csv")
create_mapping(team_mapping)

Column 'Id' is mapped to 'P1'
Column 'Name' is mapped to 'P2'
Column 'Family Name' is mapped to 'P3'
Column 'Birthdate' is mapped to 'P4'
Column 'Nationality' is mapped to 'P5'
Column 'Category' is mapped to 'P6'
Column 'Start date' is mapped to 'P7'
Column 'End date' is mapped to 'P8'


In [12]:
# Create the properties listed in the project mapping CSV.
# The path is the same one assigned earlier in the notebook, recomputed before use.
project_mapping = join(data_dir, "project_mapping.csv")
create_mapping(project_mapping)

Column 'Client Type' is mapped to 'P9'
Column 'Client' is mapped to 'P10'
Column 'Project' is mapped to 'P11'
Column 'Project Type' is mapped to 'P12'
Column 'Task' is mapped to 'P13'
Column 'Owner' is mapped to 'P14'
Column 'Member' is mapped to 'P15'
Column 'Duration (in days)' is mapped to 'P16'
Column 'Project Start date' is mapped to 'P17'
Column 'Project End date' is mapped to 'P18'
