Reference: https://wiki.personaldata.io/w/api.php?action=help&modules=expandtemplates

In [1]:
import json
import os
import urllib.parse

import pandas as pd
import requests

In [2]:
# hand-maintained list of "static" properties
properties = [
    'Q988',
    'Q1010',
    'Q1011',
    'Q2581',
    'Q1243',
]

In [3]:
# add the consecutive ids Q1012 and Q1013
properties.extend('Q%d' % number for number in range(1012, 1014))

In [5]:
# to query wiki.personaldata.io for 1 item

def getText(q, timeout=30):
    """Expand the ``MailtoSwissAccess`` template for one wiki item.

    Posts an ``expandtemplates`` request to the wiki.personaldata.io API
    with the wikitext ``{{MailtoSwissAccess|<q>}}``.

    Args:
        q: Item identifier inserted into the template (e.g. ``'Q988'``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``{'key': q, 'data': <expanded wikitext>}`` on success. ``'data'``
        stays ``None`` when the HTTP status is an error, the body is not
        JSON, or the payload carries no expanded wikitext. ``False`` when
        the request itself failed (the error is printed).
    """
    data = {'key': q, 'data': None}
    endpoint = 'https://wiki.personaldata.io/w/api.php'

    load = {
        'action': 'expandtemplates',
        'text': '{{MailtoSwissAccess|' + q + '}}',
        'format': 'json',
        'prop': 'wikitext'
    }
    try:
        # Without an explicit timeout a stalled server blocks this call forever.
        response = requests.post(endpoint, data=load, timeout=timeout)
    except requests.exceptions.RequestException as e:  # catch every requests error
        print(e)
        return False
    if not response:
        # A Response is falsy for 4xx/5xx status codes.
        return data
    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON; report "no data" instead of crashing.
        return data
    # NOTE(review): API-level errors appear to come back as JSON without an
    # 'expandtemplates' entry, so look the keys up defensively.
    expanded = payload.get('expandtemplates') if isinstance(payload, dict) else None
    if isinstance(expanded, dict):
        data['data'] = expanded.get('wikitext')
    return data

In [6]:
# Accumulator for the getText() results, one entry per queried id.
items = []

In [7]:
# Query every id, keeping only usable results: getText returns False when the
# request fails and leaves 'data' as None when nothing was expanded. Either
# value would break the DataFrame processing that follows.
for prop in properties:
    result = getText(prop)
    if result and result['data'] is not None:
        items.append(result)
    else:
        print('No template text retrieved for', prop)

In [8]:
# Build a DataFrame with one row per entry collected in items.
df = pd.DataFrame(items)

In [9]:
def getRecipient(text):
    """Return the recipient name held in the first query value of *text*.

    *text* is parsed as a URL query string. The value of its first
    ``name=value`` pair has every "Demande d'accès" occurrence removed and
    its surrounding whitespace stripped. Raises IndexError when *text*
    yields no such pair.
    """
    pairs = urllib.parse.parse_qsl(text)
    first_value = pairs[0][1]
    cleaned = first_value.replace("Demande d'accès", "")
    return cleaned.strip()

def getMailto(text):
    """Return the first query-pair name of *text*, cut at its first '?'.

    *text* is parsed as a URL query string; the (percent-decoded) name of
    its first ``name=value`` pair is truncated just before the first '?'.
    Raises IndexError when *text* yields no such pair.
    """
    first_name = urllib.parse.parse_qsl(text)[0][0]
    address, _, _ = first_name.partition('?')
    return address
# Derive the 'Recipient' column by applying getRecipient to each 'data' value.
df['Recipient'] = df['data'].apply(getRecipient)

In [10]:
# Derive the 'Mailto' column by applying getMailto to each 'data' value.
df['Mailto'] = df['data'].apply(getMailto)

In [11]:
# Percent-decode each 'data' value and keep the text that follows the first
# '&body=' marker (element 1 of the split); raises IndexError if the marker
# is absent.
df['Text-unquoted'] = df['data'].apply(lambda x: urllib.parse.unquote(x).split('&body=')[1])

In [12]:
# 'subject' is everything before the first blank line of the decoded text.
df['subject'] = df['Text-unquoted'].apply(lambda x: x.split('\n\n')[0])

In [13]:
# 'subject' is the leading paragraph of 'Text-unquoted', so strip only that
# first occurrence; an unbounded replace would also delete any later
# repetition of the same text inside the message body.
df['message'] = df.apply(lambda row: row['Text-unquoted'].replace(row['subject'], '', 1).strip(), axis=1)

In [22]:
# Order the rows by 'Recipient'; inplace=True modifies df itself instead of
# returning a new frame.
df.sort_values('Recipient', inplace=True)

In [23]:
# Wrap the rows (orient='records': one dict per row) under a top-level 'data' key.
jsondata = {'data': df.to_dict(orient='records')}

In [24]:
# open(..., 'w') does not create missing parent directories, so make sure
# 'data/' exists before writing the export.
os.makedirs('data', exist_ok=True)
# Encoding pinned so the file does not depend on the platform default.
with open('data/companies.json', 'w', encoding='utf-8') as fp:
    json.dump(jsondata, fp)