Reference: https://wiki.personaldata.io/w/api.php?action=help&modules=expandtemplates

In [1]:
import requests
import urllib.parse
import pandas as pd
import json

In [22]:
# Short list of companies to process; later cells read the 'key' and 'country' columns.
df_companies = pd.read_excel(io='_helpers/companies_short_list.xlsx')

In [19]:
# SPARQL query kept at hand for adding companies to the short list.
# It selects items whose P3 is Q96 or Q991 and whose lowercased label contains
# LOWERCASE_SLUG, with optional country (P55) and email (P17).
# NOTE(review): LOWERCASE_SLUG looks like a placeholder that must be replaced by a
# quoted lowercase search string before the query is run - confirm. The string is
# not used by any other cell visible in this notebook.
sparql = '''SELECT ?item ?itemLabel ?email ?country
WHERE
{
  {?item pdiot:P3 pdio:Q96; rdfs:label ?itemLabel.}
  UNION
  {?item pdiot:P3 pdio:Q991; rdfs:label ?itemLabel.}
  FILTER (CONTAINS(LCASE(?itemLabel), LOWERCASE_SLUG)).
  OPTIONAL{?item pdiot:P55 ?country}
  OPTIONAL{?item pdiot:P17 ?email .}
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr". }
}'''

In [23]:
# to query wiki.personaldata.io for 1 item

def getText(q, country, timeout=30):
    """Expand the access-request mailto template for one wiki item.

    Posts an ``expandtemplates`` request to the wiki.personaldata.io API for
    ``{{MailtoAccess|<q>}}``, or ``{{MailtoSwissAccess|<q>}}`` when ``country``
    is ``'CH'``.

    Args:
        q: Item identifier used as the template argument (e.g. ``'Q1243'``).
        country: Country code; only ``'CH'`` selects the Swiss template.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        ``{'key': q, 'data': <expanded wikitext>}`` on success. ``data`` is
        ``None`` when the server answers with an error status or with a
        payload that carries no expanded wikitext. ``False`` is returned when
        the request itself fails (connection error, timeout, ...).
    """
    # The GDPR template is the default; Switzerland has its own template.
    template = 'MailtoSwissAccess' if country == 'CH' else 'MailtoAccess'

    data = {'key': q, 'data': None}
    endpoint = 'https://wiki.personaldata.io/w/api.php'

    load = {
        'action': 'expandtemplates',
        'text': '{{' + template + '|' + q + '}}',
        'format': 'json',
        'prop': 'wikitext'
    }
    try:
        response = requests.post(endpoint, data=load, timeout=timeout)
    except requests.exceptions.RequestException as e:  # any requests-level failure
        print(e)
        return False

    # HTTP error status: keep the historical contract of returning data=None.
    if not response.ok:
        return data

    try:
        data['data'] = response.json()['expandtemplates']['wikitext']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body, or an API error payload without the expanded text.
        print('No expanded wikitext returned for', q)
    return data

In [26]:
# Expand the mailto template for every short-listed company.
# getText returns False when the request fails and leaves 'data' as None when
# no text came back; such entries would crash the parsing cells further down,
# so they are reported and left out instead of being appended.
items = []
for i, row in df_companies.iterrows():
    item = getText(row['key'], row['country'])
    if item and item['data']:
        items.append(item)
    else:
        print('No mailto text for', row['key'], '- skipped')

In [27]:
# One row per fetched item, with the 'key' and 'data' fields as columns.
df = pd.DataFrame(data=items)

In [53]:
def getRecipient(text):
    """Return the recipient name embedded in a mailto URL's subject header.

    The ``subject`` header is looked up by name in the part of the URL after
    the first ``?``, so the result does not depend on ``subject`` being the
    first header. The fixed prefixes "Demande d'accès" and
    "Subject Access Request to" are removed and the remainder is stripped.

    Args:
        text: A mailto URL such as ``'mailto:a@b.c?subject=...&body=...'``.

    Returns:
        The recipient name, or ``''`` when the URL has no non-empty subject.
    """
    # Everything after the first '?' is the header query string.
    _, _, query = text.partition('?')
    subject = ''
    for name, value in urllib.parse.parse_qsl(query):
        if name.lower() == 'subject':
            subject = value
            break
    return subject.replace("Demande d'accès", "").replace('Subject Access Request to', '').strip()

def getMailto(text):
    """Return the ``mailto:<address>`` part of a mailto URL, without headers.

    The URL is cut at the first ``?`` and percent-escapes in the address are
    decoded. Plain ``unquote`` is used on purpose: query-string decoding would
    turn a literal ``+`` in the address (plus-addressing) into a space.

    Args:
        text: A mailto URL such as ``'mailto:a@b.c?subject=...&body=...'``.

    Returns:
        The decoded URL up to, but not including, the first ``?``.
    """
    address, _, _ = text.partition('?')
    return urllib.parse.unquote(address)
# Recipient name extracted from each mailto URL's subject.
df['Recipient'] = df['data'].map(getRecipient)

In [54]:
# 'mailto:<address>' part of each URL, headers removed.
df['Mailto'] = df['data'].map(getMailto)

In [55]:
# Decoded text of the body header.
# Split on the raw '&body=' delimiter first (once), then decode: decoding the
# whole URL before splitting would cut the body short whenever the body itself
# contains an encoded '&body='.
df['Text-unquoted'] = df['data'].apply(
    lambda x: urllib.parse.unquote(x.split('&body=', 1)[1])
)

In [56]:
# Rough heuristic - TODO: generalise it.
# When the third character of the body is 'm' (e.g. a body starting with
# "Dem..."), the first paragraph of the body is used as the subject; every
# other body gets the fixed English subject.
df['subject'] = df['Text-unquoted'].map(
    lambda body: 'Subject Access Request' if body[2] != 'm' else body.split('\n\n')[0]
)

In [57]:
def _strip_leading_subject(row):
    """Return the row's body text without a leading copy of its subject.

    Only a subject found at the very start of the body is removed. A plain
    ``str.replace`` would also delete every later occurrence of the subject,
    e.g. the phrase 'Subject Access Request' inside an English body.
    """
    body = row['Text-unquoted']
    subject = row['subject']
    if subject and body.startswith(subject):
        body = body[len(subject):]
    return body.strip()


df['message'] = df.apply(_strip_leading_subject, axis=1)

In [58]:
# Bare e-mail address: the Mailto value with 'mailto:' removed.
df['recipient'] = df['Mailto'].map(lambda address: address.replace('mailto:', ''))

In [59]:
# Order the rows by recipient name; the frame is sorted in place.
df.sort_values(by='Recipient', inplace=True)

In [60]:
# Wrap the rows (one dict per row) under a single top-level 'data' key.
jsondata = dict(data=df.to_dict(orient='records'))

In [61]:
# Serialise the payload and write it to the companies file in one go.
with open('data/companies.json', mode='w') as out_file:
    out_file.write(json.dumps(jsondata))

In [62]:
# Display the final frame (same rows that were written to data/companies.json).
df

Unnamed: 0,key,data,Recipient,Mailto,Text-unquoted,subject,message,recipient
3,Q1243,mailto:dpo@blizzard.com?subject=Subject%20Acce...,Blizzard Entertainment,mailto:dpo@blizzard.com,"Dear Blizzard Entertainment,\n\nThis is a tran...",Subject Access Request,"Dear Blizzard Entertainment,\n\nThis is a tran...",dpo@blizzard.com
7,Q1938,mailto:datenschutz@sbb.ch?subject=Demande%20d%...,CFF,mailto:datenschutz@sbb.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher CFF,\n\nMadame, Monsieur,\n\nPar la prése...",datenschutz@sbb.ch
5,Q241,mailto:datarequests@support.facebook.com?subje...,Facebook,mailto:datarequests@support.facebook.com,"Dear Facebook,\n\nThis is a transparency reque...",Subject Access Request,"Dear Facebook,\n\nThis is a transparency reque...",datarequests@support.facebook.com
9,Q3345,mailto:datenschutz@intrum.ch?subject=Demande%2...,Intrum Justitia,mailto:datenschutz@intrum.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher Intrum Justitia,\n\nMadame, Monsieur,\n\n...",datenschutz@intrum.ch
10,Q1185,mailto:protectiondonnees@letemps.ch?subject=De...,Le Temps,mailto:protectiondonnees@letemps.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher Le Temps,\n\nMadame, Monsieur,\n\nPar la ...",protectiondonnees@letemps.ch
1,Q1010,mailto:postfinance@postfinance.ch?subject=Dema...,PostFinance,mailto:postfinance@postfinance.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher PostFinance,\n\nMadame, Monsieur,\n\nPar ...",postfinance@postfinance.ch
11,Q3531,mailto:datenschutz@ringieraxelspringer.ch?subj...,Ringier Axel Springer Switzerland SA,mailto:datenschutz@ringieraxelspringer.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher Ringier Axel Springer Switzerland SA,\n\n...",datenschutz@ringieraxelspringer.ch
0,Q988,mailto:datenschutz@swisscom.com?subject=Demand...,Swisscom,mailto:datenschutz@swisscom.com,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher Swisscom,\n\nMadame, Monsieur,\n\nPar la ...",datenschutz@swisscom.com
6,Q1022,mailto:privacy@gotinder.com?subject=Subject%20...,Tinder,mailto:privacy@gotinder.com,"Dear Tinder,\n\nThis is a transparency request...",Subject Access Request,"Dear Tinder,\n\nThis is a transparency request...",privacy@gotinder.com
2,Q1011,mailto:twint@twint.ch?subject=Demande%20d%27ac...,Twint,mailto:twint@twint.ch,Demande d'accès aux données personnelles (art....,Demande d'accès aux données personnelles (art....,"Cher Twint,\n\nMadame, Monsieur,\n\nPar la pré...",twint@twint.ch
