In [None]:
import requests
import json

In [None]:
# Metadata columns requested from HUDOC for every document. The names are
# joined with commas to fill the `select` parameter of the query URL.
fields = [
    "itemid",
    "applicability",
    "application",
    "appno",
    "article",
    "conclusion",
    "decisiondate",
    "docname",
    "documentcollectionid",
    "documentcollectionid2",
    "doctype",
    "doctypebranch",
    "ecli",
    "externalsources",
    "extractedappno",
    "importance",
    "introductiondate",
    "isplaceholder",
    "issue",
    "judgementdate",
    "kpdate",
    "kpdateAsText",
    "kpthesaurus",
    "languageisocode",
    "meetingnumber",
    "originatingbody",
    "publishedby",
    "Rank",
    "referencedate",
    "reportdate",
    "representedby",
    "resolutiondate",
    "resolutionnumber",
    "respondent",
    "respondentOrderEng",
    "rulesofcourt",
    "separateopinion",
    "scl",
    "sharepointid",
    "typedescription",
    "nonviolation",
    "violation",
]
# TODO: add field selection so that not every field is downloaded automatically.
# TODO: add documentcollectionid2 selection so the caller can choose which
#       document types to include. Judgments and communicated cases are
#       currently included.

# URL template for the HUDOC query endpoint. The {select}, {start} and
# {length} placeholders are filled in later with str.format().
# Sorting by itemid makes the download systematic, since this number always
# increases with new documents in HUDOC.
META_URL = ''.join([
    'http://hudoc.echr.coe.int/app/query/results',
    '?query=(contentsitename=ECHR) AND (documentcollectionid2:"JUDGMENTS" OR documentcollectionid2:"COMMUNICATEDCASES")',
    '&select={select}',
    '&sort=itemid Ascending',
    '&start={start}&length={length}',
])
# Percent-encode the spaces and double quotes contained in the template.
# Example of a formatted URL:
# "https://hudoc.echr.coe.int/app/query/results?query=(contentsitename=ECHR)%20AND%20(documentcollectionid2:%22JUDGMENTS%22%20OR%20documentcollectionid2:%22COMMUNICATEDCASES%22)&select=itemid,applicability,application,appno,article,conclusion,decisiondate,docname,documentcollectionid,%20documentcollectionid2,doctype,doctypebranch,ecli,externalsources,extractedappno,importance,introductiondate,%20isplaceholder,issue,judgementdate,kpdate,kpdateAsText,kpthesaurus,languageisocode,meetingnumber,%20originatingbody,publishedby,Rank,referencedate,reportdate,representedby,resolutiondate,%20resolutionnumber,respondent,respondentOrderEng,rulesofcourt,separateopinion,scl,sharepointid,typedescription,%20nonviolation,violation&sort=itemid%20Ascending&start=0&length=2"
META_URL = META_URL.replace(' ', '%20').replace('"', '%22')
# Bare expression: shows the resulting template as the notebook cell output.
META_URL

In [None]:
def get_echr_metadata(meta_url, start, length, filename):
    """Download HUDOC metadata for a range of documents and save it as JSON.

    Documents ``start`` up to (but not including) ``start + length`` are
    requested in batches of at most 500 items. The ``columns`` dictionary of
    every returned result is collected into one list, which is written to
    ``filename`` as a single JSON array.

    Args:
        meta_url: URL template containing ``{select}``, ``{start}`` and
            ``{length}`` placeholders.
        start: Index of the first document to fetch.
        length: Total number of documents to fetch.
        filename: Path of the JSON file to write (overwritten if present).

    Raises:
        requests.HTTPError: If HUDOC answers a request with an error status.

    The requested columns are taken from the module-level ``fields`` list.
    """
    max_batch = 500  # HUDOC does not allow fetching more than 500 items at the same time
    end = start + length
    select = ','.join(fields)
    needs_batching = length > max_batch
    data = []

    # Step through the requested window; the last batch is shortened so that
    # no more than `length` documents are requested in total.
    for offset in range(start, end, max_batch):
        batch_length = min(max_batch, end - offset)
        if needs_batching:
            print(" - Fetching information from cases {} to {}.".format(offset, offset + batch_length))

        # Format the URL template for this batch
        url = meta_url.format(select=select, start=offset, length=batch_length)
        r = requests.get(url)
        # Fail loudly on an HTTP error instead of trying to parse an error page
        r.raise_for_status()

        # Keep the column dictionary of every document in the results list
        data.extend(result['columns'] for result in r.json()['results'])

    # `data` is a list, so the file holds one JSON array with one object per
    # document; json.load() on the file returns that list.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f)

    print(f'Requested ECHR metadata are fetched and written to {filename}')

In [None]:
# Fetch 8 documents starting at index 0 and write them to ECHRrrrrr.json.
get_echr_metadata(
    meta_url=META_URL,
    start=0,
    length=8,
    filename="ECHRrrrrr.json",
)