In [354]:
# Some utils
from datetime import datetime
import re


def now_in_string():
    """Return the current time of day as an ``HH:MM:SS`` string."""
    current_moment = datetime.now()
    # datetime supports strftime codes directly in a format spec.
    return f"{current_moment:%H:%M:%S}"


def get_type_and_tribunal(long_name):
    """
    Parses the name of each tribunal, and returns the name and the speciality.

    The input is expected to look like ``"<type> - <tribunal>"`` (for example
    ``"EDUCACIO PRIMARIA - V16"``). The text before the ``" - "`` separator may
    contain word characters, whitespace and dots; the part after it is a run
    of word characters.

    Returns:
        A 2-tuple ``(type, tribunal)`` with the two captured groups.

    Raises:
        ValueError: if ``long_name`` does not contain the expected pattern.
    """
    # Raw string: "\s" and "\w" are invalid escape sequences in a plain
    # string literal and trigger warnings on recent Python versions.
    result = re.search(r'([\s\w.]*) - ([\w]*)', long_name)
    if result is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Cannot parse tribunal name: {long_name!r}")
    return result.groups()

In [355]:
import urllib.parse

import requests
from bs4 import BeautifulSoup


def get_all_specialties(base_url="https://ceice.gva.es/auto/Actas/"):
    """
    Download the index page at ``base_url`` and list the entries it contains.

    The page is expected to hold a ``<table id="indexlist">``; only rows whose
    first CSS class is ``even`` or ``odd`` are treated as entries.

    Args:
        base_url: URL of the index page. Relative links found in the table
            are resolved against it.

    Returns:
        A list of dicts with the keys ``name`` (link text with dots and
        slashes removed), ``link`` (absolute URL) and ``modified`` (text of
        the third column).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``indexlist`` table.
    """
    # A timeout prevents the notebook from hanging forever on a stalled server.
    response = requests.get(base_url, timeout=30)
    response.raise_for_status()

    bs_content = BeautifulSoup(response.text, 'lxml')
    table = bs_content.find('table', attrs={'id': 'indexlist'})
    if table is None:
        raise ValueError(f"No 'indexlist' table found at {base_url}")

    structured_rows = []
    for row in table.find_all('tr'):
        # Rows without a class attribute return None here; skip them instead
        # of failing when indexing into None.
        row_classes = row.get('class') or []
        if not row_classes or row_classes[0] not in ('even', 'odd'):
            continue

        cols = row.find_all('td')
        anchor = cols[1].a
        name = anchor.get_text().strip().replace('.', '').replace('/', '')
        structured_rows.append({
            'name': name,
            'link': urllib.parse.urljoin(base_url, anchor.get('href')).strip(),
            'modified': cols[2].get_text().strip(),
        })

    return structured_rows

# Fetch the list of specialties once; the download loop below iterates over it.
specialties = get_all_specialties()

In [356]:
import os
from pathlib import Path


def get_all_tribunals(speciality_row):
    """
    List the tribunals published under one specialty.

    Args:
        speciality_row: dict with at least a ``link`` key holding the URL of
            the specialty index page.

    Returns:
        A list of dicts with the keys ``type`` and ``tribunal`` (parsed from
        the entry name by ``get_type_and_tribunal``), ``name`` (link text
        with dots removed), ``link`` (absolute URL) and ``modified`` (text of
        the third column).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``indexlist`` table.
    """
    speciality_url = speciality_row['link']

    # A timeout prevents the notebook from hanging forever on a stalled server.
    response = requests.get(speciality_url, timeout=30)
    response.raise_for_status()

    bs_content = BeautifulSoup(response.text, 'lxml')
    table = bs_content.find('table', attrs={'id': 'indexlist'})
    if table is None:
        raise ValueError(f"No 'indexlist' table found at {speciality_url}")

    structured_rows = []
    for row in table.find_all('tr'):
        # Rows without a class attribute return None here; skip them instead
        # of failing when indexing into None.
        row_classes = row.get('class') or []
        if not row_classes or row_classes[0] not in ('even', 'odd'):
            continue

        cols = row.find_all('td')
        anchor = cols[1].a
        name = anchor.get_text().strip().replace('.', '')

        # Parse the name once and reuse both parts.
        tribunal_type, tribunal = get_type_and_tribunal(name)
        structured_rows.append({
            'type': tribunal_type.strip(),
            'tribunal': tribunal,
            'name': name,
            'link': urllib.parse.urljoin(speciality_url, anchor.get('href')).strip(),
            'modified': cols[2].get_text().strip(),
        })

    return structured_rows


def get_all_files(structured_row, exclude_pattern=None):
    """
    List the files published for one tribunal.

    Args:
        structured_row: tribunal dict with at least a ``link`` key holding
            the URL of the tribunal index page. It is stored unchanged in
            every result under ``_tribunal``.
        exclude_pattern: optional substring; entries whose name contains it
            are skipped (e.g. to leave out provisional files).

    Returns:
        A list of dicts with the keys ``_tribunal``, ``name`` (link text),
        ``link`` (absolute URL) and ``modified`` (text of the third column).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``indexlist`` table.
    """
    url_files = structured_row['link']

    # A timeout prevents the notebook from hanging forever on a stalled server.
    response = requests.get(url_files, timeout=30)
    response.raise_for_status()

    bs_content = BeautifulSoup(response.text, 'lxml')
    table = bs_content.find('table', attrs={'id': 'indexlist'})
    if table is None:
        raise ValueError(f"No 'indexlist' table found at {url_files}")

    structured_rows = []
    for row in table.find_all('tr'):
        # Rows without a class attribute return None here; skip them instead
        # of failing when indexing into None.
        row_classes = row.get('class') or []
        if not row_classes or row_classes[0] not in ('even', 'odd'):
            continue

        cols = row.find_all('td')
        anchor = cols[1].a
        name = anchor.get_text().strip()

        # Exclude provisional files, in the second round
        if exclude_pattern and exclude_pattern in name:
            continue

        structured_rows.append({
            '_tribunal': structured_row,
            'name': name,
            'link': urllib.parse.urljoin(url_files, anchor.get('href')),
            'modified': cols[2].get_text().strip(),
        })

    return structured_rows

def download_file(structured_row):
    """
    Download one file and store it as ``<type>/<tribunal> - <stem> <modified><ext>``.

    Args:
        structured_row: file dict with the keys ``name``, ``link``,
            ``modified`` and ``_tribunal`` (itself a dict with ``type`` and
            ``tribunal``).

    Returns:
        The relative path the file was written to.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never saved as if it were the document.

    The ``<type>`` directory is not created here; it must already exist.
    """
    tribunal_type = structured_row['_tribunal']['type']
    tribunal = structured_row['_tribunal']['tribunal']
    # ':' and '-' are replaced so the timestamp is safe to use in a file name.
    modified = structured_row['modified'].replace(':', '_').replace('-', '_')
    url = structured_row['link']
    name = Path(structured_row['name'])
    name_wo_ext = name.with_suffix('')
    name_ext = name.suffix
    file_name = f"{tribunal} - {name_wo_ext} {modified}{name_ext}"
    final_name = f"{tribunal_type}/{file_name}"

    r_doc = requests.get(url, timeout=60)
    r_doc.raise_for_status()
    # Context manager guarantees the handle is flushed and closed.
    with open(final_name, 'wb') as output_file:
        output_file.write(r_doc.content)
    print(f"\t{file_name}")

    return final_name


# Walk every specialty -> tribunal -> file and download each file into a
# directory named after the tribunal type.
for speciality in specialties:
    tribunals = get_all_tribunals(speciality)
    print(f"===== START {speciality['name']}")

    for idx, tribunal in enumerate(tribunals):
        # Create the directory for storing the PDFs (no-op if it already exists)
        tribunal_type = tribunal['type']
        os.makedirs(tribunal_type, exist_ok=True)

        counter_str = f"{idx}/{len(tribunals)}"
        print(f"{now_in_string()} ({counter_str}) {tribunal['type']} {tribunal['tribunal']}")

        # Uncomment this line if you only want to download the final files
        # files = get_all_files(tribunal, exclude_pattern="Prov")
        files = get_all_files(tribunal, exclude_pattern=None)

        for file in files:
            download_file(file)

    print(f"===== END {speciality['name']}")


===== START 128_EDUCACIO PRIMARIA
22:33:06 (0/135) EDUCACIO PRIMARIA V16
	V16 - DataConvocatoria2DID 2022_07_04 19_15.pdf
	V16 - ActaNotes1Aprovats 2022_07_01 10_22.pdf
	V16 - ActaNotes1Definitiva 2022_07_01 10_22.pdf
	V16 - ActaNotes1AprovatsProv 2022_06_29 11_39.pdf
	V16 - ActaNotes1Provisional 2022_06_29 11_38.pdf
22:33:07 (1/135) EDUCACIO PRIMARIA V26
	V26 - DataConvocatoria2DID 2022_07_04 18_57.pdf
	V26 - ActaNotes1Aprovats 2022_07_01 10_13.pdf
	V26 - ActaNotes1Definitiva 2022_07_01 10_13.pdf
	V26 - ActaNotes1Provisional 2022_06_29 11_31.pdf
	V26 - ActaNotes1AprovatsProv 2022_06_29 11_07.pdf
22:33:08 (2/135) EDUCACIO PRIMARIA V17
	V17 - DataConvocatoria2DID 2022_07_04 18_52.pdf
	V17 - ActaNotes1Aprovats 2022_07_01 10_25.pdf
	V17 - ActaNotes1Definitiva 2022_07_01 10_25.pdf
	V17 - ActaNotes1AprovatsProv 2022_06_29 12_42.pdf
	V17 - ActaNotes1Provisional 2022_06_29 12_41.pdf
22:33:09 (3/135) EDUCACIO PRIMARIA A43
	A43 - DataConvocatoria2DID 2022_07_04 18_42.pdf
	A43 - ActaNotes1Aprova

	A46 - ActaNotes1Aprovats 2022_07_01 10_22.pdf
	A46 - ActaNotes1Definitiva 2022_07_01 10_22.pdf
	A46 - ActaNotes1AprovatsProv 2022_06_29 12_07.pdf
	A46 - ActaNotes1Provisional 2022_06_29 12_06.pdf
22:33:38 (29/135) EDUCACIO PRIMARIA V1
	V1 - DataConvocatoria2DID 2022_07_04 13_31.pdf
	V1 - ActaNotes1Aprovats 2022_07_01 12_46.pdf
	V1 - ActaNotes1Definitiva 2022_07_01 12_45.pdf
	V1 - ActaNotes1AprovatsProv 2022_06_29 13_18.pdf
	V1 - ActaNotes1Provisional 2022_06_29 13_15.pdf
22:33:39 (30/135) EDUCACIO PRIMARIA A27
	A27 - DataConvocatoria2DID 2022_07_04 13_29.pdf
	A27 - ActaNotes1Aprovats 2022_07_01 10_39.pdf
	A27 - ActaNotes1Definitiva 2022_07_01 10_38.pdf
	A27 - ActaNotes1AprovatsProv 2022_06_29 11_11.pdf
	A27 - ActaNotes1Provisional 2022_06_29 11_11.pdf
22:33:40 (31/135) EDUCACIO PRIMARIA V57
	V57 - DataConvocatoria2DID 2022_07_04 12_43.pdf
	V57 - ActaNotes1Aprovats 2022_07_01 10_24.pdf
	V57 - ActaNotes1Definitiva 2022_07_01 10_24.pdf
	V57 - ActaNotes1AprovatsProv 2022_06_29 09_20.pdf
	

	A31 - DataConvocatoria2DID 2022_07_01 14_08.pdf
	A31 - ActaNotes1Aprovats 2022_07_01 10_43.pdf
	A31 - ActaNotes1Definitiva 2022_07_01 10_43.pdf
	A31 - ActaNotes1AprovatsProv 2022_06_29 11_40.pdf
	A31 - ActaNotes1Provisional 2022_06_29 11_39.pdf
22:34:10 (58/135) EDUCACIO PRIMARIA V4
	V4 - DataConvocatoria2DID 2022_07_01 14_07.pdf
	V4 - ActaNotes1Aprovats 2022_07_01 13_21.pdf
	V4 - ActaNotes1Definitiva 2022_07_01 13_19.pdf
	V4 - ActaNotes1AprovatsProv 2022_06_29 11_34.pdf
	V4 - ActaNotes1Provisional 2022_06_29 11_34.pdf
22:34:11 (59/135) EDUCACIO PRIMARIA V44
	V44 - DataConvocatoria2DID 2022_07_01 14_02.pdf
	V44 - ActaNotes1Aprovats 2022_07_01 12_04.pdf
	V44 - ActaNotes1Definitiva 2022_07_01 12_03.pdf
	V44 - ActaNotes1AprovatsProv 2022_06_29 12_40.pdf
	V44 - ActaNotes1Provisional 2022_06_29 12_39.pdf
22:34:12 (60/135) EDUCACIO PRIMARIA C8
	C8 - DataConvocatoria2DID 2022_07_01 14_01.pdf
	C8 - ActaNotes1Aprovats 2022_07_01 10_45.pdf
	C8 - ActaNotes1Definitiva 2022_07_01 10_45.pdf
	C8 - A

	V40 - DataConvocatoria2DID 2022_07_01 12_16.pdf
	V40 - ActaNotes1Aprovats 2022_07_01 10_47.pdf
	V40 - ActaNotes1Definitiva 2022_07_01 10_47.pdf
	V40 - ActaNotes1AprovatsProv 2022_06_29 10_25.pdf
	V40 - ActaNotes1Provisional 2022_06_29 10_24.pdf
22:34:44 (87/135) EDUCACIO PRIMARIA A2
	A2 - DataConvocatoria2DID 2022_07_01 12_16.pdf
	A2 - ActaNotes1Aprovats 2022_07_01 10_56.pdf
	A2 - ActaNotes1Definitiva 2022_07_01 10_56.pdf
	A2 - ActaNotes1AprovatsProv 2022_06_29 09_33.pdf
	A2 - ActaNotes1Provisional 2022_06_29 09_28.pdf
22:34:46 (88/135) EDUCACIO PRIMARIA V22
	V22 - DataConvocatoria2DID 2022_07_01 12_11.pdf
	V22 - ActaNotes1Aprovats 2022_07_01 10_14.pdf
	V22 - ActaNotes1Definitiva 2022_07_01 10_14.pdf
	V22 - ActaNotes1AprovatsProv 2022_06_29 10_27.pdf
	V22 - ActaNotes1Provisional 2022_06_29 10_25.pdf
22:34:47 (89/135) EDUCACIO PRIMARIA V37
	V37 - DataConvocatoria2DID 2022_07_01 12_08.pdf
	V37 - ActaNotes1Aprovats 2022_07_01 11_12.pdf
	V37 - ActaNotes1Definitiva 2022_07_01 11_11.pdf
	V3

	C2 - ActaNotes1AprovatsProv 2022_06_28 14_23.pdf
	C2 - ActaNotes1Provisional 2022_06_28 12_30.pdf
22:35:20 (115/135) EDUCACIO PRIMARIA V32
	V32 - DataConvocatoria2DID 2022_07_01 11_09.pdf
	V32 - ActaNotes1Aprovats 2022_07_01 10_42.pdf
	V32 - ActaNotes1Definitiva 2022_07_01 10_42.pdf
	V32 - ActaNotes1AprovatsProv 2022_06_29 13_38.pdf
	V32 - ActaNotes1Provisional 2022_06_29 13_36.pdf
22:35:21 (116/135) EDUCACIO PRIMARIA A16
	A16 - DataConvocatoria2DID 2022_07_01 11_09.pdf
	A16 - ActaNotes1Aprovats 2022_07_01 10_54.pdf
	A16 - ActaNotes1Definitiva 2022_07_01 10_53.pdf
	A16 - ActaNotes1AprovatsProv 2022_06_29 11_51.pdf
	A16 - ActaNotes1Provisional 2022_06_29 11_42.pdf
22:35:22 (117/135) EDUCACIO PRIMARIA C5
	C5 - DataConvocatoria2DID 2022_07_01 11_08.pdf
	C5 - ActaNotes1Aprovats 2022_07_01 10_33.pdf
	C5 - ActaNotes1Definitiva 2022_07_01 10_33.pdf
	C5 - ActaNotes1AprovatsProv 2022_06_28 12_33.pdf
	C5 - ActaNotes1Provisional 2022_06_28 12_33.pdf
22:35:24 (118/135) EDUCACIO PRIMARIA A28
	A28 

	V7 - DataConvocatoria2DID 2022_07_01 14_29.pdf
	V7 - ActaNotes1Definitiva 2022_07_01 13_30.pdf
	V7 - ActaNotes1Aprovats 2022_07_01 10_50.pdf
	V7 - ActaNotes1AprovatsProv 2022_06_29 12_07.pdf
	V7 - ActaNotes1Provisional 2022_06_29 12_06.pdf
22:35:55 (9/33) PEDAGOGIA TERAPEUTICA A10
	A10 - DataConvocatoria2DID 2022_07_01 13_46.pdf
	A10 - ActaNotes1Aprovats 2022_07_01 10_44.pdf
	A10 - ActaNotes1Definitiva 2022_07_01 10_44.pdf
	A10 - ActaNotes1AprovatsProv 2022_06_29 12_09.pdf
	A10 - ActaNotes1Provisional 2022_06_29 12_07.pdf
22:35:56 (10/33) PEDAGOGIA TERAPEUTICA A11
	A11 - DataConvocatoria2DID 2022_07_01 13_06.pdf
	A11 - ActaNotes1Aprovats 2022_07_01 10_51.pdf
	A11 - ActaNotes1Definitiva 2022_07_01 10_51.pdf
	A11 - ActaNotes1AprovatsProv 2022_06_30 07_51.pdf
	A11 - ActaNotes1Provisional 2022_06_29 12_11.pdf
22:35:57 (11/33) PEDAGOGIA TERAPEUTICA A4
	A4 - DataConvocatoria2DID 2022_07_01 12_34.pdf
	A4 - ActaNotes1Definitiva 2022_07_01 10_47.pdf
	A4 - ActaNotes1Aprovats 2022_07_01 10_46.pd

	V10 - DataConvocatoria1PRA 2022_06_18 15_47.pdf
22:36:27 (3/23) EDUCACIO FISICA V6
	V6 - DataConvocatoria2DID 2022_07_04 08_51.pdf
	V6 - ActaNotes1Aprovats 2022_07_01 11_11.pdf
	V6 - ActaNotes1Definitiva 2022_07_01 11_10.pdf
	V6 - ActaNotes1AprovatsProv 2022_06_29 09_16.pdf
	V6 - ActaNotes1Provisional 2022_06_29 09_16.pdf
	V6 - DataConvocatoria1PRA 2022_06_19 18_33.pdf
22:36:29 (4/23) EDUCACIO FISICA C3
	C3 - DataConvocatoria2DID 2022_07_04 08_50.pdf
	C3 - ActaNotes1Aprovats 2022_07_01 12_56.pdf
	C3 - ActaNotes1Definitiva 2022_07_01 11_12.pdf
	C3 - ActaNotes1AprovatsProv 2022_06_28 14_48.pdf
	C3 - ActaNotes1Provisional 2022_06_28 14_48.pdf
	C3 - DataConvocatoria1PRA 2022_06_21 08_52.pdf
22:36:30 (5/23) EDUCACIO FISICA C1
	C1 - DataConvocatoria2DID 2022_07_04 08_49.pdf
	C1 - ActaNotes1Aprovats 2022_07_01 10_21.pdf
	C1 - ActaNotes1Definitiva 2022_07_01 10_21.pdf
	C1 - ActaNotes1AprovatsProv 2022_06_28 13_53.pdf
	C1 - ActaNotes1Provisional 2022_06_28 13_52.pdf
	C1 - DataConvocatoria1PRA 

	V4 - ActaNotes1Provisional 2022_06_29 12_00.pdf
22:37:02 (6/18) AUDICIO I LLENGUATGE A6
	A6 - DataConvocatoria2DID 2022_07_04 13_44.pdf
	A6 - ActaNotes1Definitiva 2022_07_01 11_48.pdf
	A6 - ActaNotes1Aprovats 2022_07_01 11_48.pdf
	A6 - ActaNotes1AprovatsProv 2022_06_29 10_48.pdf
	A6 - ActaNotes1Provisional 2022_06_29 10_48.pdf
22:37:03 (7/18) AUDICIO I LLENGUATGE V5
	V5 - DataConvocatoria2DID 2022_07_04 12_42.pdf
	V5 - ActaNotes1Aprovats 2022_07_01 10_19.pdf
	V5 - ActaNotes1Definitiva 2022_07_01 10_19.pdf
	V5 - ActaNotes1AprovatsProv 2022_06_29 10_07.pdf
	V5 - ActaNotes1Provisional 2022_06_29 10_06.pdf
22:37:05 (8/18) AUDICIO I LLENGUATGE V6
	V6 - DataConvocatoria2DID 2022_07_04 10_04.pdf
	V6 - ActaNotes1Aprovats 2022_07_01 10_45.pdf
	V6 - ActaNotes1Definitiva 2022_07_01 10_45.pdf
	V6 - ActaNotes1AprovatsProv 2022_06_29 10_03.pdf
	V6 - ActaNotes1Provisional 2022_06_29 10_03.pdf
22:37:06 (9/18) AUDICIO I LLENGUATGE A5
	A5 - DataConvocatoria2DID 2022_07_04 08_24.pdf
	A5 - ActaNotes1Apro

	V58 - ActaNotes1Provisional 2022_06_29 10_06.pdf
22:37:35 (16/147) EDUCACIO INFANTIL C18
	C18 - DataConvocatoria2DID 2022_07_01 14_06.pdf
	C18 - ActaNotes1Aprovats 2022_07_01 11_08.pdf
	C18 - ActaNotes1Definitiva 2022_07_01 11_07.pdf
	C18 - ActaNotes1AprovatsProv 2022_06_28 16_54.pdf
	C18 - ActaNotes1Provisional 2022_06_28 16_53.pdf
22:37:37 (17/147) EDUCACIO INFANTIL V52
	V52 - DataConvocatoria2DID 2022_07_01 13_44.pdf
	V52 - ActaNotes1Aprovats 2022_07_01 12_46.pdf
	V52 - ActaNotes1Definitiva 2022_07_01 12_46.pdf
	V52 - ActaNotes1AprovatsProv 2022_06_29 09_53.pdf
	V52 - ActaNotes1Provisional 2022_06_29 07_58.pdf
22:37:38 (18/147) EDUCACIO INFANTIL V34
	V34 - DataConvocatoria2DID 2022_07_01 13_39.pdf
	V34 - ActaNotes1Aprovats 2022_07_01 11_10.pdf
	V34 - ActaNotes1Definitiva 2022_07_01 11_07.pdf
	V34 - ActaNotes1AprovatsProv 2022_06_29 09_40.pdf
	V34 - ActaNotes1Provisional 2022_06_29 09_39.pdf
22:37:39 (19/147) EDUCACIO INFANTIL V57
	V57 - DataConvocatoria2DID 2022_07_01 13_32.pdf
	V5

	V67 - ActaNotes1Definitiva 2022_07_01 11_09.pdf
	V67 - ActaNotes1AprovatsProv 2022_06_29 08_46.pdf
	V67 - ActaNotes1Provisional 2022_06_29 08_46.pdf
22:38:09 (45/147) EDUCACIO INFANTIL V62
	V62 - DataConvocatoria2DID 2022_07_01 12_15.pdf
	V62 - ActaNotes1Definitiva 2022_07_01 10_28.pdf
	V62 - ActaNotes1Aprovats 2022_07_01 10_27.pdf
	V62 - ActaNotes1AprovatsProv 2022_06_29 08_44.pdf
	V62 - ActaNotes1Provisional 2022_06_29 08_44.pdf
22:38:10 (46/147) EDUCACIO INFANTIL V63
	V63 - DataConvocatoria2DID 2022_07_01 12_15.pdf
	V63 - ActaNotes1Aprovats 2022_07_01 12_09.pdf
	V63 - ActaNotes1Definitiva 2022_07_01 10_28.pdf
	V63 - ActaNotes1AprovatsProv 2022_06_29 08_45.pdf
	V63 - ActaNotes1Provisional 2022_06_29 08_45.pdf
22:38:11 (47/147) EDUCACIO INFANTIL A33
	A33 - DataConvocatoria2DID 2022_07_01 12_14.pdf
	A33 - ActaNotes1Aprovats 2022_07_01 11_07.pdf
	A33 - ActaNotes1Definitiva 2022_07_01 11_07.pdf
	A33 - ActaNotes1AprovatsProv 2022_06_29 17_29.pdf
	A33 - ActaNotes1Provisional 2022_06_29 10

	C1 - DataConvocatoria2DID 2022_07_01 11_35.pdf
	C1 - ActaNotes1Aprovats 2022_07_01 11_17.pdf
	C1 - ActaNotes1Definitiva 2022_07_01 10_59.pdf
	C1 - ActaNotes1AprovatsProv 2022_06_28 11_58.pdf
	C1 - ActaNotes1Provisional 2022_06_28 11_56.pdf
22:38:44 (74/147) EDUCACIO INFANTIL C2
	C2 - DataConvocatoria2DID 2022_07_01 11_35.pdf
	C2 - ActaNotes1Aprovats 2022_07_01 11_27.pdf
	C2 - ActaNotes1Definitiva 2022_07_01 11_24.pdf
	C2 - ActaNotes1Provisional 2022_06_28 11_53.pdf
	C2 - ActaNotes1AprovatsProv 2022_06_28 11_53.pdf
22:38:45 (75/147) EDUCACIO INFANTIL A18
	A18 - DataConvocatoria2DID 2022_07_01 11_35.pdf
	A18 - ActaNotes1Aprovats 2022_07_01 10_27.pdf
	A18 - ActaNotes1Definitiva 2022_07_01 10_27.pdf
	A18 - ActaNotes1AprovatsProv 2022_06_29 09_47.pdf
	A18 - ActaNotes1Provisional 2022_06_29 09_43.pdf
22:38:47 (76/147) EDUCACIO INFANTIL C4
	C4 - DataConvocatoria2DID 2022_07_01 11_34.pdf
	C4 - ActaNotes1Aprovats 2022_07_01 11_26.pdf
	C4 - ActaNotes1Definitiva 2022_07_01 11_24.pdf
	C4 - ActaNo

	V29 - DataConvocatoria2DID 2022_07_01 11_16.pdf
	V29 - ActaNotes1Aprovats 2022_07_01 10_58.pdf
	V29 - ActaNotes1Definitiva 2022_07_01 10_56.pdf
	V29 - ActaNotes1AprovatsProv 2022_06_29 09_13.pdf
	V29 - ActaNotes1Provisional 2022_06_29 09_09.pdf
22:39:19 (103/147) EDUCACIO INFANTIL A50
	A50 - DataConvocatoria2DID 2022_07_01 11_14.pdf
	A50 - ActaNotes1Aprovats 2022_07_01 10_42.pdf
	A50 - ActaNotes1Definitiva 2022_07_01 10_41.pdf
	A50 - ActaNotes1AprovatsProv 2022_06_29 11_45.pdf
	A50 - ActaNotes1Provisional 2022_06_29 11_43.pdf
22:39:20 (104/147) EDUCACIO INFANTIL A6
	A6 - DataConvocatoria2DID 2022_07_01 11_13.pdf
	A6 - ActaNotes1Aprovats 2022_07_01 10_40.pdf
	A6 - ActaNotes1Definitiva 2022_07_01 10_40.pdf
	A6 - ActaNotes1AprovatsProv 2022_06_29 08_13.pdf
	A6 - ActaNotes1Provisional 2022_06_29 08_13.pdf
22:39:21 (105/147) EDUCACIO INFANTIL V17
	V17 - DataConvocatoria2DID 2022_07_01 11_13.pdf
	V17 - ActaNotes1Aprovats 2022_07_01 10_57.pdf
	V17 - ActaNotes1Definitiva 2022_07_01 10_57.pdf


	V27 - ActaNotes1Provisional 2022_06_29 09_45.pdf
22:39:51 (131/147) EDUCACIO INFANTIL A24
	A24 - DataConvocatoria2DID 2022_07_01 10_37.pdf
	A24 - ActaNotes1Aprovats 2022_07_01 10_29.pdf
	A24 - ActaNotes1Definitiva 2022_07_01 10_28.pdf
	A24 - ActaNotes1AprovatsProv 2022_06_29 12_02.pdf
	A24 - ActaNotes1Provisional 2022_06_29 12_02.pdf
22:39:52 (132/147) EDUCACIO INFANTIL A13
	A13 - DataConvocatoria2DID 2022_07_01 10_36.pdf
	A13 - ActaNotes1Aprovats 2022_07_01 10_25.pdf
	A13 - ActaNotes1Definitiva 2022_07_01 10_19.pdf
	A13 - ActaNotes1AprovatsProv 2022_06_29 08_50.pdf
	A13 - ActaNotes1Provisional 2022_06_29 08_50.pdf
22:39:53 (133/147) EDUCACIO INFANTIL V26
	V26 - DataConvocatoria2DID 2022_07_01 10_35.pdf
	V26 - ActaNotes1Aprovats 2022_07_01 10_17.pdf
	V26 - ActaNotes1Definitiva 2022_07_01 10_16.pdf
	V26 - ActaNotes1AprovatsProv 2022_06_29 09_29.pdf
	V26 - ActaNotes1Provisional 2022_06_29 09_28.pdf
22:39:54 (134/147) EDUCACIO INFANTIL V74
	V74 - DataConvocatoria2DID 2022_07_01 10_35.pdf

	V5 - ActaNotes1Provisional 2022_06_29 07_44.pdf
22:40:25 (13/28) ANGLES V3
	V3 - DataConvocatoria2DID 2022_07_01 11_19.pdf
	V3 - ActaNotes1Aprovats 2022_07_01 11_06.pdf
	V3 - ActaNotes1Definitiva 2022_07_01 11_06.pdf
	V3 - ActaNotes1AprovatsProv 2022_06_29 07_17.pdf
	V3 - ActaNotes1Provisional 2022_06_29 07_17.pdf
22:40:26 (14/28) ANGLES V7
	V7 - DataConvocatoria2DID 2022_07_01 11_07.pdf
	V7 - ActaNotes1Aprovats 2022_07_01 10_41.pdf
	V7 - ActaNotes1Definitiva 2022_07_01 10_41.pdf
	V7 - ActaNotes1AprovatsProv 2022_06_29 07_45.pdf
	V7 - ActaNotes1Provisional 2022_06_29 07_45.pdf
22:40:27 (15/28) ANGLES A3
	A3 - DataConvocatoria2DID 2022_07_01 11_00.pdf
	A3 - ActaNotes1Definitiva 2022_07_01 10_13.pdf
	A3 - ActaNotes1Aprovats 2022_07_01 10_12.pdf
	A3 - ActaNotes1Provisional 2022_06_29 09_28.pdf
	A3 - ActaNotes1AprovatsProv 2022_06_29 09_26.pdf
22:40:28 (16/28) ANGLES V13
	V13 - DataConvocatoria2DID 2022_07_01 10_58.pdf
	V13 - ActaNotes1Aprovats 2022_07_01 10_16.pdf
	V13 - ActaNotes1Defini

In [365]:
import tabula

# Directory that receives the per-table CSV files extracted from the PDFs.
CSV_PATH = "./csv/tmp/"

def extract_data_from_pdf_to_csv(file, suffix):
    """
    Extract every table of a PDF with tabula and write each one to a CSV file.

    Args:
        file: ``pathlib.Path`` of the PDF. Its file name must start with
            ``"<tribunal> - "``; its parent directory is used as specialty.
        suffix: label embedded in the output file names.

    Each table gets two extra columns, ``specialty`` and ``tribunal``, and is
    saved as ``<CSV_PATH>/<specialty>_<tribunal>_<suffix>_<index>.csv``.

    Raises:
        ValueError: if the file name does not match the expected pattern.
    """
    # Raw string: "\w" is an invalid escape sequence in a plain string literal.
    result = re.search(r'([\w]*) - ([\w]*)', file.name)
    if result is None:
        # Fail with a clear message instead of a TypeError on None[1].
        raise ValueError(f"Cannot extract the tribunal from file name: {file.name!r}")
    specialty = str(file.parent).strip()
    tribunal = result[1]

    tables = tabula.read_pdf(file, pages="all")
    print(f"({len(tables)} tables on this file)")

    for idx, table in enumerate(tables):
        # A scalar is broadcast to every row; no need to build a list.
        table['specialty'] = specialty
        table['tribunal'] = tribunal
        filename = Path(CSV_PATH) / f"{specialty}_{tribunal}_{suffix}_{idx}.csv"
        table.to_csv(filename, index=False)

def extract_data_from_pattern(glob_pattern, suffix):
    """
    Run the PDF-to-CSV extraction on every file matching ``glob_pattern``.

    The pattern is resolved relative to the current directory and only
    regular files are kept. A progress line (time, position/total, path) is
    printed before each file and ``FINISH`` once the whole batch is done.
    """
    candidates = Path('.').glob(glob_pattern)
    pdf_paths = list(filter(lambda candidate: candidate.is_file(), candidates))
    total = len(pdf_paths)

    for position, pdf_path in zip(range(total), pdf_paths):
        progress = f"{position}/{total}"
        print(f"{now_in_string()} ({progress}) Reading '{pdf_path}'...", end=' ')
        extract_data_from_pdf_to_csv(pdf_path, suffix)

    print("FINISH")

    
# Create the directory for storing the CSVs (no-op if it already exists)
os.makedirs(CSV_PATH, exist_ok=True)

start_time_str = now_in_string()
print(f"===== START {start_time_str}")
extract_data_from_pattern('**/*Notes1Provisional*.pdf', suffix='provisional')
extract_data_from_pattern('**/*Notes1Definitiva*.pdf', suffix='definitiva')
# Closing parenthesis added so the summary line is balanced.
print(f"===== END ({start_time_str} -> {now_in_string()})")

===== START 22:58:37
22:58:37 (0/395) Reading 'EDUCACIO INFANTIL/V36 - ActaNotes1Provisional 2022_06_29 10_13.pdf'... (2 tables on this file)
22:58:40 (1/395) Reading 'EDUCACIO INFANTIL/V12 - ActaNotes1Provisional 2022_06_29 09_58.pdf'... (2 tables on this file)
22:58:43 (2/395) Reading 'EDUCACIO INFANTIL/C1 - ActaNotes1Provisional 2022_06_28 11_56.pdf'... (2 tables on this file)
22:58:46 (3/395) Reading 'EDUCACIO INFANTIL/V24 - ActaNotes1Provisional 2022_06_29 10_59.pdf'... (2 tables on this file)
22:58:49 (4/395) Reading 'EDUCACIO INFANTIL/V28 - ActaNotes1Provisional 2022_06_29 11_14.pdf'... (2 tables on this file)
22:58:52 (5/395) Reading 'EDUCACIO INFANTIL/A33 - ActaNotes1Provisional 2022_06_29 10_02.pdf'... (2 tables on this file)
22:58:55 (6/395) Reading 'EDUCACIO INFANTIL/A15 - ActaNotes1Provisional 2022_06_29 10_19.pdf'... (2 tables on this file)
22:58:58 (7/395) Reading 'EDUCACIO INFANTIL/V5 - ActaNotes1Provisional 2022_06_29 12_43.pdf'... (2 tables on this file)
22:59:01 (8/3

23:02:54 (67/395) Reading 'EDUCACIO INFANTIL/A49 - ActaNotes1Provisional 2022_06_29 12_19.pdf'... (2 tables on this file)
23:02:58 (68/395) Reading 'EDUCACIO INFANTIL/V77 - ActaNotes1Provisional 2022_06_29 10_12.pdf'... (2 tables on this file)
23:03:02 (69/395) Reading 'EDUCACIO INFANTIL/A32 - ActaNotes1Provisional 2022_06_29 12_44.pdf'... (2 tables on this file)
23:03:05 (70/395) Reading 'EDUCACIO INFANTIL/V48 - ActaNotes1Provisional 2022_06_29 09_32.pdf'... (2 tables on this file)
23:03:09 (71/395) Reading 'EDUCACIO INFANTIL/A44 - ActaNotes1Provisional 2022_06_29 09_15.pdf'... (2 tables on this file)
23:03:13 (72/395) Reading 'EDUCACIO INFANTIL/A27 - ActaNotes1Provisional 2022_06_29 12_48.pdf'... (2 tables on this file)
23:03:16 (73/395) Reading 'EDUCACIO INFANTIL/A7 - ActaNotes1Provisional 2022_06_29 08_58.pdf'... (2 tables on this file)
23:03:20 (74/395) Reading 'EDUCACIO INFANTIL/A10 - ActaNotes1Provisional 2022_06_29 11_36.pdf'... (2 tables on this file)
23:03:23 (75/395) Reading

23:07:15 (134/395) Reading 'EDUCACIO INFANTIL/A9 - ActaNotes1Provisional 2022_06_29 09_02.pdf'... (2 tables on this file)
23:07:18 (135/395) Reading 'EDUCACIO INFANTIL/A42 - ActaNotes1Provisional 2022_06_29 11_58.pdf'... (2 tables on this file)
23:07:22 (136/395) Reading 'EDUCACIO INFANTIL/V13 - ActaNotes1Provisional 2022_06_29 09_58.pdf'... (2 tables on this file)
23:07:26 (137/395) Reading 'EDUCACIO INFANTIL/A31 - ActaNotes1Provisional 2022_06_29 12_47.pdf'... (2 tables on this file)
23:07:29 (138/395) Reading 'EDUCACIO INFANTIL/C5 - ActaNotes1Provisional 2022_06_28 11_53.pdf'... (2 tables on this file)
23:07:32 (139/395) Reading 'EDUCACIO INFANTIL/V37 - ActaNotes1Provisional 2022_06_29 10_11.pdf'... (2 tables on this file)
23:07:36 (140/395) Reading 'EDUCACIO INFANTIL/A2 - ActaNotes1Provisional 2022_06_29 08_38.pdf'... (2 tables on this file)
23:07:39 (141/395) Reading 'EDUCACIO INFANTIL/V43 - ActaNotes1Provisional 2022_06_29 02_01.pdf'... (2 tables on this file)
23:07:43 (142/395) 

23:11:17 (202/395) Reading 'ANGLES/A1 - ActaNotes1Provisional 2022_06_29 08_34.pdf'... (2 tables on this file)
23:11:20 (203/395) Reading 'ANGLES/V7 - ActaNotes1Provisional 2022_06_29 07_45.pdf'... (2 tables on this file)
23:11:23 (204/395) Reading 'ANGLES/V13 - ActaNotes1Provisional 2022_06_29 08_57.pdf'... (2 tables on this file)
23:11:26 (205/395) Reading 'ANGLES/V4 - ActaNotes1Provisional 2022_06_29 08_42.pdf'... (2 tables on this file)
23:11:30 (206/395) Reading 'ANGLES/A2 - ActaNotes1Provisional 2022_06_29 09_19.pdf'... (2 tables on this file)
23:11:35 (207/395) Reading 'ANGLES/V1 - ActaNotes1Provisional 2022_06_29 07_06.pdf'... (4 tables on this file)
23:11:40 (208/395) Reading 'ANGLES/A3 - ActaNotes1Provisional 2022_06_29 09_28.pdf'... (2 tables on this file)
23:11:43 (209/395) Reading 'ANGLES/V14 - ActaNotes1Provisional 2022_06_29 08_41.pdf'... (2 tables on this file)
23:11:46 (210/395) Reading 'ANGLES/A9 - ActaNotes1Provisional 2022_06_29 08_50.pdf'... (2 tables on this file)

23:15:12 (270/395) Reading 'EDUCACIO PRIMARIA/C5 - ActaNotes1Provisional 2022_06_28 12_33.pdf'... (2 tables on this file)
23:15:15 (271/395) Reading 'EDUCACIO PRIMARIA/A42 - ActaNotes1Provisional 2022_06_29 12_06.pdf'... (2 tables on this file)
23:15:18 (272/395) Reading 'EDUCACIO PRIMARIA/V8 - ActaNotes1Provisional 2022_06_29 11_39.pdf'... (2 tables on this file)
23:15:21 (273/395) Reading 'EDUCACIO PRIMARIA/V57 - ActaNotes1Provisional 2022_06_29 09_20.pdf'... (2 tables on this file)
23:15:24 (274/395) Reading 'EDUCACIO PRIMARIA/V4 - ActaNotes1Provisional 2022_06_29 11_34.pdf'... (2 tables on this file)
23:15:27 (275/395) Reading 'EDUCACIO PRIMARIA/V55 - ActaNotes1Provisional 2022_06_29 12_49.pdf'... (2 tables on this file)
23:15:30 (276/395) Reading 'EDUCACIO PRIMARIA/A52 - ActaNotes1Provisional 2022_06_29 13_56.pdf'... (2 tables on this file)
23:15:33 (277/395) Reading 'EDUCACIO PRIMARIA/C18 - ActaNotes1Provisional 2022_06_28 16_42.pdf'... (2 tables on this file)
23:15:36 (278/395) 

23:18:34 (336/395) Reading 'EDUCACIO PRIMARIA/A40 - ActaNotes1Provisional 2022_06_29 11_58.pdf'... (2 tables on this file)
23:18:37 (337/395) Reading 'EDUCACIO PRIMARIA/V10 - ActaNotes1Provisional 2022_06_29 09_54.pdf'... (2 tables on this file)
23:18:40 (338/395) Reading 'EDUCACIO PRIMARIA/A28 - ActaNotes1Provisional 2022_06_29 13_53.pdf'... (2 tables on this file)
23:18:43 (339/395) Reading 'EDUCACIO PRIMARIA/A50 - ActaNotes1Provisional 2022_06_29 12_19.pdf'... (2 tables on this file)
23:18:46 (340/395) Reading 'EDUCACIO PRIMARIA/A3 - ActaNotes1Provisional 2022_06_29 09_13.pdf'... (2 tables on this file)
23:18:49 (341/395) Reading 'EDUCACIO PRIMARIA/C16 - ActaNotes1Provisional 2022_06_28 16_42.pdf'... (2 tables on this file)
23:18:52 (342/395) Reading 'EDUCACIO PRIMARIA/V39 - ActaNotes1Provisional 2022_06_29 10_23.pdf'... (2 tables on this file)
23:18:55 (343/395) Reading 'EDUCACIO PRIMARIA/V23 - ActaNotes1Provisional 2022_06_29 11_13.pdf'... (2 tables on this file)
23:18:58 (344/395

23:21:58 (8/395) Reading 'EDUCACIO INFANTIL/A36 - ActaNotes1Definitiva 2022_07_01 10_26.pdf'... (2 tables on this file)
23:22:01 (9/395) Reading 'EDUCACIO INFANTIL/V49 - ActaNotes1Definitiva 2022_07_01 12_35.pdf'... (2 tables on this file)
23:22:04 (10/395) Reading 'EDUCACIO INFANTIL/V36 - ActaNotes1Definitiva 2022_07_01 11_01.pdf'... (2 tables on this file)
23:22:07 (11/395) Reading 'EDUCACIO INFANTIL/C4 - ActaNotes1Definitiva 2022_07_01 11_24.pdf'... (2 tables on this file)
23:22:10 (12/395) Reading 'EDUCACIO INFANTIL/V33 - ActaNotes1Definitiva 2022_07_01 10_55.pdf'... (2 tables on this file)
23:22:14 (13/395) Reading 'EDUCACIO INFANTIL/V52 - ActaNotes1Definitiva 2022_07_01 12_46.pdf'... (2 tables on this file)
23:22:17 (14/395) Reading 'EDUCACIO INFANTIL/A23 - ActaNotes1Definitiva 2022_07_01 10_28.pdf'... (2 tables on this file)
23:22:20 (15/395) Reading 'EDUCACIO INFANTIL/V62 - ActaNotes1Definitiva 2022_07_01 10_28.pdf'... (2 tables on this file)
23:22:23 (16/395) Reading 'EDUCACIO

23:25:28 (76/395) Reading 'EDUCACIO INFANTIL/A40 - ActaNotes1Definitiva 2022_07_01 11_11.pdf'... (2 tables on this file)
23:25:31 (77/395) Reading 'EDUCACIO INFANTIL/V47 - ActaNotes1Definitiva 2022_07_01 11_57.pdf'... (2 tables on this file)
23:25:34 (78/395) Reading 'EDUCACIO INFANTIL/V51 - ActaNotes1Definitiva 2022_07_01 11_02.pdf'... (2 tables on this file)
23:25:37 (79/395) Reading 'EDUCACIO INFANTIL/C14 - ActaNotes1Definitiva 2022_07_01 10_59.pdf'... (2 tables on this file)
23:25:40 (80/395) Reading 'EDUCACIO INFANTIL/C5 - ActaNotes1Definitiva 2022_07_01 11_13.pdf'... (2 tables on this file)
23:25:43 (81/395) Reading 'EDUCACIO INFANTIL/C16 - ActaNotes1Definitiva 2022_07_01 11_07.pdf'... (2 tables on this file)
23:25:46 (82/395) Reading 'EDUCACIO INFANTIL/A28 - ActaNotes1Definitiva 2022_07_01 10_16.pdf'... (2 tables on this file)
23:25:49 (83/395) Reading 'EDUCACIO INFANTIL/C13 - ActaNotes1Definitiva 2022_07_01 11_08.pdf'... (2 tables on this file)
23:25:52 (84/395) Reading 'EDUCAC

23:28:53 (143/395) Reading 'EDUCACIO INFANTIL/A14 - ActaNotes1Definitiva 2022_07_01 10_45.pdf'... (2 tables on this file)
23:28:56 (144/395) Reading 'EDUCACIO INFANTIL/V34 - ActaNotes1Definitiva 2022_07_01 11_07.pdf'... (2 tables on this file)
23:28:59 (145/395) Reading 'EDUCACIO INFANTIL/A38 - ActaNotes1Definitiva 2022_07_01 11_17.pdf'... (2 tables on this file)
23:29:02 (146/395) Reading 'EDUCACIO INFANTIL/A31 - ActaNotes1Definitiva 2022_07_01 10_22.pdf'... (2 tables on this file)
23:29:05 (147/395) Reading 'AUDICIO I LLENGUATGE/A6 - ActaNotes1Definitiva 2022_07_01 11_48.pdf'... (2 tables on this file)
23:29:08 (148/395) Reading 'AUDICIO I LLENGUATGE/A4 - ActaNotes1Definitiva 2022_07_01 12_12.pdf'... (2 tables on this file)
23:29:11 (149/395) Reading 'AUDICIO I LLENGUATGE/A5 - ActaNotes1Definitiva 2022_07_01 11_13.pdf'... (2 tables on this file)
23:29:14 (150/395) Reading 'AUDICIO I LLENGUATGE/A1 - ActaNotes1Definitiva 2022_07_01 10_16.pdf'... (2 tables on this file)
23:29:17 (151/39

23:32:29 (213/395) Reading 'ANGLES/V13 - ActaNotes1Definitiva 2022_07_01 10_16.pdf'... (2 tables on this file)
23:32:32 (214/395) Reading 'ANGLES/V7 - ActaNotes1Definitiva 2022_07_01 10_41.pdf'... (2 tables on this file)
23:32:35 (215/395) Reading 'ANGLES/A3 - ActaNotes1Definitiva 2022_07_01 10_13.pdf'... (2 tables on this file)
23:32:38 (216/395) Reading 'PEDAGOGIA TERAPEUTICA/V10 - ActaNotes1Definitiva 2022_07_01 10_42.pdf'... (2 tables on this file)
23:32:41 (217/395) Reading 'PEDAGOGIA TERAPEUTICA/A2 - ActaNotes1Definitiva 2022_07_01 11_18.pdf'... (2 tables on this file)
23:32:44 (218/395) Reading 'PEDAGOGIA TERAPEUTICA/A9 - ActaNotes1Definitiva 2022_07_01 10_18.pdf'... (2 tables on this file)
23:32:47 (219/395) Reading 'PEDAGOGIA TERAPEUTICA/V15 - ActaNotes1Definitiva 2022_07_01 10_42.pdf'... (2 tables on this file)
23:32:50 (220/395) Reading 'PEDAGOGIA TERAPEUTICA/A13 - ActaNotes1Definitiva 2022_07_01 10_42.pdf'... (2 tables on this file)
23:32:53 (221/395) Reading 'PEDAGOGIA TER

23:35:53 (280/395) Reading 'EDUCACIO PRIMARIA/V35 - ActaNotes1Definitiva 2022_07_01 10_36.pdf'... (2 tables on this file)
23:35:56 (281/395) Reading 'EDUCACIO PRIMARIA/V50 - ActaNotes1Definitiva 2022_07_01 10_59.pdf'... (2 tables on this file)
23:35:59 (282/395) Reading 'EDUCACIO PRIMARIA/V44 - ActaNotes1Definitiva 2022_07_01 12_03.pdf'... (2 tables on this file)
23:36:02 (283/395) Reading 'EDUCACIO PRIMARIA/C10 - ActaNotes1Definitiva 2022_07_01 10_42.pdf'... (2 tables on this file)
23:36:05 (284/395) Reading 'EDUCACIO PRIMARIA/A16 - ActaNotes1Definitiva 2022_07_01 10_53.pdf'... (2 tables on this file)
23:36:08 (285/395) Reading 'EDUCACIO PRIMARIA/A10 - ActaNotes1Definitiva 2022_07_01 10_08.pdf'... (2 tables on this file)
23:36:11 (286/395) Reading 'EDUCACIO PRIMARIA/V13 - ActaNotes1Definitiva 2022_07_01 10_36.pdf'... (2 tables on this file)
23:36:14 (287/395) Reading 'EDUCACIO PRIMARIA/V55 - ActaNotes1Definitiva 2022_07_01 11_18.pdf'... (2 tables on this file)
23:36:17 (288/395) Readi

23:39:17 (347/395) Reading 'EDUCACIO PRIMARIA/A49 - ActaNotes1Definitiva 2022_07_01 10_13.pdf'... (2 tables on this file)
23:39:20 (348/395) Reading 'EDUCACIO PRIMARIA/V10 - ActaNotes1Definitiva 2022_07_01 11_47.pdf'... (2 tables on this file)
23:39:23 (349/395) Reading 'EDUCACIO PRIMARIA/V11 - ActaNotes1Definitiva 2022_07_01 11_14.pdf'... (2 tables on this file)
23:39:27 (350/395) Reading 'EDUCACIO PRIMARIA/A47 - ActaNotes1Definitiva 2022_07_01 10_46.pdf'... (2 tables on this file)
23:39:30 (351/395) Reading 'EDUCACIO PRIMARIA/A13 - ActaNotes1Definitiva 2022_07_01 10_13.pdf'... (2 tables on this file)
23:39:33 (352/395) Reading 'EDUCACIO PRIMARIA/V6 - ActaNotes1Definitiva 2022_07_01 12_49.pdf'... (2 tables on this file)
23:39:36 (353/395) Reading 'EDUCACIO PRIMARIA/V32 - ActaNotes1Definitiva 2022_07_01 10_42.pdf'... (2 tables on this file)
23:39:39 (354/395) Reading 'EDUCACIO PRIMARIA/V48 - ActaNotes1Definitiva 2022_07_01 10_31.pdf'... (2 tables on this file)
23:39:42 (355/395) Readin

In [367]:
import pandas as pd
import glob

# This step requires running a couple of commands manually
# (they could be included in this notebook, but sometimes the shell is faster):
#
#
# cat csv/tmp/*provisional*.csv | grep "\*\*\*" > csv/PROVISIONAL.csv
# cat csv/tmp/*definitiva*.csv | grep "\*\*\*" > csv/DEFINITIVE.csv

def load_csv_file(filename):
    """
    Load a results CSV into a DataFrame using a fixed set of column names.

    The names are supplied explicitly, so the first line of the file is read
    as data rather than as a header.
    """
    column_labels = [
        'DNI',
        'nombre',
        'tema',
        'caso_practico',
        'total',
        'especialidad',
        'tribunal',
    ]
    frame = pd.read_csv(filename, names=column_labels)
    return frame

def compare_valuations(row):
    """
    Tell whether the provisional and definitive marks of a row coincide.

    Both the ``tema`` and the ``caso_practico`` marks must be equal; the
    second pair is only looked at when the first pair already matches.
    """
    topic_matches = row['tema_provisional'] == row['tema_definitivo']
    if not topic_matches:
        return topic_matches
    return row['caso_practico_provisional'] == row['caso_practico_definitivo']


################


start_time_str = now_in_string()
print(f"===== START {start_time_str}")

provisional = load_csv_file('csv/PROVISIONAL.csv')
definitive = load_csv_file('csv/DEFINITIVE.csv')

# Merge both files (as columns), and add a new column 'iguales' that compares provisional and definitive results
merged = pd.merge(provisional, definitive, on=['DNI', 'nombre', 'especialidad', 'tribunal'], suffixes=('_provisional', '_definitivo'), how="outer")
# Apply the comparison to the merged frame itself; the name previously used
# here was never defined in this notebook and failed on a fresh kernel.
merged['iguales'] = merged.apply(compare_valuations, axis=1)

# Sort the columns to look better on the spreadsheet.
# reindex() returns a new frame, so the result must be assigned back.
merged = merged.reindex(columns=['DNI', 'nombre', 'tema_provisional', 'caso_practico_provisional',
       'total_provisional', 'iguales', 'tema_definitivo',
       'caso_practico_definitivo', 'total_definitivo', 'especialidad', 'tribunal'])

# Delete the name column (for sharing):
del merged['nombre']

# One sheet per specialty.
with pd.ExcelWriter('results.xlsx') as writer:
    for speciality in merged['especialidad'].unique():
        temp_df = merged.loc[merged['especialidad'] == speciality]
        temp_df.to_excel(writer, sheet_name=speciality)

# Closing parenthesis added so the summary line is balanced.
print(f"===== END ({start_time_str} -> {now_in_string()})")
