In [149]:
import requests
import pandas as pd
from tqdm import tqdm

In [150]:
def pull_vote_data(term_num, max_sitting_num, min_sitting_num=1):
    """Download individual MP votes for a range of Sejm sittings.

    For each sitting in ``min_sitting_num..max_sitting_num`` (inclusive) the
    list of votings is fetched from ``api.sejm.gov.pl``; then, for each voting,
    the per-MP votes are fetched and cross-joined with that voting's metadata.
    The combined table is written to
    ``data/GLO{term_num}k1p{max_sitting_num}p_i.csv`` and returned.

    Parameters
    ----------
    term_num : int
        Sejm term number used in the API URL.
    max_sitting_num : int
        Last sitting number to download (inclusive).
    min_sitting_num : int, default 1
        First sitting number to download (inclusive). Raise it to resume a
        partially completed download.

    Returns
    -------
    pandas.DataFrame
        One row per (voting, MP) with columns ``date, term, sitting,
        votingNumber, MP, club, firstName, lastName, vote, vote_id``. Rows
        whose ``vote`` equals ``'VOTE_VALID'`` are dropped.

    Raises
    ------
    RuntimeError
        If any API request returns a status code other than 200.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term_num}/votings"
    columns = ['date', 'term', 'sitting', 'votingNumber', 'MP', 'club', 'firstName', 'lastName', 'vote']

    # Collect one frame per voting and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything gathered so far each time.
    vote_frames = []

    for sitting_num in tqdm(range(min_sitting_num, max_sitting_num + 1)):
        response = requests.get(f"{base_url}/{sitting_num}")
        # Check the status before parsing, so an error response is reported
        # as such rather than as a JSON/pandas failure.
        if response.status_code != 200:
            raise RuntimeError(f'Status code {response.status_code}! (sitting {sitting_num})')

        df = pd.DataFrame.from_dict(response.json())
        if df.empty:
            # The API answers [] for some sittings (e.g. term 9, sittings 32
            # and 41); there is no 'votingNumber' column to iterate over.
            continue

        for vote_num in df['votingNumber'].values:
            df_voting_info = df[df['votingNumber'] == vote_num][['date', 'term', 'sitting', 'votingNumber']]
            response2 = requests.get(f"{base_url}/{sitting_num}/{vote_num}")
            if response2.status_code != 200:
                raise RuntimeError(f'Status code {response2.status_code}! (voting {vote_num})')

            df_votes = pd.json_normalize(response2.json(), ['votes'])
            # Cross join repeats the voting metadata on every MP's row.
            vote_frames.append(df_voting_info.join(df_votes, how='cross'))

    if vote_frames:
        df_all = pd.concat(vote_frames, ignore_index=True)
    else:
        df_all = pd.DataFrame(columns=columns)

    df_all = df_all[columns]
    df_all = df_all[df_all['vote'] != 'VOTE_VALID'].reset_index(drop=True)

    # Numeric id built from term, sitting and voting number; it stays
    # unambiguous while sitting and votingNumber are both below 1000.
    df_all['vote_id'] = df_all['term'] * 10**6 + df_all['sitting'] * 10**3 + df_all['votingNumber']
    df_all['date'] = pd.to_datetime(df_all['date'])
    df_all.to_csv(f'data/GLO{term_num}k1p{max_sitting_num}p_i.csv', index=False)
    return df_all

In [145]:
# Pull votes with term_num=10 and max_sitting_num=10, keeping the result in df_all.
df_all = pull_vote_data(10, 10) # data till May 2024

In [146]:
# Show the frame returned by pull_vote_data as this cell's output.
df_all

Unnamed: 0,date,term,sitting,votingNumber,MP,club,firstName,lastName,vote,vote_id
0,2023-11-13 15:17:22,10,1,1,234,PiS,Dariusz,Matecki,ABSENT,10001001
1,2023-11-13 18:02:12,10,1,2,1,PiS,Andrzej,Adamczyk,YES,10001002
2,2023-11-13 18:02:12,10,1,2,2,KO,Piotr,Adamowicz,YES,10001002
3,2023-11-13 18:02:12,10,1,2,3,PiS,Adam,Andruszkiewicz,YES,10001002
4,2023-11-13 18:02:12,10,1,2,4,PiS,Waldemar,Andzel,YES,10001002
...,...,...,...,...,...,...,...,...,...,...
161009,2024-04-26 11:30:28,10,10,26,460,Lewica,Anna,Żukowska,YES,10010026
161010,2024-04-26 11:30:28,10,10,26,461,PiS,Anna,Baluch,YES,10010026
161011,2024-04-26 11:30:28,10,10,26,462,KO,Magdalena,Łośko,YES,10010026
161012,2024-04-26 11:30:28,10,10,26,463,niez.,Monika,Pawłowska,YES,10010026


In [185]:
term_num = 9
max_sitting_num = 81

# Collect one frame per voting and concatenate once after the loop; calling
# pd.concat inside the loop re-copies everything gathered so far each time.
vote_frames = []
for sitting_num in tqdm(range(1, max_sitting_num + 1)):
    response = requests.get(f"https://api.sejm.gov.pl/sejm/term{term_num}/votings/{sitting_num}")
    # Check the status before parsing, so an error response is reported as
    # such rather than as a JSON/pandas failure.
    if response.status_code != 200:
        raise RuntimeError(f'Status code {response.status_code}! (sitting {sitting_num})')

    data = response.json()
    df = pd.DataFrame.from_dict(data)
    if df.empty:
        # Some sittings (32 and 41 in term 9) come back as []; the resulting
        # frame has no 'votingNumber' column, so skip instead of crashing.
        continue

    for vote_num in df['votingNumber'].values:
        df_voting_info = df[df['votingNumber'] == vote_num][['date', 'term', 'sitting', 'votingNumber']]
        response2 = requests.get(f"https://api.sejm.gov.pl/sejm/term{term_num}/votings/{sitting_num}/{vote_num}")
        if response2.status_code != 200:
            raise RuntimeError(f'Status code {response2.status_code}! (voting {vote_num})')

        data2 = response2.json()
        df_votes = pd.json_normalize(data2, ['votes'])

        # Cross join repeats the voting metadata on every MP's row.
        df_r = df_voting_info.join(df_votes, how='cross')
        vote_frames.append(df_r)

df_all = pd.concat(vote_frames, ignore_index=True)

df_all = df_all[['date', 'term', 'sitting', 'votingNumber', 'MP', 'club', 'firstName', 'lastName', 'vote']]
df_all = df_all[df_all['vote'] != 'VOTE_VALID'].reset_index(drop=True)

# Numeric id built from term, sitting and voting number; it stays unambiguous
# while sitting and votingNumber are both below 1000.
df_all['vote_id'] = df_all['term'] * 10**6 + df_all['sitting'] * 10**3 + df_all['votingNumber']
df_all['date'] = pd.to_datetime(df_all['date'])
df_all.to_csv(f'data/GLO{term_num}k1p{max_sitting_num}p_i.csv', index=False)

100%|███████████████████████████████████████████████████████████████████████████████| 40/40 [1:27:15<00:00, 130.90s/it]


In [None]:
# These term 9 sitting endpoints return an empty list ([]):
# https://api.sejm.gov.pl/sejm/term9/votings/32 -> []
# https://api.sejm.gov.pl/sejm/term9/votings/41 -> []

In [186]:
# Show the term 9 frame built by the download cell as this cell's output.
df_all

Unnamed: 0,date,term,sitting,votingNumber,MP,club,firstName,lastName,vote,vote_id
0,2019-11-12 15:26:06,9,1,1,1.0,PiS,Andrzej,Adamczyk,YES,9001001
1,2019-11-12 15:26:06,9,1,1,2.0,SLD,Rafał,Adamczyk,YES,9001001
2,2019-11-12 15:26:06,9,1,1,3.0,KO,Piotr,Adamowicz,ABSTAIN,9001001
3,2019-11-12 15:26:06,9,1,1,4.0,SLD,Romuald,Ajchler,YES,9001001
4,2019-11-12 15:26:06,9,1,1,5.0,PiS,Adam,Andruszkiewicz,YES,9001001
...,...,...,...,...,...,...,...,...,...,...
4315321,2023-08-30 19:15:03,9,81,183,471.0,PiS,Elżbieta,Zielińska,YES,9081183
4315322,2023-08-30 19:15:03,9,81,183,472.0,PiS,Krzysztof,Głuchowski,YES,9081183
4315323,2023-08-30 19:15:03,9,81,183,473.0,PiS,Leszek,Kowalczyk,YES,9081183
4315324,2023-08-30 19:15:03,9,81,183,474.0,PiS,Iwona,Kurowska,YES,9081183
