In [1]:
import os
import re
import time

from params.paths import ROOT_DIR
from api_requests.meeting_convo_collector import MeetingConvoCollector
from file_handling.file_read_writer import read_json, write_json, create_dir, write_file
# Paths
LOWER_HOUSE_DATA_DIR = os.path.join(ROOT_DIR, 'data', 'data_shugiin')
UPPER_HOUSE_DATA_DIR = os.path.join(ROOT_DIR, 'data', 'data_sangiin')
# JSON file in which the per-year speech counts are persisted
speech_freq_file_path = os.path.join(LOWER_HOUSE_DATA_DIR, 'speech_freq.json')


def _first_visible_entry(directory):
	"""Return the alphabetically first non-hidden entry name in `directory`.

	os.listdir() returns names in arbitrary order, so indexing its result
	directly can select a different file from run to run. Sorting makes the
	choice deterministic, and dot-entries are skipped so that a stray hidden
	file is never picked instead of the data file.

	Raises:
		FileNotFoundError: if the directory holds no non-hidden entry.
	"""
	visible_entries = sorted(
		entry for entry in os.listdir(directory) if not entry.startswith('.')
	)
	if not visible_entries:
		raise FileNotFoundError(f'No representative list file found in {directory}')
	return visible_entries[0]


# Read the representative data for the lower and upper house
lower_repr_dir = os.path.join(LOWER_HOUSE_DATA_DIR, 'repr_list')
lower_repr_file = _first_visible_entry(lower_repr_dir)
lower_house_member_list = read_json(os.path.join(lower_repr_dir, lower_repr_file))

upper_repr_dir = os.path.join(UPPER_HOUSE_DATA_DIR, 'repr_list')
upper_repr_file = _first_visible_entry(upper_repr_dir)
upper_house_member_list = read_json(os.path.join(upper_repr_dir, upper_repr_file))
# %%
# Client used for every request against the speech endpoint
mcc = MeetingConvoCollector("https://kokkai.ndl.go.jp/api/speech?")

In [2]:
def clean_repr_name(repr_name):
	"""Strip whitespace, the character '君' and bracketed annotations from a name.

	Args:
		repr_name: Raw member name string.

	Returns:
		The name with every whitespace character, every '君' and every
		``[...]`` span removed.
	"""
	# Raw string: in a normal literal '\s' and '\[' are invalid escape
	# sequences, which Python reports with a warning at compile time.
	# NOTE(review): '君' is removed wherever it occurs, not only as a trailing
	# honorific - confirm no member name legitimately contains that character.
	return re.sub(r'\s|君|\[.*?\]', '', repr_name)
def get_total_speech_freq_from_api(responses):
	"""Return the total number of speeches matched by one paged API query.

	Args:
		responses: Iterable of decoded API responses, each a mapping with a
			'numberOfRecords' entry. Assumed to be the successive pages of a
			single query, as produced at this file's only call site -
			TODO confirm against MeetingConvoCollector.make_requests.

	Returns:
		The 'numberOfRecords' value of the first response, or 0 when there
		are no responses.
	"""
	# 'numberOfRecords' is the total hit count of the whole query and is
	# repeated on every page, so adding it up across pages would multiply the
	# real count by the number of pages. Read it once from the first page.
	first_page = next(iter(responses), None)
	if first_page is None:
		return 0
	return first_page['numberOfRecords']

def get_speech_freq_for_repr_for_year(repr_name, start_year):
	"""Query the speech API for one speaker over one calendar year.

	Args:
		repr_name: Speaker name sent as the `speaker` condition.
		start_year: Year whose 1 January to 31 December span is searched.

	Returns:
		Whatever get_total_speech_freq_from_api computes from the responses
		returned by `mcc.make_requests` for these conditions.
	"""
	speaker_condition = f"speaker={repr_name}"
	period_conditions = [
		f"from={start_year}-01-01",
		f"until={start_year}-12-31",
	]
	# No nameOfHouse condition is sent; responses are requested as JSON.
	conditions = [speaker_condition, *period_conditions, 'recordPacking=json']
	responses = mcc.make_requests(conditions_list=conditions)
	return get_total_speech_freq_from_api(responses)


In [3]:
# Collect, for each year, how often every lower-house member appears as a
# speaker, persisting the result after each completed year so that an
# interrupted run can resume where it stopped.
MAX_CONSECUTIVE_FAILURES = 5  # give up once this many attempts in a row fail
RETRY_DELAY_SECONDS = 10  # pause between attempts so a failing API is not hammered

consecutive_failures = 0
while True:
	try:
		# Reload what earlier attempts already saved; the list index doubles
		# as the "years done" counter used to skip finished years below.
		output_arr = []
		if os.path.exists(speech_freq_file_path):
			output_arr = read_json(speech_freq_file_path)
		for idx, year in enumerate(range(1990, 2023)):
			if idx < len(output_arr):
				continue
			year_dict = {
				'year': year,
			}
			for party, members in lower_house_member_list['reprs'].items():
				for member in members:
					name = clean_repr_name(member['name'])
					freq = get_speech_freq_for_repr_for_year(name, year)
					year_dict[f'{party} {name}'] = freq

			output_arr.append(year_dict)
			write_json(output_arr, speech_freq_file_path)
			# A year was completed, so earlier failures were transient.
			consecutive_failures = 0
		# Every year is stored: leave the retry loop instead of spinning forever.
		break
	except Exception as e:
		consecutive_failures += 1
		print(e)
		if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
			# A persistent error would otherwise be retried endlessly.
			raise
		print('Exception occured trying again')
		time.sleep(RETRY_DELAY_SECONDS)





https://kokkai.ndl.go.jp/api/speech?startRecord=1&speaker=逢沢一郎&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?startRecord=31&speaker=逢沢一郎&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?startRecord=None&speaker=逢沢一郎&from=2021-01-01&until=2021-12-31&recordPacking=json
No more records
https://kokkai.ndl.go.jp/api/speech?startRecord=1&speaker=青山周平&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?startRecord=None&speaker=青山周平&from=2021-01-01&until=2021-12-31&recordPacking=json
No more records
https://kokkai.ndl.go.jp/api/speech?startRecord=1&speaker=赤澤亮正&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?startRecord=31&speaker=赤澤亮正&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?startRecord=61&speaker=赤澤亮正&from=2021-01-01&until=2021-12-31&recordPacking=json
https://kokkai.ndl.go.jp/api/speech?sta

KeyboardInterrupt: 