## Function to Search API and Save Top Arguments to Files

This is a Python function that searches for the top arguments for each topic in a query topics XML file using an API, and saves the top arguments for each topic to a separate file. It takes four arguments:

- `root`: the root element of the query topics XML file.
- `output_dir`: the directory where the output files will be saved.
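- `pageSize`: the number of arguments to request per topic (passed to the API as its `pageSize` parameter).
- `url_template`: the URL template for the search query, containing two `{}` placeholders that are filled, in order, with the topic title and the page size.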

The function loops through each topic in the query topics XML file, extracts the topic number and title, formats the search URL with the topic title and page size, performs the search query, and extracts the top arguments. For each topic, the function saves the top arguments to a separate file in the specified output directory.

### Output
For each topic, the function writes a tab-separated file named `<topic number>.txt` to the specified output directory, with the columns `id`, `conclusion`, `premises`, `stance`, and `rank`.

### Required Libraries
The function requires the following Python libraries:

- os
- requests
- xml.etree.ElementTree
- pandas

In [None]:
#Imports
import os
import xml.etree.ElementTree as ET
from urllib.parse import quote

import pandas as pd
import requests

In [None]:
#Global variables
# Root directory of the project checkout. It defaults to the original
# author's location but can be overridden per machine by setting the
# DISSERTATION_DATA_PATH environment variable, so the source does not need
# to be edited to run elsewhere.
# Joining with '' guarantees a trailing path separator, which matters because
# other cells build file paths by appending directly to this string.
data_path = os.path.join(
    os.environ.get(
        'DISSERTATION_DATA_PATH',
        '/Users/balazs/Desktop/dissertationProjectCode/dissertationCodeBase/',
    ),
    '',
)

In [None]:
# Search API function
def search_api(root, output_dir, pageSize, url_template):
    """Query the search API once per topic and save each result list to a file.

    For every ``<topic>`` child of ``root`` that has a non-empty ``<number>``,
    the topic title is inserted into ``url_template`` together with
    ``pageSize``, the URL is fetched, and the returned arguments are written
    to ``<output_dir>/<topic number>.txt`` as a tab-separated table with the
    columns ``id``, ``conclusion``, ``premises``, ``stance`` and ``rank``.

    Args:
        root: Root element of the parsed topics XML; its ``topic`` children
            are expected to carry ``number`` and ``title`` sub-elements.
        output_dir: Directory that receives one file per topic. It is created
            if it does not exist.
        pageSize: Value substituted into the second ``{}`` of
            ``url_template`` (the number of results requested).
        url_template: URL with two positional ``{}`` placeholders, filled in
            order with the URL-encoded topic title and ``pageSize``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the JSON response has no ``'arguments'`` entry or a
            result lacks one of the expected fields.
    """
    # requests has no default timeout; without one a stalled connection
    # would block the whole run indefinitely.
    request_timeout_seconds = 120
    header = ['id', 'conclusion', 'premises', 'stance', 'rank']

    # The output directory is the same for every topic, so create it once.
    os.makedirs(output_dir, exist_ok=True)

    for topic in root.findall('topic'):
        topic_number = topic.find('number')
        # Topics without a usable number cannot be given a file name; skip
        # them instead of writing "None.txt" or a whitespace-only name.
        if topic_number is None or not (topic_number.text or '').strip():
            continue
        topic_id = topic_number.text.strip()

        topic_title = topic.find('title')
        if topic_title is not None and topic_title.text:
            topic_title = topic_title.text.strip()
        else:
            topic_title = ''

        # Percent-encode the title so characters such as '&', '#' or '+'
        # stay part of the query value instead of altering the URL structure.
        url = url_template.format(quote(topic_title, safe=''), pageSize)

        # Execute the search query using the API and fail loudly on HTTP
        # errors rather than trying to parse an error page as JSON.
        response = requests.get(url, timeout=request_timeout_seconds)
        response.raise_for_status()
        results = response.json()['arguments']
        print(len(results))

        # Rank is the 1-based position in the API's result order. Only the
        # text of the first premise is stored; an argument without premises
        # gets an empty string instead of raising IndexError.
        arguments = []
        for rank, result in enumerate(results, start=1):
            premises = result['premises']
            premise_text = premises[0]['text'] if premises else ''
            arguments.append(
                [result['id'], result['conclusion'], premise_text, result['stance'], rank]
            )

        # Save the retrieved arguments for the current topic to its own file.
        topic_file_name = os.path.join(output_dir, f"{topic_id}.txt")
        retrieved_arguments_pd = pd.DataFrame(arguments, columns=header)
        retrieved_arguments_pd.to_csv(topic_file_name, sep='\t', index=False)

In [None]:
# Search settings shared by both years: the number of results requested per
# topic and the endpoint template, whose two '{}' slots are filled with the
# query text and the page size, in that order.
pageSize = 1000
url_template = "https://args.me/api/v2/arguments?query={}&corpora=args-me-2020-04-01&pageSize={}&format=json"

# 2020: parse the topic file and choose where its results are written
tree20 = ET.parse(f"{data_path}Data/topic_files/topics-task-1-2020.xml")
root20 = tree20.getroot()
output_dir20 = f"{data_path}Data/arguments_api_retrieved_2020"

# 2021: parse the topic file and choose where its results are written
tree21 = ET.parse(f"{data_path}Data/topic_files/topics-task-1-only-titles.xml")
root21 = tree21.getroot()
output_dir21 = f"{data_path}Data/arguments_api_retrieved_2021"

In [None]:
# Retrieve and save the arguments for the 2020 topics first, then for the
# 2021 topics, using the same page size and URL template for both runs.
for _topics_root, _target_dir in ((root20, output_dir20), (root21, output_dir21)):
    search_api(_topics_root, _target_dir, pageSize, url_template)