In [61]:
import os
import xml.etree.ElementTree as ET
from urllib.parse import quote

import pandas as pd
import requests

In [62]:
# Search API function
def search_api(root, output_dir, pageSize, url_template):
    """Query the search API once per topic and save each topic's results as TSV.

    For every ``topic`` child of ``root`` with a non-empty ``number``, the
    stripped topic title is percent-encoded and substituted, together with
    ``pageSize``, into ``url_template``. The entries found under the
    ``'arguments'`` key of the JSON response are written to
    ``<output_dir>/<number>.txt`` as a tab-separated table with the columns
    ``id``, ``conclusion``, ``premises`` (text of the first premise, or an
    empty string when there is none), ``stance`` and ``rank`` (1-based
    position in the response).

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Element whose ``topic`` children carry ``number`` and ``title`` elements.
    output_dir : str
        Directory for the per-topic files; created if it does not exist.
    pageSize : int
        Value substituted into the second placeholder of ``url_template``.
    url_template : str
        ``str.format`` template with two positional placeholders:
        the query text and the page size.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an error status.
    KeyError
        If the response has no ``'arguments'`` key or a result lacks
        ``id``, ``conclusion`` or ``stance``.
    """
    header = ['id', 'conclusion', 'premises', 'stance', 'rank']
    request_timeout = 120  # seconds; keeps a stalled connection from hanging the notebook

    # The directory does not depend on the topic, so create it once.
    os.makedirs(output_dir, exist_ok=True)

    for topic in root.findall('topic'):
        # A topic without a usable number cannot be given a file name.
        topic_id = (topic.findtext('number') or '').strip()
        if not topic_id:
            continue
        topic_title = (topic.findtext('title') or '').strip()

        # Percent-encode the title so characters such as '&', '#' or '+'
        # stay part of the query value instead of altering the URL.
        url = url_template.format(quote(topic_title, safe=''), pageSize)

        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
        results = response.json()['arguments']
        print(len(results))

        arguments = []
        for rank, result in enumerate(results, start=1):
            # Some arguments may come without premises; store an empty text
            # rather than failing on the [0] lookup.
            premises = result.get('premises') or []
            premise_text = premises[0]['text'] if premises else ''
            arguments.append(
                [result['id'], result['conclusion'], premise_text, result['stance'], rank]
            )

        topic_file_name = os.path.join(output_dir, f"{topic_id}.txt")
        retrieved_arguments_pd = pd.DataFrame(arguments, columns=header)
        retrieved_arguments_pd.to_csv(topic_file_name, sep='\t')


In [67]:
# Create runfiles using API
def make_runfile(root, output_dir, pageSize, url_template, run_tag,
                 top_k=50, output_name="runfile21.txt"):
    """Query the search API once per topic and write a ranked run file.

    For every ``topic`` child of ``root`` with a non-empty ``number``, the
    stripped topic title is percent-encoded and substituted, together with
    ``pageSize``, into ``url_template``. The first ``top_k`` entries under
    the ``'arguments'`` key of the JSON response are written, one per line,
    as ``<topic number> Q0 <argument id> <rank> <score> <run_tag>``, where
    ``rank`` is the 1-based position in the response and ``score`` is read
    from ``result['explanation']['score']``.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Element whose ``topic`` children carry ``number`` and ``title`` elements.
    output_dir : str
        Directory for the run file; created if it does not exist.
    pageSize : int
        Value substituted into the second placeholder of ``url_template``.
    url_template : str
        ``str.format`` template with two positional placeholders:
        the query text and the page size.
    run_tag : str
        Identifier written as the last field of every line.
    top_k : int, optional
        Maximum number of results kept per topic (default 50).
    output_name : str, optional
        File name of the run file inside ``output_dir``
        (default ``"runfile21.txt"``).

    Raises
    ------
    requests.RequestException
        If a request times out, fails, or returns an error status.
    KeyError
        If the response has no ``'arguments'`` key or a result lacks
        ``id`` or ``explanation.score``.
    """
    request_timeout = 120  # seconds; keeps a stalled connection from hanging the notebook

    os.makedirs(output_dir, exist_ok=True)
    output_file_name = os.path.join(output_dir, output_name)

    with open(output_file_name, 'w', encoding='utf-8') as f:
        for topic in root.findall('topic'):
            # A topic without a usable number cannot be referenced in the run file.
            topic_id = (topic.findtext('number') or '').strip()
            if not topic_id:
                continue
            topic_title = (topic.findtext('title') or '').strip()

            # Percent-encode the title so characters such as '&', '#' or '+'
            # stay part of the query value instead of altering the URL.
            url = url_template.format(quote(topic_title, safe=''), pageSize)

            response = requests.get(url, timeout=request_timeout)
            response.raise_for_status()
            results = response.json()['arguments'][:top_k]
            print(len(results))

            for rank, result in enumerate(results, start=1):
                argument_id = result['id']
                score = result['explanation']['score']
                f.write(f"{topic_id} Q0 {argument_id} {rank} {score} {run_tag}\n")

In [None]:
# Evaluate runfiles

In [None]:
# Feature Extraction

In [None]:
# Re-ranking

In [64]:
# Base data directory. Defaults to the original local checkout; set the
# DISSERTATION_DATA_DIR environment variable to run the notebook elsewhere
# without editing the paths below.
DATA_DIR = os.environ.get(
    "DISSERTATION_DATA_DIR",
    "/Users/balazs/Desktop/dissertationProjectCode/dissertationCodeBase/Data",
)
TOPIC_DIR = os.path.join(DATA_DIR, "topic_files")

# Load the XML files for 2020
tree20 = ET.parse(os.path.join(TOPIC_DIR, 'topics-task-1-2020.xml'))
root20 = tree20.getroot()

# Load the XML files for 2021
tree21 = ET.parse(os.path.join(TOPIC_DIR, 'topics-task-1-only-titles.xml'))
root21 = tree21.getroot()

# Define the output directories for 2020
# (the trailing empty component keeps the trailing separator of the original path)
output_dir20 = os.path.join(DATA_DIR, "args20api", "")

# Define the output directories for 2021
output_dir21 = os.path.join(DATA_DIR, "args21api", "")

# Number of results requested per query
pageSize = 1000
# URL template: first placeholder is the query text, second is the page size.
url_template = "https://args.me/api/v2/arguments?query={}&corpora=args-me-2020-04-01&pageSize={}&format=json"
# Alternative template without the corpora parameter:
#url_template = "https://args.me/api/v2/arguments?query={}&pageSize={}&format=json"

In [65]:
# Retrieve and store the per-topic results for the 2020 topics
search_api(
    root=root20,
    output_dir=output_dir20,
    pageSize=pageSize,
    url_template=url_template,
)

# Retrieve and store the per-topic results for the 2021 topics
search_api(
    root=root21,
    output_dir=output_dir21,
    pageSize=pageSize,
    url_template=url_template,
)

0
0
0
162
0
0
0
0
0
0
802
0
10
0
23
1000
0
0
0
0
0
8
0
54
126
1000
10
14
326
0
0
160
0
21
0
5
2
8
0
2
0
8
0
2
0
0
0
0
0
0


In [69]:
# Write the run file for the 2020 topics
make_runfile(
    root=root20,
    output_dir=output_dir20,
    pageSize=pageSize,
    url_template=url_template,
    run_tag="40323335_Run_File_20",
)

# Write the run file for the 2021 topics
make_runfile(
    root=root21,
    output_dir=output_dir21,
    pageSize=pageSize,
    url_template=url_template,
    run_tag="40323335_Run_File_21",
)

0
0
0
50
0
0
0
0
0
0
50
0
10
0
23
50
0
0
0
0
0
8
0
50
50
50
10
14
50
0
0
50
0
21
0
5
2
8
0
2
0
8
0
2
0
0
0
0
0
0
