In [1]:
import pandas as pd
import networkx as nx

### Loading the dataframe

In [2]:
# Load the cleaned arguments dataset from CSV; later cells read its
# 'Argument1', 'Argument2' and 'Relation' columns.
df = pd.read_csv(filepath_or_buffer="datasets/arguments_dataset_cleaned.csv")

Convert the dataframe into a directed graph so that paths between arguments are easier to search.

In [3]:
# Directed graph with one edge per dataframe row (Argument1 -> Argument2);
# the row's 'Relation' value is stored under the edge attribute 'relation'.
G = nx.DiGraph()

# Zip the three columns instead of using df.iterrows(): iterrows() builds a
# Series for every row, which is slow and may upcast values to a common dtype.
G.add_edges_from(
    (source, target, {'relation': relation})
    for source, target, relation in zip(df['Argument1'], df['Argument2'], df['Relation'])
)

#### Functions to generate indirect relations

In [4]:
# Depth-first worker used by get_indirect_relation; tracks cycles explicitly.
def _search_indirect_relation(a, b, G, memo, on_stack, unresolved):
    """Search for the relation from `a` to `b` and report whether it is final.

    Args:
        a, b: Start and end nodes of the pair being resolved.
        G: Graph exposing `has_edge`, `nodes()` and `G[u][v]['relation']`.
        memo: Cache mapping `(node, b)` pairs to already resolved relations.
        on_stack: Nodes whose search is currently in progress (recursion stack).
        unresolved: Nodes that finished with None in this query only because a
            cycle cut their search short.

    Returns:
        A `(relation, definitive)` tuple. `definitive` is False only when the
        relation is None and the search had to skip a node because of a cycle,
        in which case the None must not be cached yet.
    """
    # Avoid recomputing a pair that has already been resolved.
    if (a, b) in memo:
        return memo[(a, b)], True

    # A direct edge from a to b takes precedence over any intermediary.
    if G.has_edge(a, b):
        relation = G[a][b]['relation']
        memo[(a, b)] = relation
        return relation, True

    definitive = True
    on_stack.add(a)
    # Look through intermediaries c such that a -> c is an Attack or a Support.
    for c in G.nodes():
        if c == a or c == b or not G.has_edge(a, c):
            continue
        first_hop = G[a][c]['relation']
        if first_hop != 'Attack' and first_hop != 'Support':
            continue
        if c in on_stack or c in unresolved:
            # Following c would re-enter a search that is still running (or
            # one that was itself cut short): skip it to break the cycle and
            # remember that a None result here is only provisional.
            definitive = False
            continue

        onward, onward_definitive = _search_indirect_relation(
            c, b, G, memo, on_stack, unresolved
        )
        if onward == 'Attack' or onward == 'Support':
            # Compose a -> c with c ~> b: exactly one Attack yields an Attack
            # (Attack+Support, Support+Attack); two Attacks or two Supports
            # yield a Support.
            hop_attacks = first_hop == 'Attack'
            onward_attacks = onward == 'Attack'
            composed = 'Attack' if hop_attacks != onward_attacks else 'Support'
            on_stack.discard(a)
            memo[(a, b)] = composed
            return composed, True
        definitive = definitive and onward_definitive
    on_stack.discard(a)

    # No valid relation found through any intermediary.
    if definitive:
        memo[(a, b)] = None
    else:
        unresolved.add(a)
    return None, definitive


# Recursive function to determine the indirect relation between two arguments
def get_indirect_relation(a, b, G, memo):
    """Return the relation ('Attack' / 'Support') that `a` has towards `b`.

    A direct edge a -> b is returned as is. Otherwise the first intermediary
    c (in `G.nodes()` order) with an 'Attack' or 'Support' edge a -> c and a
    resolvable relation from c to b decides the result: the two relations are
    composed so that exactly one Attack gives 'Attack' and anything else gives
    'Support'.

    Cycles in the graph are handled: a node that is already being explored is
    not re-entered, so the search always terminates.

    Args:
        a: Source node.
        b: Target node.
        G: Directed graph whose edges carry a 'relation' attribute.
        memo: Dict used as a cache across calls; it is updated in place with
            `(node, b)` keys.

    Returns:
        The relation value, or None when no relation can be derived.
    """
    unresolved = set()
    relation, definitive = _search_indirect_relation(a, b, G, memo, set(), unresolved)
    if not definitive:
        # The whole query failed, so every node whose search was cut short by
        # a cycle is now known to have no relation to b either: cache that.
        for node in unresolved:
            memo[(node, b)] = None
    return relation

# Function to format paths with relations
def format_path_with_relations(path, edge_data):
    """Render a path as 'n0 (rel0) -> n1 (rel1) -> ... -> nk'.

    Args:
        path: Sequence of nodes along the path.
        edge_data: Sequence of edge attribute dicts, where `edge_data[i]`
            describes the edge from `path[i]` to `path[i + 1]` and holds a
            'relation' key.

    Returns:
        The formatted string; an empty string for an empty path.
    """
    if len(path) == 0:
        return ''

    # Every node except the last is annotated with the relation of the edge
    # that leaves it.
    parts = [
        f"{path[i]} ({edge_data[i]['relation']})"
        for i in range(len(path) - 1)
    ]
    # str() keeps the final node consistent with the f-string formatting
    # above, so non-string nodes (e.g. ints or NaN) do not break the join.
    parts.append(str(path[-1]))
    return ' -> '.join(parts)

# Generator yielding one record per qualifying path, in node iteration order.
def _iter_indirect_relation_records(G, n, memo):
    """Yield a record dict for every simple path of exactly `n` edges.

    A record is only produced when `get_indirect_relation` returns a truthy
    relation for the path's end points.
    """
    for source in G.nodes():
        for target in G.nodes():
            if source == target:
                continue
            # Iterate the path generator lazily so that a caller that stops
            # early does not pay for enumerating every remaining path.
            for path in nx.all_simple_paths(G, source, target, cutoff=n):
                # cutoff=n also yields shorter paths; keep exactly n edges.
                if len(path) - 1 != n:
                    continue
                # NOTE(review): the relation is resolved from the end points
                # alone (a direct source -> target edge wins), so it may not
                # be the relation implied by this particular path - confirm
                # that this is intended.
                relation = get_indirect_relation(path[0], path[-1], G, memo)
                if not relation:
                    continue
                edge_data = [G[u][v] for u, v in zip(path, path[1:])]
                yield {
                    'Argument1': source,
                    'Argument2': target,
                    'Relation': relation,
                    'Path': format_path_with_relations(path, edge_data),
                }


# Function to get indirect relations between arguments joined by a path of exactly n edges
def get_indirect_relations(G, n, sample_size):
    """Collect indirect relations backed by simple paths of exactly `n` edges.

    Args:
        G: Directed graph whose edges carry a 'relation' attribute.
        n: Required number of edges in each reported path.
        sample_size: Stop as soon as this many records have been collected.
            0 returns an empty frame; None or a negative value never matches
            the record count, so every qualifying path is returned.

    Returns:
        A DataFrame with the columns 'Argument1', 'Argument2', 'Relation' and
        'Path' (present even when no record was found).
    """
    columns = ['Argument1', 'Argument2', 'Relation', 'Path']
    records = []
    if sample_size != 0:
        memo = {}
        for record in _iter_indirect_relation_records(G, n, memo):
            records.append(record)
            if len(records) == sample_size:
                break
    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(records, columns=columns)


#### N=2

In [5]:
# Sample up to 500 relations whose reported path has exactly 2 edges.
indirect_df = get_indirect_relations(G, n=2, sample_size=500)

In [6]:
# Write the current sample to CSV without the dataframe index column.
indirect_df.to_csv(path_or_buf="generated_relations/indirect_n2.csv", index=False)

#### N=3

In [7]:
# Sample up to 500 relations whose reported path has exactly 3 edges.
indirect_df = get_indirect_relations(G, n=3, sample_size=500)

In [9]:
# Write the current sample to CSV without the dataframe index column.
indirect_df.to_csv(path_or_buf="generated_relations/indirect_n3.csv", index=False)

#### N=5

In [10]:
# Sample up to 500 relations whose reported path has exactly 5 edges.
indirect_df = get_indirect_relations(G, n=5, sample_size=500)

In [11]:
# Write the current sample to CSV without the dataframe index column.
indirect_df.to_csv(path_or_buf="generated_relations/indirect_n5.csv", index=False)