## FORMAT of INPUT and OUTPUT files 
 
- INPUT FILE FORMAT: 
 
    ``` 
    #nodes 
    node01 node02 weight 
    ``` 
 
    ... 
 
- OUTPUT FILE FORMAT: 
 
    ``` 
    #nodes 
    node01-1 node02-1 weight 
    ``` 
 
    ...

### For every forward edge A-B, cancel the corresponding backward edge B-A (if it exists):

In [51]:
import networkx as nx
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import to_agraph

# Code for preprocessing of a graph dataset

In [52]:
inputFileName = "../inputs/airports_8000.txt"
outputFileName = "../inputs/airports_8000_directed.txt"

inputFileNameDirected =  outputFileName
outputFileNameDAG = "../inputs/airport_8000_dag.txt"


In [53]:
def countNodes(fileName):
    fin = open(fileName, "r")
    numNodes = fin.readline().strip()
    
    lst = [" " for i in range(int(numNodes))]

    for line in fin:
        vals = line.strip().split()
        lst[int(vals[0])-1] = "*"
        lst[int(vals[1])-1] = "*"

    cnt = 0
    for i in range(len(lst)):
        if lst[i] == "*":
            cnt +=1
    print(f"Number of nodes in graph from {fileName}: ", cnt)

In [54]:
# for every edge A-B, remove backward edge B-A (if it exist)
fin = open(inputFileName, "r")
fout = open(outputFileName, "w")

countNodes(inputFileName)
d = dict()
fout.write("                  \n")

first = fin.readline()
for line in fin:
    val = line.strip().split()
    # check if key in dictionary and if val in list of key 
    if not (val[1] in d and val[0] in d[val[1]]):
        fout.write(f"{int(val[0]) - 1} {int(val[1]) - 1} {int(val[2])}\n")        
        if (val[0] not in d):   
            d[val[0]] = []
        d[val[0]].append(val[1])

fin.close()
fout.close()

fout = open(outputFileName, "r+")
fout.write(f"{len(d)}")
print(d.keys())
fout.flush()

fout.close()
countNodes(outputFileName)



Number of nodes in graph from ../inputs/airports_8000.txt:  2939
dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '11', '12', '15', '16', '18', '21', '24', '27', '28', '29', '30', '32', '37', '40', '41', '42', '45', '48', '49', '50', '54', '55', '58', '60', '62', '65', '67', '69', '70', '71', '72', '73', '75', '78', '79', '80', '85', '87', '90', '91', '94', '98', '99', '100', '105', '108', '109', '111', '115', '117', '119', '120', '121', '122', '127', '128', '130', '132', '133', '136', '140', '141', '143', '144', '146', '148', '149', '152', '153', '154', '155', '156', '160', '161', '166', '168', '169', '172', '173', '174', '175', '176', '178', '182', '183', '184', '187', '188', '189', '192', '193', '196', '200', '202', '209', '210', '211', '214', '220', '221', '222', '230', '231', '234', '235', '236', '237', '238', '239', '240', '244', '245', '246', '247', '248', '253', '260', '262', '263', '264', '268', '269', '270', '273', '280', '286', '287', '290', '293', '295', '296', '298'

IndexError: list assignment index out of range

- Node enumeration starting from 1

input file: nodes start from 0

output file: nodes start from 1
* useful to generate input file with dataset to be tested with Matlab's max flow algorithm

In [None]:
# generate output file with node values from 1 onwards

fin = open(inputFileName, "r")
fout = open(outputFileName, "w")

line = fin.readline()
fout.write(line)

for line in fin:
    vals = line.split()
    vals[0] = int(vals[0]) + 1
    vals[1] = int(vals[1]) + 1
    fout.write(str(vals[0]) + " " + str(vals[1]) + " " + vals[2] + "\n")

fin.close()
fout.close()

Clear cycles from the graph

In [None]:
def graph_from_file(filename):
    G = nx.DiGraph()    # G is a directed graph
    with open(filename) as f:
        line = f.readline()
        for line in f:
            node1, node2, weight = line.split()
            G.add_edge(node1, node2, capacity=int(weight))
    return G

def remove_cycle(G):
    # Find a cycle in the graph
    cycle = nx.find_cycle(G)
    # print(f"Cycle found: {cycle}")
    
    # Remove one edge from the cycle to break it
    node1 = cycle[0][0]
    node2 = cycle[0][1]
    G.remove_edge(node1, node2)
    # print(f"Removed edge: {node1}, {node2}")

    return G

def main():

    G = graph_from_file(inputFileNameDirected)
    fout = open(outputFileNameDAG, "w")
       
    # check if the directed graph is acyclic, i.e. if it is a DAG
    isDag = nx.is_directed_acyclic_graph(G)
    print("The graph is DAG:", isDag)

    # if not acyclic (i.e. there is at least 1 cycle), remove all cycle(s)
    while not isDag:
        G = remove_cycle(G)
        isDag = nx.is_directed_acyclic_graph(G)

    print("The graph is DAG:", isDag)
    fout.write(f"{G.number_of_nodes()}\n")
    # print(f"{G.number_of_nodes()}")
    for edge in G.edges():
         fout.write(f"{edge[0]} {edge[1]} {G[edge[0]][edge[1]]['capacity']}\n")
        # print(f"{edge[0]} {edge[1]} {G[edge[0]][edge[1]]['capacity']}\n")
    
    fout.close()

    # print("maximum flow " + str(nx.maximum_flow(G,"0","999")[0]))
main()