In [1]:
import os
import re

def get_values(filename):
  """
  This function opens a file, searches for the sentence "Finished within Xsec, crossing number: Y",
  and extracts the values of X and Y.

  Args:
      filename: The path to the file.

  Returns:
      A tuple containing the values of X and Y, or None if the sentence is not found.
  """
  with open(filename, 'r') as f:
    for line in f:
      match = re.search(r"Finished within (\d+\.?\d*)sec, crossing number: (\d+)", line, flags=re.MULTILINE)
      if match:
        return float(match.group(1)), float(match.group(2))
  return None

def main():
  folder_path = "res/big_tests"
  file_pattern = r".*\.out$"  # Matches all files ending with ".out"

  # Dictionary to store results {filename_base: (X_sum, Y_sum, count)}
  results = {}
  for filename in os.listdir(folder_path):
    if re.match(file_pattern, filename):
      filepath = os.path.join(folder_path, filename)
      values = get_values(filepath)
      if values:
        filename_base = filename.rsplit('_', 1)[0]
        if filename_base not in results:
          results[filename_base] = (0, 0, 0)
        time, cnb = values
        results[filename_base] = (results[filename_base][0] + time, results[filename_base][1] + cnb, results[filename_base][2] + 1)

  # Print the averages for each filename base
  for filename_base, (time_sum, cnb_sum, count) in results.items():
    if count > 0:
      print(f"Filename: {filename_base}")
      print(f"Average Time taken: {time_sum / count} sec")
      print(f"Average Max Crossing Number: {cnb_sum / count}")
      #print(f"Filename: {filename_base}: {cnb_sum / count} & {time_sum / count} \\\ ")
      print()

if __name__ == "__main__":
  main()


Filename: BIOGRID-SYSTEM-Affinity_Capture-RNA-3.5.169.edgelist
Average Time taken: 7.338999999999999 sec
Average Max Crossing Number: 872.0

Filename: dip20170205.edgelist
Average Time taken: 1.299 sec
Average Max Crossing Number: 203.6

Filename: BIOGRID-SYSTEM-Affinity_Capture-MS-3.5.169.edgelist
Average Time taken: 15.917777777777777 sec
Average Max Crossing Number: 886.8888888888889

Filename: t.CAL-w.txt
Average Time taken: 772.5888888888888 sec
Average Max Crossing Number: 14.0

Filename: epinions1-d.txt
Average Time taken: 53.65 sec
Average Max Crossing Number: 1150.8

Filename: buddha-w.txt
Average Time taken: 78.72666666666667 sec
Average Max Crossing Number: 34.0

Filename: y-BerkStan-d.txt
Average Time taken: 7153.197999999999 sec
Average Max Crossing Number: 1595.0

Filename: CAIDA_as_20130601.edgelist
Average Time taken: 33.651999999999994 sec
Average Max Crossing Number: 899.9

Filename: twitter_combined-d.txt
Average Time taken: 78.74222222222221 sec
Average Max Crossing

In [7]:
def get_sums(filename):
    with open(filename, 'r') as f:
        deg_sum=0
        cross_sum=0
        for line in f:
            linesplit = line.split()
            if len(linesplit) == 2:
                deg_sum+= int(linesplit[0])
                cross_sum+= int(linesplit[1])
            else:
                return None
    return (deg_sum, cross_sum)



def main():
  folder_path = "res/big_tests"
  file_pattern = r".*_deg_cross\.txt$"  

  results = {}
  for filename in os.listdir(folder_path):
    if re.match(file_pattern, filename):
      filepath = os.path.join(folder_path, filename)
      sums = get_sums(filepath)
      if sums:
        filename_base = filename.rsplit('__', 1)[0]
        if filename_base not in results:
          results[filename_base] = (0, 0, 0)
        deg_sum, cnb_sum = sums
        results[filename_base] = (results[filename_base][0] + deg_sum, results[filename_base][1] + cnb_sum, results[filename_base][2] + 1)

  # Print the averages for each filename base
  for filename_base, (sum_deg_sum, sum_cnb_sum, count) in results.items():
    if count > 0:
      print(f"Filename: {filename_base}")
      print(f"Average Deg Sum: {sum_deg_sum / count}")   #just to check
      print(f"Average Crossing Numbers Sum: {sum_cnb_sum / count}")
      print()

if __name__ == "__main__":
  main()

Filename: oregon2_010331.txt_5_deg_cross.txt
Average Deg Sum: 62360.0
Average Crossing Numbers Sum: 98143.0

Filename: z-alue7065.txt_3_deg_cross.txt
Average Deg Sum: 109682.0
Average Crossing Numbers Sum: 219351.0

Filename: epinions1-d.txt_4_deg_cross.txt
Average Deg Sum: 811480.0
Average Crossing Numbers Sum: 1511572.0

Filename: t.FLA-w.txt_4_deg_cross.txt
Average Deg Sum: 2687902.0
Average Crossing Numbers Sum: 5375796.0

Filename: CAIDA_as_20130601.edgelist_2_deg_cross.txt
Average Deg Sum: 302868.0
Average Crossing Numbers Sum: 448723.0

Filename: z-alue7065.txt_4_deg_cross.txt
Average Deg Sum: 109682.0
Average Crossing Numbers Sum: 219344.0

Filename: grid300-10.txt_2_deg_cross.txt
Average Deg Sum: 325070.0
Average Crossing Numbers Sum: 650094.0

Filename: t.CAL-w.txt_2_deg_cross.txt
Average Deg Sum: 4630444.0
Average Crossing Numbers Sum: 9260874.0

Filename: oregon2_010331.txt_9_deg_cross.txt
Average Deg Sum: 62360.0
Average Crossing Numbers Sum: 96540.0

Filename: grid300-10.

In [28]:
import os
import re

def main():
  folder_path = "../resap7"

  for filename in os.listdir(folder_path):
      file_path = os.path.join(folder_path, filename)
      with open(file_path, 'r') as f:
        content = f.read()  # Read the entire file content
        match = re.search(
          r"Crossing/stabbing number : (\d+) \(random :(\d+), bfs: (\d+)\)\n"
          r"Given order Crossing Number Sum= (\d+), Max Crossing Number=(\d+)\n"
          r"LowCross order Crossing Number Sum= (\d+), Max Crossing Number=(\d+)", 
          content, 
          re.MULTILINE
          )
        if match:
          filename_base1 = filename.rsplit('.')[0]
          filename_base = filename_base1.rsplit('.')[0]
          print(f"{filename_base} & {match.group(4)} & {match.group(6)} & {match.group(1)} & {match.group(2)} & {match.group(3)}\\\\")
          
      
if __name__ == "__main__":
  main()

CAIDA_as_20130601 & 676142 & 445350 & 472326 & 689972 & 562213\\
com-dblp & 3963023 & 4559311 & 2884539 & 4833413 & 3164243\\
grid300-10 & 506748 & 831272 & 641571 & 831295 & 553373\\
BIOGRID-MV-Physical-3 & 304694 & 286442 & 238891 & 315756 & 256888\\
p2p-Gnutella09 & 106984 & 116506 & 107049 & 120044 & 107021\\
gnutella31-d & 617190 & 715592 & 634244 & 716603 & 618632\\
dip20170205 & 348826 & 325935 & 287351 & 348813 & 293091\\
BIOGRID-SYSTEM-Affinity_Capture-MS-3 & 1241487 & 1059858 & 1064727 & 1273144 & 1119378\\
epinions1-d & 1573231 & 1590346 & 1560779 & 1770494 & 1558816\\
xgrid500-10 & 1404022 & 2304958 & 1777529 & 2304966 & 1519918\\
BIOGRID-SYSTEM-Affinity_Capture-RNA-3 & 166453 & 98995 & 108030 & 178734 & 126420\\
twitter_combined-d & 5479672 & 4018636 & 3626607 & 5520794 & 4617659\\
ca-HepPh & 485702 & 216454 & 165950 & 491858 & 277365\\
oregon2_010331 & 139913 & 107915 & 108737 & 144226 & 117648\\
facebook_combined & 307786 & 171245 & 164456 & 349813 & 303951\\
z-alue7065 