# Dependency Extraction via include/import directive

## Steps
1. Recurse through each file of source, use the imports to determine dependency relations, generate TAs
2. Save the extracted dependencies to a raw.ta file

In [6]:
# Step 1: Generate dependencies
import os

src = ".\\flink-1.17.1"
dependencies = []

def package_to_path(parent_module, package_name):
  return parent_module + package_name.replace(".", "\\") + ".java"

def process_line(file_path, line):
  # Example: 
  # line = import org.apache.flink.api.common.functions.MapFunction;
  # first need to remove the ; at end and split by space
  dependency = line.split(" ")[1].strip().replace(";", "")

  # Example:
  # file_path = .\flink-1.17.1\flink-core\src\main\java\org\apache\flink\api\common\functions\MapFunction.java
  # want to only get the parent directory up until /java/ at which point things will be scoped by package (org.apache.flink.*)
  parent_module = file_path.split("\\java\\")[0] + "\\java\\"

  # now we can convert the package to a path and add the tuple to our list
  dependency = package_to_path(parent_module, dependency)
  dependencies.append((file_path[2:].replace("\\", "/"), dependency[2:].replace("\\", "/")))

def process_file(file_path):
  with open(file_path, "r", encoding="utf-8") as f:
    lines = f.readlines()
    for line in lines:
        if not line.startswith("import") or not "org.apache.flink" in line:
          continue

        if "static" in line: 
          line.replace("static", "")

        process_line(file_path, line)


def generate_dependencies():
  for dir_path, _, file_names in os.walk(src):
    for file_name in file_names:
      if file_name.endswith(".java"):
        process_file(dir_path + "\\" + file_name)


generate_dependencies()
limit = 15 

# print out the dependencies to verify they are correct
for file_path, dependency in dependencies:
  print(file_path + " -> " + dependency)

  limit -= 1
  if limit == 0:
    break

print(f"Size: {len(dependencies)}")


flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/FlinkVersion.java -> flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/Public.java
flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/docs/ConfigGroup.java -> flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/Internal.java
flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/docs/ConfigGroups.java -> flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/Internal.java
flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/docs/Documentation.java -> flink-1.17.1/flink-annotations/src/main/java/org/apache/flink/annotation/Internal.java
flink-1.17.1/flink-architecture-tests/flink-architecture-tests-base/src/main/java/org/apache/flink/architecture/common/Conditions.java -> flink-1.17.1/flink-architecture-tests/flink-architecture-tests-base/src/main/java/static.java
flink-1.17.1/flink-architecture-tests/flin

In [7]:
# Step 2: Generate the raw.ta file
raw_ta_output = "./source_raw_ta/import_dependencies.raw.ta"

with open(raw_ta_output, "w+") as f:
  f.write("FACT TUPLE : \n")

  unique_file_paths = set(file_path for file_path, _ in dependencies)

  # first generate all the concrete instances
  for file_path in unique_file_paths:
    f.write(f"$INSTANCE {file_path} cFile\n")

  # now add in all the dependencies
  for file_path, dependency in dependencies:
    f.write(f"cLinks {file_path} {dependency}\n")