In [20]:
pip install javalang

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import json
import re
from collections import defaultdict
import os
import javalang

In [10]:
# Root directory that process_directory() scans for Java sources.
# Set the TRAIN_TICKET_DIR environment variable to analyse a checkout stored
# elsewhere; when it is unset the original location below is used.
BASE_DIR = os.environ.get(
    "TRAIN_TICKET_DIR",
    r"O:\UL\Dissertation\Tools Testing\train-ticket",
)

Check whether an entity is declared as an enum

In [11]:
def is_enum(entity_name, base_dir):
    """Report whether ``entity_name`` is declared as a Java enum under ``base_dir``.

    Only directories that have a path component named ``entity`` (compared
    case-insensitively) are inspected, and only ``.java`` files whose name
    starts with ``entity_name`` (also case-insensitively) are parsed.

    Args:
        entity_name: Simple type name to look up.
        base_dir: Root directory to walk.

    Returns:
        For the first parsed file containing a type whose name equals
        ``entity_name`` (ignoring case): ``True`` if that type is an enum
        declaration, ``False`` otherwise. ``False`` when no such type is found.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        # Skip every directory that is not inside an "entity" package.
        if 'entity' not in folder.lower().split(os.sep):
            continue

        for filename in filenames:
            if not filename.endswith(".java"):
                continue
            wanted = entity_name.lower()
            if not filename.lower().startswith(wanted):
                continue

            source_path = os.path.join(folder, filename)
            try:
                with open(source_path, 'r', encoding='utf-8') as handle:
                    unit = javalang.parse.parse(handle.read())
                    declaration = next(
                        (decl for decl in unit.types if decl.name.lower() == wanted),
                        None,
                    )
                    if declaration is not None:
                        return isinstance(declaration, javalang.tree.EnumDeclaration)
            except Exception:
                # Best effort: unreadable or unparsable files are ignored and
                # the search moves on to the next candidate.
                pass

    return False


Parse the imports and return the names of the imported entities whose package path contains '.entity.' (static imports are skipped)

In [12]:
def extract_imported_entities(java_code):
    """List the simple names of entity classes imported by a Java source.

    An import counts as an entity import when its dotted path contains
    ``.entity.`` and it is not a static import.

    Args:
        java_code: Java source text to parse with javalang.

    Returns:
        The last dotted segment of every matching import, in import order.
    """
    compilation_unit = javalang.parse.parse(java_code)
    return [
        declaration.path.split('.')[-1]
        for declaration in compilation_unit.imports
        if '.entity.' in declaration.path and not declaration.static
    ]


In [None]:
def extract_field_usages(java_code, imported_entities):
    """Collect the members referenced through an imported entity name.

    Every member reference in the parsed source is inspected; it is recorded
    only when its qualifier is exactly one of ``imported_entities``.

    Args:
        java_code: Java source text to parse with javalang.
        imported_entities: Entity names to look for as qualifiers.

    Returns:
        A ``defaultdict(set)`` mapping each matched entity name to the set of
        member names referenced on it.
    """
    compilation_unit = javalang.parse.parse(java_code)
    members_by_entity = defaultdict(set)

    references = (
        reference
        for _path, reference in compilation_unit.filter(javalang.tree.MemberReference)
    )
    for reference in references:
        owner = reference.qualifier
        if owner not in imported_entities:
            continue
        members_by_entity[owner].add(reference.member)

    return members_by_entity


In [14]:
def infer_service_name(file_path):
    """Derive a service name from a file path.

    The path is lower-cased and split on ``os.sep``; the first component that
    contains the substring ``service`` is returned.

    Args:
        file_path: Path of the file being processed.

    Returns:
        The first matching lower-cased path component, or the literal
        ``"UnknownService"`` when no component contains ``service``.
    """
    components = file_path.lower().split(os.sep)
    return next(
        (component for component in components if "service" in component),
        "UnknownService",
    )

def is_in_service_package(file_path):
    """Tell whether any ``os.sep``-separated component of ``file_path``
    contains the substring ``service`` (case-insensitive)."""
    for component in file_path.split(os.sep):
        if "service" in component.lower():
            return True
    return False


In [None]:
def process_directory(base_dir):
    """Build the per-service list of nanoentities found under ``base_dir``.

    Every ``.java`` file located below a ``src/main/java`` directory whose path
    passes ``is_in_service_package`` is read. Its entity imports are extracted,
    enums are dropped, and each remaining entity contributes either
    ``"<entity>.<member>"`` for every member reference found, or the bare
    lower-cased entity name when no member reference was found.

    Args:
        base_dir: Root directory to walk.

    Returns:
        A list of single-key dicts ``{service_name: sorted_nanoentities}``,
        one per service, in the order the services were first encountered.
        Files that fail to read or parse are reported with ``print`` and
        skipped.
    """
    service_to_nanoentities = defaultdict(set)

    # is_enum() walks the whole tree on every call, so remember its answer per
    # entity name for the duration of this run instead of re-walking for every
    # import of every file.
    enum_cache = {}

    def _is_enum_cached(entity_name):
        if entity_name not in enum_cache:
            enum_cache[entity_name] = is_enum(entity_name, base_dir)
        return enum_cache[entity_name]

    for root, _, files in os.walk(base_dir):
        # Normalise separators so the check works with Windows paths too.
        normalized_root = root.replace("\\", "/").lower()
        if "src/main/java" not in normalized_root:
            continue
        if not is_in_service_package(root):
            continue

        for file in files:
            if not file.endswith(".java"):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as source_file:
                    java_code = source_file.read()

                imported_entities = [
                    entity
                    for entity in extract_imported_entities(java_code)
                    if not _is_enum_cached(entity)
                ]
                field_usages = extract_field_usages(java_code, imported_entities)

                nanoentities = []
                for entity in imported_entities:
                    fields = field_usages.get(entity, set())
                    if fields:
                        nanoentities.extend(
                            f"{entity.lower()}.{field}" for field in fields
                        )
                    else:
                        # No member reference seen: fall back to the entity itself.
                        nanoentities.append(entity.lower())

                service_name = infer_service_name(file_path)
                service_to_nanoentities[service_name].update(nanoentities)

            except Exception as e:
                # Best effort: report the file and keep scanning the rest.
                print(f"Failed to parse {file_path}: {e}")

    # Prepare output
    return [
        {service: sorted(nano_set)}
        for service, nano_set in service_to_nanoentities.items()
    ]


# Relations

In [None]:

def _service_entity_pairs(entry):
    """Yield ``(service, entities)`` pairs from one nanoentity entry."""
    if "service" in entry and "nanoentities" in entry:
        # Explicit shape: {"service": name, "nanoentities": [...]}.
        yield entry["service"], list(entry["nanoentities"])
        return
    # Mapping shape, as produced by process_directory: {name: [...]}.
    for service, entities in entry.items():
        if isinstance(entities, str):
            # An explicit-shape entry without its "nanoentities" list; keep
            # failing the way a direct entry["nanoentities"] lookup would.
            raise KeyError("nanoentities")
        yield service, list(entities)


def extract_relations(nanoentities_list):
    """Derive service-to-service relations from shared nanoentities.

    Each nanoentity is attributed to the *last* service in the input that
    lists it (compared case-insensitively). Every other service that also
    lists it gets an outgoing relation towards that owning service.

    Args:
        nanoentities_list: Iterable of entries, each either
            ``{"service": name, "nanoentities": [...]}`` or the
            ``{name: [...]}`` form returned by ``process_directory``.

    Returns:
        A list of dicts with keys ``serviceA``, ``serviceB``,
        ``sharedEntities`` (sorted) and ``direction`` (always ``"OUTGOING"``),
        in the order the service pairs were first encountered.

    Raises:
        KeyError: If an entry has ``"service"`` but no ``"nanoentities"``.
    """
    # Materialise once so both passes see the same data, even when the caller
    # hands in a one-shot iterable.
    pairs = [
        pair
        for entry in nanoentities_list
        for pair in _service_entity_pairs(entry)
    ]

    # Last writer wins: a later service listing the same entity becomes owner.
    entity_to_service = {}
    for service, entities in pairs:
        for entity in entities:
            entity_to_service[entity.lower()] = service

    relations_map = defaultdict(set)
    for serviceA, entities in pairs:
        for entity in entities:
            owner_service = entity_to_service.get(entity.lower())
            if owner_service and owner_service != serviceA:
                relations_map[(serviceA, owner_service)].add(entity)

    # Format output list
    return [
        {
            "serviceA": serviceA,
            "serviceB": serviceB,
            "sharedEntities": sorted(shared_entities),
            "direction": "OUTGOING",
        }
        for (serviceA, serviceB), shared_entities in relations_map.items()
    ]

In [17]:
# Scan the project, then print the per-service mapping as indented JSON
# followed by a one-line summary; sep="\n" puts the summary on its own line.
nanoentities = process_directory(BASE_DIR)
print(
    json.dumps(nanoentities, indent=2),
    f"\nExtracted nanoentities for {len(nanoentities)} services.",
    sep="\n",
)

[
  {
    "ts-admin-basic-info-service": [
      "config",
      "contacts",
      "station",
      "traintype"
    ]
  },
  {
    "ts-admin-route-service": [
      "route",
      "routeinfo"
    ]
  },
  {
    "ts-admin-travel-service": [
      "admintrip",
      "route",
      "traintype",
      "travelinfo"
    ]
  },
  {
    "ts-admin-user-service": [
      "user"
    ]
  },
  {
    "ts-assurance-service": [
      "assurance",
      "assurancetypebean",
      "plainassurance"
    ]
  },
  {
    "ts-auth-service": [
      "user"
    ]
  },
  {
    "ts-basic-service": [
      "travel"
    ]
  },
  {
    "ts-cancel-service": [
      "account",
      "notifyinfo",
      "order",
      "user"
    ]
  },
  {
    "ts-config-service": [
      "config"
    ]
  },
  {
    "ts-consign-price-service": [
      "consignprice"
    ]
  },
  {
    "ts-consign-service": [
      "consign",
      "consignrecord"
    ]
  },
  {
    "ts-contacts-service": [
      "contacts"
    ]
  },
  {
    "ts-delive