## Imports & helper functions

In [None]:
%pip install --upgrade openai
%pip install --upgrade tqdm

In [None]:
import json
from tqdm import tqdm

def read_json_file(file_path):
    """Load and return the JSON document stored at `file_path`.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding, so non-ASCII content is read the same way everywhere.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def parse_json(json_string):
    """Decode the JSON text in `json_string` and return the resulting value."""
    return json.loads(json_string)


def prettify_json(obj):
    """Return `obj` serialized as JSON text indented by two spaces."""
    return json.dumps(obj, indent=2)


def write_to_json_file(obj, file_path):
    """Write `obj` to `file_path` as two-space-indented JSON, replacing any existing file."""
    with open(file_path, mode='w') as sink:
        json.dump(obj, fp=sink, indent=2)


In [None]:
import configparser

def read_ini_file(file_path):
    """Read an INI file into a plain dict of dicts.

    Returns a mapping of section name -> {option: value}. Option names come
    back as configparser normalizes them (lower-cased by default). A file that
    cannot be read yields an empty dict, because `ConfigParser.read` skips
    unreadable paths silently.
    """
    parser = configparser.ConfigParser()
    parser.read(file_path)

    ini_dict = {}
    for section in parser.sections():
        ini_dict[section] = dict(parser.items(section))
    return ini_dict


In [None]:
from openai import OpenAI

In [None]:
import re


def remove_java_comments(java_source):
    """Strip `//` line comments and `/* ... */` block comments from Java source.

    String literals, character literals and text blocks are matched first and
    copied through unchanged, so comment markers that appear inside them
    (for example the `//` in "http://example.com") are not mistaken for
    comments.

    Args:
        java_source: Java source code as a string.

    Returns:
        The source without comments, with leading and trailing whitespace
        stripped.
    """
    # Literals that must survive untouched (captured as group 1)
    text_block = r'"""(?:\\.|[^\\])*?"""'
    string_literal = r'"(?:\\.|[^"\\\n])*"'
    char_literal = r"'(?:\\.|[^'\\\n])*'"
    # Comments to drop (not captured)
    line_comment = r"//[^\n]*"
    block_comment = r"/\*.*?\*/"

    pattern = (f"({text_block}|{string_literal}|{char_literal})"
               f"|{line_comment}|{block_comment}")

    # Keep the literal when group 1 matched; otherwise the match is a comment
    java_source_without_comments = re.sub(
        pattern, lambda match: match.group(1) or "", java_source,
        flags=re.DOTALL)

    return java_source_without_comments.strip()


In [None]:
def sentence(s):
    """Turn `s` into a sentence.

    Surrounding whitespace is stripped, the first character is upper-cased and
    a period is appended unless the text already ends with sentence-ending
    punctuation. Empty or whitespace-only input returns an empty string
    instead of raising IndexError.

    Args:
        s: The text to format.

    Returns:
        The formatted sentence, or '' when `s` contains no visible text.
    """
    t = s.strip()
    if not t:
        # Nothing to capitalize or punctuate
        return t

    capitalized = f'{t[0].upper()}{t[1:]}'
    if t[-1] in '.?!…~–—':
        return capitalized
    return f'{capitalized}.'

In [None]:
def transform_graph(graph):
    """Index the nodes and edges found under `graph['elements']`.

    Returns a tuple `(nodes, edges)` where `nodes` maps each node id to that
    node's `data` dict and `edges` maps each edge label to the list of `data`
    dicts of the edges carrying it. An edge without a `label` gets one built
    by joining its `labels` with commas; that label is also stored back on the
    edge's `data` dict, so the input graph is modified in place.
    """
    elements = graph['elements']

    nodes = {}
    for node in elements['nodes']:
        node_data = node['data']
        nodes[node_data['id']] = node_data

    edges = {}
    for edge in elements['edges']:
        edge_data = edge['data']
        if 'label' not in edge_data:
            edge_data['label'] = ','.join(edge_data['labels'])
        edges.setdefault(edge_data['label'], []).append(edge_data)

    return (nodes, edges)
	

In [None]:
def invert(edgeList):
    """Return reversed copies of the edges in `edgeList`.

    Each copy has `source` and `target` swapped and its label prefixed with
    "inv_" (a missing label counts as empty). All other keys are carried over
    unchanged, and the input edges are not modified.
    """
    prefix = "inv_"
    swapped_keys = ('source', 'target', 'label')

    invertedEdges = []
    for edge in edgeList:
        flipped = dict(
            source=edge['target'],
            target=edge['source'],
            label=prefix + edge.get('label', ''),
        )
        # Copy the remaining attributes after the three rewritten ones
        for key, value in edge.items():
            if key not in swapped_keys:
                flipped[key] = value
        invertedEdges.append(flipped)
    return invertedEdges

def find_paths(edgeList1, edgeList2):
    """Find all two-step paths that chain an edge of `edgeList1` with one of `edgeList2`.

    A path `(a, b, c)` is reported when `edgeList1` contains an edge a -> b
    and `edgeList2` contains an edge b -> c. Every source of a shared middle
    node is kept, so a node with several incoming edges in `edgeList1`
    contributes one path per source instead of only the last one seen.

    Args:
        edgeList1: Edge dicts (with 'source' and 'target') for the first step.
        edgeList2: Edge dicts (with 'source' and 'target') for the second step.

    Returns:
        A set of `(first, middle, last)` tuples of node ids.
    """
    # middle node -> every node that reaches it through edgeList1
    sources_by_target = {}
    for edge in edgeList1:
        sources_by_target.setdefault(edge['target'], []).append(edge['source'])

    paths = set()
    for edge in edgeList2:
        for first in sources_by_target.get(edge['source'], ()):
            paths.add((first, edge['source'], edge['target']))

    return paths

## Parameters

In [None]:
# Dry-run switch for the method classification below.
# If True: do not call the API, just write the prompts to the log file
# (the layer results file is then not written either).
only_print_prompt = False

In [None]:
# Load the run configuration and pull out the [project] settings
config = read_ini_file('config.ini')
project_settings = config['project']
project_name = project_settings['name']
project_desc = project_settings['desc']
ifile = project_settings['ifile']  # path of the input graph file
(project_name, project_desc, ifile)

## Read graph file

Load the knowledge graph that was extracted using javapers.

In [None]:
# Load the graph JSON and index it: `nodes` by node id, `edges` by edge label
graph = read_json_file(ifile)
nodes,edges = transform_graph(graph)

In [None]:
# Display the node index for inspection
nodes

## Connect to OpenAI

In [None]:
# Translate the optional [openai] config entries into OpenAI client arguments
openai_settings = config['openai']

cliet_args = {}
for ini_key, client_arg in (('apikey', 'api_key'), ('apibase', 'base_url')):
  if ini_key in openai_settings:
    cliet_args[client_arg] = openai_settings[ini_key]

# Fall back to a default model when none is configured
model = openai_settings.get('model', "gpt-3.5-turbo")

(list(cliet_args.keys()), model)

In [None]:
# Create the API client from the arguments collected from the config
client = OpenAI(**cliet_args)
# Display the base URL the client will talk to
client.base_url

In [None]:
# Smoke-test the LLM server with a single one-message chat completion
probe_messages = [{"role":"user","content":"What is your name?"}]
completion = client.chat.completions.create(
  model=model,
  messages=probe_messages,
  temperature=0,
)
# Show the text of the reply
print(completion.choices[0].message.content)

## Elements to be inspected

In [None]:
# Every (package, class, method) path made of a 'contains' edge followed by a
# 'hasScript' edge, restricted to public methods and sorted
methods = sorted(
    (pkg_id, cls_id, met_id)
    for pkg_id, cls_id, met_id in find_paths(edges['contains'], edges['hasScript'])
    if nodes[met_id]['properties']['visibility'] == 'public'
)
len(methods)

In [None]:
# Distinct (package, class) pairs that own at least one selected method
classes = sorted(set((pkg, clz) for pkg, clz, _ in methods))
len(classes)

In [None]:
# Distinct packages that own at least one selected class
packages = sorted(set(pkg for pkg, _ in classes))
len(packages),packages

In [None]:
# Build the nested result skeleton: package -> classes -> methods, each entry
# carrying the qualified name of its node.
#
# Index the sorted lists once so that the nested construction below does not
# rescan all of `classes` for every package and all of `methods` for every class.
methods_by_class = {}
for _, c, met_id in methods:
    methods_by_class.setdefault(c, []).append(met_id)

classes_by_package = {}
for p, cls_id in classes:
    classes_by_package.setdefault(p, []).append(cls_id)

results = {
    pkg_id: {
        'qualifiedName': nodes[pkg_id]['properties']['qualifiedName'],
        'classes': {
            cls_id: {
                'qualifiedName': nodes[cls_id]['properties']['qualifiedName'],
                'methods': {
                    met_id: {
                        'qualifiedName': nodes[met_id]['properties']['qualifiedName']
                    }
                    for met_id in methods_by_class.get(cls_id, [])
                },
            }
            for cls_id in classes_by_package.get(pkg_id, [])
        },
    }
    for pkg_id in packages
}

# results

## Ask LLM to classify methods into layers

In [None]:
# First prompt of the classification dialogue. The placeholders {project_desc},
# {class_name} and {method_src} are filled in per method with str.format().
prompt1_template = '''**Layered Software Architecture and Responsibilities:**

1. **Presentation Layer**: Manages the user interface, defines UI elements and behavior, displays information, responds to user input, and updates views.
  
2. **Service Layer**: Controls the application flow, orchestrates domain operations, connects UI events with domain logic, and synchronizes domain changes with the UI.

3. **Domain Layer**: Handles business logic, represents domain data and behavior, and performs necessary computations for domain operations.

4. **Data Source Layer**: Interacts with databases, filesystems, hardware, messaging systems, or other data sources, performs CRUD operations, handles data conversion, and ensures data integrity.

**Project Context**: {project_desc}

Consider the following Java method from the class {class_name} in the project:

```java
{method_src}
```

**Task**:

1. **Summarize the Method**: Briefly describe the main responsibility of this method (1-2 sentences).
2. **Layer Evaluation**: Evaluate by comparing the method’s responsibility with each layer's role described above. Then, identify which layer(s) the method best fits into based on the evaluation and the project context, and explain your choice.
3. **Justification**:
    - **Multiple Layers**: If the method could belong to more than one layer, evaluate each possibility. Argue for the best fit and provide clear reasoning.
    - **Single Layer**: If the method clearly fits one layer, justify your choice with specific examples and reasoning.
    - **No Layer**: If the method doesn’t fit any layer, check if it could partially fit any layer. If not, classify it as "Cross-Cutting" and explain your reasoning.'''

# Follow-up prompt, sent after the model's first reply, asking for a bare layer
# name; that reply is what gets stored as the method's layer.
prompt2 = "In conclusion, state the single layer that you think fits this method the most. Just answer with the name of the layer and nothing else."

In [None]:
import time

# Timestamp (local time, YYYYMMDD-HHMMSS) used to tag this run's output files
timestr = time.strftime("%Y%m%d-%H%M%S")
timestr

In [None]:
# timestr = '20240522-120247'

In [None]:
# Layer answers accepted as final. A method whose stored layer is missing or not
# in this list is sent to the LLM (again) by the classification loop.
layer_names = [
    "Presentation Layer",
    "Service Layer",
    "Domain Layer",
    "Data Source Layer",
    # "Cross-Cutting"
]

In [None]:
# Classify every selected method with a two-turn LLM dialogue and log the whole
# conversation. The log is written as UTF-8 because prompts and Java sources
# may contain non-ASCII characters.
with open(f'layerinator-{timestr}.log', 'a', encoding='utf-8') as file:

    current_pkg = None
    current_cls = None

    for pkg_id, cls_id, met_id in tqdm(methods, desc='Processing methods'):

        # Start a new package section in the log when the package changes
        if current_pkg != pkg_id:
            if current_pkg:
                file.write('\n\n===============================================\n\n')

            current_pkg = pkg_id
            file.write('# ' + current_pkg + "\n")

        # Start a new class entry (and flush what was logged so far)
        if current_cls != cls_id:
            file.flush()
            current_cls = cls_id
            file.write('\t* ' + current_cls + "\n")

        method_result = results[pkg_id]['classes'][cls_id]['methods'][met_id]

        # Methods that already carry an accepted layer are left alone, so the
        # cell can be re-run to retry only the unclassified ones
        if method_result.get('layer') in layer_names:
            continue

        file.write('\t\t- ' + met_id + "\n")

        clasz = nodes[cls_id]
        method = nodes[met_id]

        prompt1 = prompt1_template.format(
            project_desc=project_desc,
            method_src=method['properties']["sourceText"],
            class_name=clasz['properties']['qualifiedName']
        )
        if only_print_prompt:
            file.write('\t\t\t' + prompt1)
            file.write("\n\n")
        else:
            response = None
            try:
                # Turn 1: ask for a summary and a reasoned layer evaluation
                response = client.chat.completions.create(
                    model=model,
                    messages=[{
                        "role": "user",
                        "content": prompt1
                    }],
                    temperature=0)
                ast_message = response.choices[0].message

                file.write('\t\t\t' + "[USER]\n\n")
                file.write('\t\t\t' + prompt1)
                file.write("\n\n")
                file.write('\t\t\t' + "[LLM]\n\n")
                file.write('\t\t\t' + ast_message.content)
                file.write("\n\n")

                # Turn 2: replay the dialogue and ask for the bare layer name
                response = client.chat.completions.create(
                    model=model,
                    messages=[{
                        "role": "user",
                        "content": prompt1
                    },
                    ast_message,
                    {
                        "role": "user",
                        "content": prompt2
                    }],
                    temperature=0)
                answer = response.choices[0].message.content

                file.write('\t\t\t' + "[USER]\n\n")
                file.write('\t\t\t' + prompt2)
                file.write("\n\n")
                file.write('\t\t\t' + "[LLM]\n\n")
                file.write('\t\t\t' + answer)
                file.write("\n\n")

                method_result['layer'] = answer
            except Exception as exc:
                # Best effort: record the failure and carry on with the next
                # method. KeyboardInterrupt/SystemExit are deliberately not
                # caught, so a long run can still be interrupted.
                answer = None
                file.write('\t\t\t' + (str(response) if response else "no response"))
                file.write("\n\n")
                file.write('\t\t\t' + f"[ERROR] {type(exc).__name__}: {exc}")
                file.write("\n\n")
                method_result['layer'] = "undefined"

        file.write("\n\n")

    file.write("ALL RESULTS:\n\n")
    file.write(prettify_json(results))


In [None]:
# Persist the per-method layers; skipped in dry-run mode, where none were produced
if not only_print_prompt:
  write_to_json_file(results, f"{project_name}-layers-{timestr}.json")

In [None]:
# results = read_json_file(f"{project_name}-layers-20240522-120247.json")
# results

In [None]:
def count_layer_occurrences(input_dict):
    """Count how often each layer occurs among the entries of `input_dict`.

    Each value of `input_dict` is a dict that may hold a "layer" key. Entries
    whose layer is missing or empty are skipped. Returns {layer: count}.
    """
    tally = {}
    for details in input_dict.values():
        layer = details.get("layer")
        if not layer:
            continue
        tally[layer] = tally.get(layer, 0) + 1
    return tally

In [None]:
# Display the results with the per-method layers
results

In [None]:
# Attach to every class the number of its methods assigned to each layer
for package_entry in results.values():
	for class_entry in package_entry['classes'].values():
		class_entry['layers'] = count_layer_occurrences(class_entry['methods'])

results

In [None]:
def sum_layer_counts(input_dicts):
    """Add up the per-layer counts of all entries of `input_dicts`.

    Each value of `input_dicts` is a dict that may hold a "layers" mapping of
    layer -> count; entries without one contribute nothing. Returns the
    combined {layer: total}.
    """
    totals = {}
    for details in input_dicts.values():
        for layer, count in details.get("layers", {}).items():
            totals[layer] = totals.get(layer, 0) + count
    return totals

In [None]:
# Attach to every package the layer counts summed over its classes
for package_entry in results.values():
	package_entry['layers'] = sum_layer_counts(package_entry['classes'])

results

In [None]:
# Persist the results including the per-class and per-package layer counts
write_to_json_file(results, f"{project_name}-layers-recap-{timestr}.json")

In [None]:
# One row per method: (package, class, method, layer)
rows = [
	(pkg_id, cls_id, met_id, method_entry['layer'])
	for pkg_id, package_entry in results.items()
	for cls_id, class_entry in package_entry['classes'].items()
	for met_id, method_entry in class_entry['methods'].items()
]

rows

In [None]:
# Column names for the per-method CSV
header = ("package", "class", "method", "layer")

In [None]:
import csv

# Write the per-method table: the header line followed by all rows
layers1_path = f"{project_name}-layers1-{timestr}.csv"
with open(layers1_path, mode='w', newline='') as file:
    csv.writer(file).writerows([header, *rows])

In [None]:
# One row per class and layer: (package, class, layer, number of methods)
rows = [
	(pkg_id, cls_id, layer, count)
	for pkg_id, package_entry in results.items()
	for cls_id, class_entry in package_entry['classes'].items()
	for layer, count in class_entry['layers'].items()
]

rows

In [None]:
# Column names for the per-class layer tables
header = ("package", "class", "layer", "count")

In [None]:
import csv

# Write the per-class layer counts: the header line followed by all rows
layers2_path = f"{project_name}-layers2-{timestr}.csv"
with open(layers2_path, mode='w', newline='') as file:
    csv.writer(file).writerows([header, *rows])

In [None]:
# One row per class and layer: (package, class, layer, share of the class's
# counted methods that fall into that layer)
rows = []

for pkg_id, package_entry in results.items():
	for cls_id, class_entry in package_entry['classes'].items():
		layer_counts = class_entry['layers']
		class_total = sum(layer_counts.values())
		for layer, count in layer_counts.items():
			rows.append((pkg_id, cls_id, layer, count / class_total))

rows

In [None]:
import csv

# Write the per-class layer shares: the header line followed by all rows.
# NOTE(review): `header` still labels the last column "count" although the
# rows hold fractions here -- confirm whether consumers expect that name.
layers3_path = f"{project_name}-layers3-{timestr}.csv"
with open(layers3_path, mode='w', newline='') as file:
    csv.writer(file).writerows([header, *rows])

## Add the layer information back to the graph nodes

In [None]:
# Copy each method's layer onto the properties of its graph node
for package in results.values():
	for clasz in package['classes'].values():
		for met_id, method in clasz['methods'].items():
			nodes[met_id]['properties']['layer'] = method['layer']

In [None]:
# Display the nodes, now including the layer property on classified methods
nodes

In [None]:
# Rebuild the graph's node list from the node index, one {'data': ...} wrapper per node
graph['elements']['nodes'] = [{'data':node_data} for node_data in nodes.values()]

In [None]:
# Save the graph with the layer annotations to a new, timestamped file
write_to_json_file(graph,f'{project_name}-with-layers-{timestr}.json')