In [None]:
# %pip install --upgrade py2neo
# %pip install --upgrade openai

In [None]:
from py2neo import Graph


In [None]:
from itertools import groupby

In [None]:
import json


def read_json_file(file_path):
    """Open the file at `file_path` for reading and return its parsed JSON content."""
    with open(file_path, 'r') as handle:
        return json.load(handle)


def parse_json(json_string):
  """Decode the JSON text in `json_string` and return the resulting Python object."""
  return json.loads(json_string)


def dict_to_pretty_json(dictionary):
  """Serialize `dictionary` to a JSON string indented by two spaces."""
  return json.dumps(dictionary, indent=2)


def write_dict_to_json(dictionary, file_path):
  """Write `dictionary` to `file_path` as JSON indented by two spaces (the file is opened in 'w' mode)."""
  with open(file_path, 'w') as sink:
    json.dump(dictionary, sink, indent=2)


In [None]:
import configparser

def read_ini_file(file_path):
    """Read the INI file at `file_path` and return {section name: {option: value}}."""
    parser = configparser.ConfigParser()
    parser.read(file_path)

    sections = {}
    for name in parser.sections():
        sections[name] = dict(parser.items(name))
    return sections


In [None]:
from openai import OpenAI

In [None]:
import re


def remove_java_comments(java_source):
    """
    Return `java_source` with `// ...` line comments and `/* ... */` block
    comments removed.

    String and character literals are matched first and kept verbatim, so
    comment-like text inside them (for example the `//` in "http://host")
    is not stripped. The newline that ends a line comment is preserved.
    """
    pattern = re.compile(
        r'"(?:\\.|[^"\\])*"'      # double-quoted string literal (with escapes)
        r"|'(?:\\.|[^'\\])*'"     # character literal (with escapes)
        r"|//[^\n]*"              # single-line comment, up to the end of the line
        r"|/\*.*?\*/",            # multi-line comment, shortest match
        flags=re.DOTALL)

    def _drop_comment(match):
        # Only comments start with '/'; literals are returned unchanged.
        token = match.group(0)
        return "" if token.startswith("/") else token

    return pattern.sub(_drop_comment, java_source)


In [None]:
def sentence(s):
  '''
  Capitalize the first letter of a string `s` and ensure that the string
  ends with a period (unless it already ends with sentence-ending
  punctuation). Surrounding whitespace is stripped first; an empty or
  whitespace-only input yields an empty string.
  '''
  t = s.strip()
  if not t:
    # Nothing to capitalize or punctuate; indexing t[-1] would raise IndexError.
    return t
  capitalized = f'{t[0].upper()}{t[1:]}'
  if t[-1] in '.?!…~–—':
    return capitalized
  return f'{capitalized}.'

sentence(' hello world~  ')

## Parameters

In [None]:
# Dry-run switch. If True, the classification loop does not call the API;
# it only writes the formatted prompts to the log file.
only_print_prompt = False

In [None]:
# Load credentials and settings from the local ini file.
# Sections read by this notebook: [project] name, [neo4j] url/username/password,
# and [openai] apikey/apibase/model (each openai key is checked for presence before use).
secrets = read_ini_file('secrets.ini')
project_name = secrets['project']['name']
project_name

In [None]:
# Short project description; substituted for {project_desc} in the first prompt.
project_desc = 'an open source email client for Android focused on making it easy to chew through large volumes of email'

## Connect to neo4j

Connect to the Neo4j graph database, which holds the knowledge graph extracted with javapers.

In [None]:
# Open the Neo4j connection using the url and credentials from the [neo4j] section of the ini file.
graph = Graph(secrets['neo4j']['url'], auth=(secrets['neo4j']['username'], secrets['neo4j']['password']))

## Connect to openai

In [None]:
# Build the keyword arguments for the OpenAI client from the [openai] section.
# 'apikey', 'apibase' and 'model' are all optional.
openai_cfg = secrets['openai']
args = {}

if 'apikey' in openai_cfg:
  args['api_key'] = openai_cfg['apikey']
if 'apibase' in openai_cfg:
  args['base_url'] = openai_cfg['apibase']

# Fall back to a default model when the ini file does not name one.
model = openai_cfg.get('model', "gpt-3.5-turbo")

# Show the effective settings. base_url is optional, so use .get() to avoid
# a KeyError when 'apibase' is not configured.
(args.get('base_url'), model)

In [None]:
# Create the API client from the keyword arguments in `args`,
# then display the endpoint it will talk to.
client = OpenAI(**args)
client.base_url

In [None]:
# Smoke-test the LLM server with a single trivial chat completion.
# Skipped in prompt-only mode, where the notebook is not supposed to call the API.
if not only_print_prompt:
  completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "What is your name?"}],
    temperature=0
  )
  # print the completion
  print(completion.choices[0].message.content)

In [None]:
# First-turn prompt. It describes the four candidate layers and asks the model to
# reason step by step about one Java method. It is filled in with str.format using
# two placeholders: {project_desc} and {method_src}.
prompt1_template = '''Here are the layers in a layered software architecture and their responsibilities:

1. **Presentation Layer**: Manages the user interface, including defining UI elements and their behavior, displays information, reacting to user input, and updating UI views accordingly.
  
2. **Service Layer**: Orchestrates domain operations, encapsulates business logic, selects the appropriate business logic for a user request, and coordinates responses between the presentation and domain layers.

3. **Domain Layer**: Organizes and implements business logic, represents domain data and its behavior, and carries out the necessary computation for responding to user requests.

4. **Data Source Layer**: Communicates with databases, messaging systems, or other sources of data, performs CRUD operations, handles data conversion, and ensures data integrity before committing changes to the data source.

Consider the following Java method from {project_desc}:

```java
{method_src}
```

Reason about whether this method fits with each of the layers above. Think step by step. First, summarize what is the responsibility of the method. Then compare it to the layers above.'''

# Second-turn prompt, sent after the model's reasoning; asks for the single best-fitting layer name only.
prompt2 = "In conclusion, state a single layer that you think fits this method the most. Just answer with the name of the layer and nothing else."

In [None]:
# Qualified names of every package-kind Container that directly contains at least one Structure.
package_query = 'match (p:Container)-[:contains]->(:Structure) where p.kind="package" return p'
packages = set()
for record in graph.run(package_query):
    packages.add(record['p']['qualifiedName'])

In [None]:
# Show how many packages were found, followed by their names.
len(packages), packages

In [None]:
# Collect, for every package, the public methods of each class it contains:
#   samples[package name][class name] -> list of method nodes
# Names are passed as query parameters ($pkg_name / $class_name) instead of being
# spliced into the Cypher text, so a name containing a quote cannot break or
# alter the query.
classes_query = ('MATCH (p:Container)-[:contains]->(c:Structure) '
                 'WHERE p.qualifiedName=$pkg_name AND p.kind="package" '
                 'RETURN c')
methods_query = ('MATCH (c:Structure)-[:hasScript]->(m:Operation) '
                 'WHERE c.qualifiedName=$class_name AND m.visibility="public" AND m.kind="method" '
                 'RETURN m')

num_methods = 0

samples = dict()

# Sorted so that packages are always processed in the same order.
for pkg_name in sorted(packages):
    classes = [record['c'] for record in graph.run(classes_query, pkg_name=pkg_name)]

    samples[pkg_name] = dict()
    for clss in classes:
        class_name = clss['qualifiedName']
        methods = [record['m'] for record in graph.run(methods_query, class_name=class_name)]
        num_methods += len(methods)

        samples[pkg_name][class_name] = methods

# Total number of methods collected across all packages.
num_methods

In [None]:
# List the package names for which samples were collected.
samples.keys()

In [None]:
# Spot-check the methods collected for one package.
# NOTE(review): the package name is hard-coded; this raises KeyError if it is not present in `samples`.
samples['com.fsck.k9.ui']

In [None]:
# Re-assign the dry-run switch right before the classification loop
# (same flag as in the Parameters section; the later assignment wins).
only_print_prompt = False

In [None]:
import time

# Timestamp (YYYYMMDD-HHMMSS) embedded in the names of the log, JSON and CSV files written below,
# so that the outputs of one run share the same suffix.
timestr = time.strftime("%Y%m%d-%H%M%S")
timestr

In [None]:
# Classify every sampled method with a two-turn chat:
#   turn 1 (prompt1) asks the model to reason about the method,
#   turn 2 (prompt2) asks for the single best-fitting layer.
# The whole exchange is appended to a timestamped log file, and the final answer is
# stored in results[package][class][method]['layer'] ("undefined" when a call fails).
# In prompt-only mode no API call is made and no 'layer' entry is stored.
# NOTE(review): methods are keyed by simpleName, so overloaded methods of one class
# share a key and only the last one processed is kept.
# The log is opened as UTF-8 because method sources and model replies may contain non-ASCII text.
with open(f'layerinator-{timestr}.log', 'a', encoding='utf-8') as file:

  results = dict()
  for pkg_name in samples.keys():
    results[pkg_name] = dict()
    for class_name in samples[pkg_name].keys():
      results[pkg_name][class_name] = dict()
      for method in samples[pkg_name][class_name]:
        method_name = method['simpleName']
        results[pkg_name][class_name][method_name] = dict()
        file.write(f"# {pkg_name}, {class_name}, {method_name}")
        file.write("\n\n")

        prompt1 = prompt1_template.format(
          project_desc=project_desc,
          method_src=method["sourceText"]
        )
        if only_print_prompt:
          file.write(prompt1)
          file.write("\n\n")
        else:
          response = None
          try:
            # Turn 1: step-by-step reasoning about the method.
            response = client.chat.completions.create(
                model=model,
                messages=[{
                  "role": "user",
                  "content": prompt1
                }],
                temperature=0)
            ast_message = response.choices[0].message

            file.write("[USER]\n\n")
            file.write(prompt1)
            file.write("\n\n")
            file.write("[LLM]\n\n")
            file.write(ast_message.content)
            file.write("\n\n")

            # Turn 2: replay the first exchange and ask for the final one-layer answer.
            response = client.chat.completions.create(
                model=model,
                messages=[{
                  "role": "user",
                  "content": prompt1
                },
                ast_message,
                {
                  "role": "user",
                  "content": prompt2
                }],
                temperature=0)
            answer = response.choices[0].message.content

            file.write("[USER]\n\n")
            file.write(prompt2)
            file.write("\n\n")
            file.write("[LLM]\n\n")
            file.write(answer)
            file.write("\n\n")

            results[pkg_name][class_name][method_name]['layer'] = answer
          except Exception as exc:
            # One failed request or malformed reply must not abort the whole run:
            # log what was received plus the error and mark the method "undefined".
            # Catching Exception (not a bare except) lets a kernel interrupt stop the loop.
            file.write(str(response) if response else "no response")
            file.write("\n\n")
            file.write(f"[ERROR] {type(exc).__name__}: {exc}")
            file.write("\n\n")
            results[pkg_name][class_name][method_name]['layer'] = "undefined"
        file.write("===============================================\n\n")
      file.write("CLASS RESULT:\n\n")
      file.write(dict_to_pretty_json(results[pkg_name][class_name]))
      file.write("\n\n")
      # Flush after every class so that progress survives an interrupted run.
      file.flush()
    file.write("PACKAGE RESULT:\n\n")
    file.write(dict_to_pretty_json(results[pkg_name]))
    file.write("\n\n")
  file.write("ALL RESULTS:\n\n")
  file.write(dict_to_pretty_json(results))


In [None]:
import os

# The JSON/CSV outputs all go to this directory; open(..., 'w') does not create
# missing directories, so make sure it exists before the first write.
os.makedirs("layerinator-v2", exist_ok=True)

# Persist the raw per-method answers (skipped in prompt-only mode, where there are none).
if not only_print_prompt:
  write_dict_to_json(results, f"layerinator-v2/{project_name}-layers-{timestr}.json")

In [None]:
def count_layer_occurrences(input_dict):
    """
    Tally the "layer" values found in the values of `input_dict`.

    Each value must offer `.get("layer")`; entries whose layer is missing or
    falsy are ignored. Returns a dict mapping each layer to its number of
    occurrences.
    """
    tally = {}

    for details in input_dict.values():
        layer = details.get("layer")
        if not layer:
            continue
        tally[layer] = tally.get(layer, 0) + 1

    return tally

In [None]:
# Inspect the raw classification results (package -> class -> method -> {'layer': ...}).
results

In [None]:
# Attach to every class a tally of its methods' layer answers under the 'layers' key.
for package in results:
    for clss in results[package]:
        if clss == 'layers':
            # Package-level summary written by the per-package tally cell, not a class;
            # skipping it keeps this cell re-runnable after that summary exists.
            continue
        results[package][clss]['layers'] = count_layer_occurrences(results[package][clss])

results

In [None]:
def sum_layer_counts(input_dicts):
    """
    Add up the per-layer counts stored under the "layers" key of each value
    in `input_dicts`.

    Values without a "layers" entry contribute nothing. Returns a dict
    mapping each layer to its summed count.
    """
    totals = {}

    for details in input_dicts.values():
        for layer, count in details.get("layers", {}).items():
            totals[layer] = totals.get(layer, 0) + count

    return totals

In [None]:
# Store on every package the sum of its classes' layer tallies, under the 'layers' key.
for package, package_result in results.items():
    package_result['layers'] = sum_layer_counts(package_result)

results

In [None]:
# Save the results together with the per-class and per-package 'layers' tallies added above.
write_dict_to_json(results, f"layerinator-v2/{project_name}-layers-recap-{timestr}.json")

In [None]:
# Flatten the results into one (package, class, method, layer) row per method.
# The 'layers' keys hold tallies, not classes or methods, and are skipped.
rows = []

for package, package_result in results.items():
    for clss, class_result in package_result.items():
        if clss == 'layers':
            continue
        for method, details in class_result.items():
            if method == 'layers':
                continue
            # .get(): in prompt-only mode no 'layer' entry is stored for a method,
            # so plain indexing would raise KeyError.
            rows.append((package, clss, method, details.get('layer')))

rows

In [None]:
# Column names for the per-method table.
header = ("package", "class", "method", "layer")

In [None]:
import csv

# Write the per-method table: the header line first, then every row.
with open(f"layerinator-v2/{project_name}-layers1-{timestr}.csv", mode='w', newline='') as file:
    csv.writer(file).writerows([header, *rows])

In [None]:
# One (package, class, layer, count) row per layer that occurs in a class tally.
rows = []

for package, package_result in results.items():
    for clss, class_result in package_result.items():
        if clss == 'layers':
            # package-level tally, not a class
            continue
        for layer, count in class_result['layers'].items():
            rows.append((package, clss, layer, count))

rows

In [None]:
# Column names for the per-class layer-count table.
header = ("package", "class", "layer", "count")

In [None]:
import csv

# Write the per-class layer counts: header line, then one line per row.
with open(f"layerinator-v2/{project_name}-layers2-{timestr}.csv", mode='w', newline='') as file:
    table = csv.writer(file)
    table.writerow(header)
    for row in rows:
        table.writerow(row)

In [None]:
# One (package, class, layer, fraction) row per layer of a class, where fraction is
# that layer's share of all counted answers in the class.
rows = []

for package in results:
    for clss in [c for c in results[package] if c != 'layers']:
        layer_counts = results[package][clss]['layers']
        # Same for every layer of the class, so compute it once per class.
        total = sum(layer_counts.values())
        for layer in layer_counts:
            rows.append((package, clss, layer, layer_counts[layer] / total))

rows

In [None]:
import csv

# Write the per-class layer fractions.
# NOTE(review): `header` is still the one defined for the count table, so the last
# column is labelled "count" although it holds fractions here; confirm whether
# consumers of this file rely on that name before changing it.
with open(f"layerinator-v2/{project_name}-layers3-{timestr}.csv", mode='w', newline='') as file:
    out = csv.writer(file)
    out.writerows((header, *rows))