In [52]:
import csv
import os
import json
import time
import requests

In [10]:
# Define API endpoints
CODE_SUMMARIZATION_URL = "http://localhost:8080/syntropy/code/summarize"
REQUIREMENTS_SUMMARIZATION_URL = "http://localhost:8080/syntropy/requirements/summarize"
COMPARISON_SUMMARIZATION_URL = "http://localhost:8080/syntropy/comparison/summarize"

In [11]:
# Create results directory
RESULTS_DIR = "dataset_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

In [12]:
# Check if dataset pair has already been processed
def is_already_processed(dp_id):
    result_dir = os.path.join(RESULTS_DIR, str(dp_id))
    return os.path.exists(result_dir)

In [14]:
# Load CSV and process each row

def run_syntropy():
    with open("dataset.csv", "r", newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            print(row.keys())
            if not "Dataset Pair ID" in row.keys():
                continue
            dataset_pair_id = row["Dataset Pair ID"]
            code_block = row["Code Block(s)"]
            requirements = row["Requirements"]
    
            if is_already_processed(dataset_pair_id):
                print(f"Skipping already processed Dataset Pair ID: {dataset_pair_id}")
                continue
    
            # Create a directory for each dataset_pair_id
            result_path = os.path.join(RESULTS_DIR, dataset_pair_id)
            os.makedirs(result_path, exist_ok=True)
    
            print('Hitting the code summarization endpoint...')
            # Hit the code summarization endpoint
            code_response = requests.post(CODE_SUMMARIZATION_URL, json={"diffs": code_block})
            code_summary = code_response.json()
            with open(os.path.join(result_path, "code_summarization.json"), "w", encoding='utf-8') as f:
                json.dump(code_summary, f, indent=2)
    
            print('Done.')
    
            print('Hitting the requirements summarization endpoint...')
            # Hit the requirements summarization endpoint
            requirements_response = requests.post(REQUIREMENTS_SUMMARIZATION_URL, json={"requirements": requirements})
            requirements_summary = requirements_response.json()
            with open(os.path.join(result_path, "requirements_summarization.json"), "w", encoding='utf-8') as f:
                json.dump(requirements_summary, f, indent=2)
    
            print('Done.')
    
            print('Hitting the comparison summarization endpoint...')
            # Hit the comparison summarization endpoint
            comparison_response = requests.post(
                COMPARISON_SUMMARIZATION_URL,
                json={
                    "code_summary": code_summary,
                    "requirements_summary": requirements_summary
                }
            )
            comparison_summary = comparison_response.json()
            with open(os.path.join(result_path, "comparison_summarization.json"), "w", encoding='utf-8') as f:
                json.dump(comparison_summary, f, indent=2)
    
            print('Done.')
    
            time.sleep(5)
    
    print("Processing complete.")

run_syntropy()

dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 1
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 2
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 3
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 4
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 5
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 6
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 7
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already processed Dataset Pair ID: 8
dict_keys(['', 'Dataset Pair ID', 'Requirements', 'Code Block(s)'])
Skipping already pro

KeyboardInterrupt: 

In [87]:
def run_singleton_analysis(dp_id):
    with open("dataset.csv", "r", newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        processed = False
        for row in reader:
            #print(row.keys())
            if not "Dataset Pair ID" in row.keys():
                continue
                
            dataset_pair_id = row["Dataset Pair ID"]
            code_block = row["Code Block(s)"]
            requirements = row["Requirements"]
    
            if str(dp_id) != dataset_pair_id:
                continue
    
            print('---REQUIREMENTS:---')
            print(requirements)
            print()
            
            print('---CODE BLOCK:---')
            print(code_block)
            print()
    
            print('---SYNTHESIS:---')
            
            summary_file = "comparison_summarization.json"
            
            json_filename = f'{RESULTS_DIR}/{dataset_pair_id}/{summary_file}'
            try:
                with open(json_filename) as f:
                    d = json.load(f)
                    print(json.dumps(d, indent=4))

            except FileNotFoundError as e:
                print(f"Synthesis for Dataset Pair ID has not been processed yet: {dp_id}")
                print()
                
            processed = True
    if not processed:
        print(f"Dataset Pair ID {dp_id} has not been processed yet, or does not exist." )

In [90]:
run_singleton_analysis(1)

---REQUIREMENTS:---
The application must enforce data immutability for critical objects and implement a custom serialization mechanism to ensure secure data exchange. Integration with our custom build system is required, and efforts should be made to improve existing documentation.

---CODE BLOCK:---
# Custom build system integration
# Note: This code lacks thorough documentation.
class ImmutableData:
    def __init__(self, data):
        self._data = tuple(data)  # using immutable tuple to store data

    def serialize(self):
        # Custom serialization: convert data to a comma-separated string
        return ','.join(map(str, self._data))

    @classmethod
    def deserialize(cls, data_str):
        # Custom deserialization
        data = tuple(data_str.split(','))
        return cls(data)

data_obj = ImmutableData([1, 2, 3])
serialized = data_obj.serialize()
new_obj = ImmutableData.deserialize(serialized)
print(serialized)

---SYNTHESIS:---
{
    "core_business_functionality": {
