In [0]:
# %pip install langchain
# %pip install langchain-openai
# %pip install openai==1.56.2
# %pip install azure-identity==1.19.0
# %pip install pydantic==2.9.2
# %pip install requests
# %pip install python-dotenv
# %pip install repomix
# %pip install beautifulsoup4
# %pip install mlflow

# dbutils.library.restartPython()

In [0]:
import os
from openai import AzureOpenAI
from azure.identity import ClientSecretCredential
######## Boilerplate necessary to make things work in OneLab/OpenLab ########
LAB_VARIANT = "OpenLab" # USE "OneLab" IF YOU'RE USING ONELAB
ENVIRONMENT = "prd" # choose ENVIRONMENT as dev, uat or prd based on environment
def get_openai_urls(lab_variant: str) -> str:
  """Return the OpenAI base URL for the given lab variant.

  The URL is built lazily inside the matching branch, so resolving one variant
  never touches the other variant's configuration: the secret lookup only runs
  for "OpenLab" and ENVIRONMENT is only read for "OneLab".

  Args:
    lab_variant: Either "OpenLab" or "OneLab".

  Returns:
    The base URL string (ends with a trailing slash).

  Raises:
    ValueError: If lab_variant is neither "OpenLab" nor "OneLab".
  """
  if lab_variant == "OpenLab":
    # The hostname is read from the "<variant>-SecretScope" secret scope.
    secret_scope = f"{lab_variant}-SecretScope"
    # NOTE(review): there is no "/" between the hostname and the path, and the path is
    # spelled "openoaisdc" here but "openaisdc" in the OneLab URL — confirm both are intended.
    return f"https://{dbutils.secrets.get(scope=secret_scope, key='OpenAiHostname')}openoaisdc-completions-apis/"
  if lab_variant == "OneLab":
    return f"https://apim-1labgen-ap-apizone-{ENVIRONMENT}01.azure-api.net/openaisdc-completions-apis/"
  # ValueError (a subclass of Exception) keeps existing `except Exception` handlers
  # working while telling the reader which value was rejected.
  raise ValueError(f"Invalid lab_variant: {lab_variant!r} (expected 'OpenLab' or 'OneLab')")

# Read the service principal's client id and secret from the lab's secret scope.
client_id = dbutils.secrets.get(scope=f"{LAB_VARIANT}-SecretScope", key="DataServicePrincipalClientId")
client_secret = dbutils.secrets.get(scope=f"{LAB_VARIANT}-SecretScope", key="DataServicePrincipalClientSecret")
# Authenticate as that service principal and request a token for the Cognitive Services scope.
credential = ClientSecretCredential(tenant_id="6e93a626-8aca-4dc1-9191-ce291b4b75a1", client_id=client_id, client_secret=client_secret)
access_token = credential.get_token("https://cognitiveservices.azure.com/.default")

# Publish the connection settings as environment variables; the client below reads them back.
os.environ["AZURE_OPENAI_TOKEN"] = access_token.token
os.environ["AZURE_OPENAI_VERSION"] = "2024-10-21"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
os.environ["AZURE_OPENAI_BASE_URL"] = get_openai_urls(LAB_VARIANT)
os.environ["USER_AGENT"] = "myagent"

# Shared client used by the completion helper further down.
# NOTE(review): the token is fetched once when this cell runs; presumably it expires after a
# while, in which case this cell has to be re-run in a long session — confirm.
# NOTE(review): the access token is passed as `api_key`; presumably the lab gateway expects
# it that way — confirm before switching to a different auth argument.
client = AzureOpenAI(
  api_key=os.environ["AZURE_OPENAI_TOKEN"],  
  api_version=os.environ["AZURE_OPENAI_VERSION"],
  azure_endpoint=os.environ["AZURE_OPENAI_BASE_URL"]
)

In [0]:
import mlflow
# Enable MLflow autologging for the OpenAI integration.
# NOTE(review): presumably this traces the chat-completion calls made in later cells — confirm.
mlflow.openai.autolog()

__________

In [0]:
def get_completion_without_rag(query: str, context: str, model: str = "gpt-5") -> str:
    """Send one system + user message pair to the chat model and return its reply.

    Args:
        query: Text sent as the user message.
        context: Text sent as the system message.
        model: Model name forwarded to the API (e.g. gpt-4o, gpt-4.1, gpt-5).

    Returns:
        The message content of the first choice in the completion.
    """
    # The system message carries the context; the user message carries the question.
    system_message = {"role": "system", "content": context}
    user_message = {"role": "user", "content": query}

    response = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        seed=1,
    )

    first_choice = response.choices[0]
    return first_choice.message.content

In [0]:
import re

def remove_html_re(text):
    """Strip HTML tags from `text` using a regular expression.

    Everything from a "<" up to the next ">" is removed, including tags whose
    attributes are wrapped over several lines.

    Args:
        text: The markup to clean; may be None or empty.

    Returns:
        The text with tags removed, or "" when `text` is None or empty.
    """
    if not text:
        return ""
    # `[^>]` also matches newlines, so multi-line tags are removed as well
    # (the previous `.*?` pattern stopped at line breaks and left them in place).
    return re.sub(r'<[^>]*>', '', text)

from bs4 import BeautifulSoup

def remove_html(text):
    """Return the plain text of an HTML fragment, parsed with BeautifulSoup.

    Args:
        text: The markup to clean; may be None or empty (for example an issue
            without a description).

    Returns:
        The text content with markup removed, or "" when `text` is None or empty.
    """
    # BeautifulSoup cannot parse None, so missing bodies are mapped to an empty string.
    if not text:
        return ""
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()

In [0]:
from dotenv import load_dotenv
import os

# Absolute workspace path of the team's .env file.
dotenv_path = '/Workspace/Shared/HACKATHON/TEAM 70/hackathon/404-brain-not-found/tryathon/.env'

# Load the variables from that file into the environment.
# NOTE(review): presumably this is where GITHUB_TOKEN (read later via os.getenv) comes from — confirm.
load_dotenv(dotenv_path=dotenv_path)

In [0]:
import os
import requests
import zipfile
import io
import shutil

def download_repo_zip(repo_url, branch="main", timeout=60):
    """Download one branch of a repository as a zip archive and extract it locally.

    The archive is extracted into ``<cwd>/source/<repo_name>``. Any previous
    contents of that directory are removed, but only after the download has
    succeeded, so a failed download leaves an earlier checkout untouched.

    Args:
        repo_url: Repository URL, with or without a ".git" suffix or trailing slash.
        branch: Branch whose archive is requested (defaults to "main").
        timeout: Seconds passed to ``requests.get`` so a stalled server cannot
            block the notebook indefinitely.

    Returns:
        The extraction directory, or None when the server answers with a
        non-200 status (a message is printed in that case).
        NOTE(review): the archive presumably unpacks into a ``<repo>-<branch>/``
        subfolder of the returned path — confirm.

    Raises:
        ValueError: If no repository name can be derived from `repo_url`.
    """
    # Normalise the URL: drop trailing slashes and an optional ".git" suffix.
    # Only the suffix is stripped, so names such as "my.github-tools" stay intact.
    base_url = repo_url.rstrip("/")
    if base_url.endswith(".git"):
        base_url = base_url[:-4]

    repo_name = base_url.split("/")[-1]
    if not repo_name:
        # An empty name would make the target the shared "source" folder itself,
        # which is wiped below.
        raise ValueError(f"Cannot derive a repository name from {repo_url!r}")

    full_path = os.path.join(os.getcwd(), "source", repo_name)
    zip_url = f"{base_url}/archive/refs/heads/{branch}.zip"

    response = requests.get(zip_url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download repository zip: {response.status_code}")
        return None

    # Start from a clean directory so files from an older download cannot linger.
    if os.path.exists(full_path):
        shutil.rmtree(full_path)
    os.makedirs(full_path, exist_ok=True)

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(full_path)
    return full_path

In [0]:
# GitHub repository to analyse; its archive is downloaded and extracted under ./source.
repo_url = "https://github.com/praveenkmrs/fluffy-parakeet.git"
# download_repo_zip returns None when the download fails, so cloned_repo may be None here.
cloned_repo = download_repo_zip(repo_url)

In [0]:
from utils.code_parser import parse_codebase

# Parse the extracted repository.
# NOTE(review): parse_codebase is a project helper whose return type is not visible here;
# only its `output_content` attribute is used.
result = parse_codebase(codebase_path=cloned_repo)
# This content is later inserted into the prompts as the codebase context.
codebase = result.output_content

In [0]:
from utils.github_issues_scrapper import get_issues
import os

# Token and repository coordinates for the issue lookup; owner and repo are the
# last two path segments of the repository URL.
token = os.environ.get("GITHUB_TOKEN")
owner = repo_url.split("/")[-2]
repo = repo_url.split("/")[-1].replace(".git", "")

# Fetch the repository's issues through the project helper.
github_issues = get_issues(token=token, owner=owner, repo=repo)

# Render every issue as a "Title / Body" bullet, with HTML stripped from the body.
issues_str = "\n".join([
    "- Title: {}\n  Body: {}\n".format(issue.title, remove_html(issue.body))
    for issue in github_issues
])

In [0]:
# Read the product-owner prompt template; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("prompts/po_prompt.txt", encoding="utf-8") as po_prompt_file:
    prompt_template_po = po_prompt_file.read()

# Fill the template's placeholders with the parsed codebase, the formatted
# issues and the fixed tech-stack / constraint descriptions.
prompt_po = prompt_template_po.format(
    codebase_context=codebase,
    issues=issues_str,
    tech_stack="Spring-boot 3, mongodb, Java 21, maven, Docker, Docker-compose",
    constraints="security, performance, scalability, maintainability"
)
# prompt_po

In [0]:
import time

# Measure the elapsed time of the story-generation request.
start_time = time.perf_counter()
# The filled product-owner prompt is sent as the system context; the short
# instruction below is the user query.
user_stories = get_completion_without_rag(
    query="Create structured user stories based on the issues",
    context=prompt_po,
    model="gpt-5" # gpt-4o, gpt-4.1, gpt-5
)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Total time taken to create stories: {elapsed_time}s")
# user_stories

In [0]:
# Read the developer prompt template; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("prompts/dev_prompt.txt", encoding="utf-8") as dev_prompt_file:
    prompt_template_dev = dev_prompt_file.read()

# Fill the template's placeholders with the parsed codebase, the generated user
# stories and the fixed tech-stack / constraint descriptions.
prompt_dev = prompt_template_dev.format(
    codebase_context=codebase,
    user_stories=user_stories,
    tech_stack="Spring-boot 3, mongodb, Java 21, maven, Docker, Docker-compose",
    constraints="security, performance, scalability, maintainability"
)
# prompt_dev

In [0]:
# Measure the elapsed time of the plan-generation request. `time` is imported in the
# story-generation cell, so that cell has to run first. start_time, end_time and
# elapsed_time are reused here and overwrite the values from that cell.
start_time = time.perf_counter()
# The filled developer prompt is sent as the system context; the instruction below
# is the user query.
plan = get_completion_without_rag(
    query="Provide detailed implementation instructions for all of the listed user stories strictly referring to the source code",
    context=prompt_dev,
    model="gpt-4.1" # gpt-4o, gpt-4.1, gpt-5
)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Total time taken to create plans for the stories: {elapsed_time}s")
# plan