In [1]:
import argparse
import json
import os
import warnings
from concurrent.futures import ThreadPoolExecutor

import dotenv
import requests

warnings.filterwarnings("ignore", category=UserWarning)
dotenv.load_dotenv()

from langchain.document_loaders import JSONLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAIChat
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Chroma


In [2]:
def cache_jira(project_key="TAC"):
    """Download all Jira issues and cache them in a local file."""
    auth = requests.auth.HTTPBasicAuth(
        os.environ["JIRA_EMAIL"], os.environ["JIRA_API_TOKEN"]
    )

    def flatten_jira_text(text):
        """Flatten Jira text into a single string."""
        result = ""
        if "type" in text:
            if text["type"] == "text":
                result += text["text"] + " "
            elif text["type"] == "listItem":
                result += "- "
            elif text["type"] == "inlineCard":
                result += text["attrs"]["url"].split("/")[-1] + " "
        if "content" in text:
            for item in text["content"]:
                result += flatten_jira_text(item)
            if text["type"] == "paragraph":
                result += "\n"
        return result

    def extract_text_from_description(description):
        """Extract text from a Jira issue description."""
        if not description:
            return ""
        return flatten_jira_text(description)

    def extract_text_from_comments(comments):
        """Extract text from Jira issue comments."""
        result = ""
        for comment in comments:
            result += (
                comment["author"]["displayName"]
                + " said: "
                + flatten_jira_text(comment["body"])
                + "\n"
            )
        return result

    def remap_issue(issue):
        """Remap a Jira issue into a simpler more compact format."""
        mapped = {
            "key": issue.get("key", ""),
            
            "creator": issue["fields"].get("creator", {}).get("displayName", "Unknown creator"),
            "assignee": issue["fields"].get("assignee", {}).get("displayName", "Unassigned"),
            "status": issue["fields"].get("status", {}).get("name", "No status"),
            "created": issue["fields"].get("created", "Unknown date"),
            "updated": issue["fields"].get("updated", "Unknown date"),
            "related_issues": ", ".join(
                [
                    link["outwardIssue"]["key"]
                    for link in issue["fields"].get("issuelinks", [])
                    if link.get("outwardIssue")
                ]
            ),
        }

        url = f"https://{os.environ['JIRA_DOMAIN']}/rest/api/3/issue/{issue['key']}/comment"
        headers = {"Accept": "application/json"}
        response = requests.get(url, headers=headers, auth=auth)
        response.raise_for_status()

        if not issue["fields"]["description"] and not response.json()["comments"]:
            return None

        mapped[
            "text"
        ] = """
        Summary: {summary}
        --
        Key: {key}
        --
        Description:
        {description}
        
        Comments:
        {comments}
        """.format(
            key=issue["key"],
            description=extract_text_from_description(issue["fields"]["description"]),
            comments=extract_text_from_comments(response.json()["comments"]),
        )
        print(f"Downloaded {issue['key']} from Jira...")
        return mapped

    url = f"https://{os.environ['JIRA_DOMAIN']}/rest/api/3/search"
    params = {"jql": f"project = {project_key}", "maxResults": 1000, "startAt": 0}
    headers = {"Accept": "application/json"}
    total_issues = 0
    issues = []
    next_issues = True
    while next_issues:
        response = requests.get(url, params=params, headers=headers, auth=auth)
        response.raise_for_status()
        data = response.json()
        if not data["issues"]:
            break
        total_issues = data["total"]
        with ThreadPoolExecutor(max_workers=3) as executor:
            remapped_issues = [
                remapped_issue
                for remapped_issue in list(executor.map(remap_issue, data["issues"]))
                if remapped_issue
            ]
            issues.extend(remapped_issues)
        params["startAt"] += len(data["issues"])
        next_issues = params["startAt"] <= total_issues
        print(f"Downloaded {len(issues)} issues from Jira...")

    with open("jira.json", "w") as f:
        json.dump(issues, f)


In [4]:
cache_jira()

KeyError: 'summary'