In [1]:
import pandas as pd

def read_csvs(repo_name):
    clustered_data = pd.read_csv('data/clustered_data_' + repo_name + '.csv')
    epic_data = pd.read_csv('data/epic_data_' + repo_name + '.csv')
    return clustered_data, epic_data

In [2]:
clustered_data, epic_data = read_csvs('ckeditor5')
clustered_data.head()

Unnamed: 0,Repo Owner,Repo Name,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References,Combined Text,Cluster,GroundTruthCluster
0,ckeditor,ckeditor5,There should be an empty paragraph in the newl...,"When you create an editor, but not call `setDa...",pjasiun,[],"[{'body': 'As a matter of fact, that paragraph...","['resolution:duplicate', 'type:improvement']",CLOSED,331,[],There should be an empty paragraph in the newl...,6,0
1,ckeditor,ckeditor5,Typings for TypeScript,"Hi,\r\n\r\nI'm currently using ckeditor5 in Ty...",ssougnez,[],"[{'body': ""Quoting myself from https://github....","['type:feature', 'support:2', 'squad:core']",CLOSED,504,"['504', '11704']","Typings for TypeScript Hi,\r\n\r\nI'm currentl...",1,21
2,ckeditor,ckeditor5,Table feature,We plan to bootstrap the table feature very so...,Reinmar,['jodator'],[{'body': 'One of the first things we have to ...,"['type:feature', 'domain:accessibility']",CLOSED,610,[],Table feature We plan to bootstrap the table f...,23,5
3,ckeditor,ckeditor5,Style feature,I would like to propose the **Styles** feature...,fredck,[],"[{'body': 'As much as I understand the idea, I...","['status:discussion', 'type:feature', 'support...",OPEN,648,[],Style feature I would like to propose the **St...,16,7
4,ckeditor,ckeditor5,Feature Request: Cycle Through List Styles on ...,"Rather than supporting ""ordered list"" and ""ord...",duracell80,[],[{'body': '> This isn\'t something a designer ...,['resolution:duplicate'],CLOSED,1028,[],Feature Request: Cycle Through List Styles on ...,1,1


In [26]:
import openai
from openai_key import OPEN_AI_KEY

# Function to concatenate issue data into a single string with flags
def concatenate_issue_data(group):
    # Create a string for each row and then concatenate them with a space
    return ' '.join(
        f"<TITLE> {row['Issue Title']} <BODY> {row['Issue Body']}" +
        f"<LABELS> {row['Labels']} <ISSUE_NUMBER> {row['Issue Number']}"
        for _, row in group.iterrows()
    )

# Group the DataFrame by 'GroundTruthCluster' and concatenate the data
concatenated_issues = clustered_data.groupby('Cluster').apply(concatenate_issue_data).to_dict()

# OpenAI API key setup (make sure to use your own API key)
openai.api_key = OPEN_AI_KEY


def create_tasks_section(dataframe, cluster_id, repo_name, repo_owner):
    # Filter the DataFrame for the specified cluster
    cluster_issues = dataframe[dataframe['Cluster'] == cluster_id]

    # Create tasks section with hyperlinks for each issue
    tasks_section = "\n".join(
        f"- [ ] [{row['Issue Title']}](https://github.com/{repo_owner}/{repo_name}/issues/{row['Issue Number']})\n"
        for _, row in cluster_issues.iterrows()
    )
    return tasks_section

def create_openai_query(issues_string):
    # Format the query string
    query = (
        "You are an expert on software project management. You are given the issue data:\n\n"
        f"{issues_string}\n\n"
        "Given the issues above, create an epic issue that encloses all the issues."
        "Create a <BODY> section that gives the description for the epic issue very briefly. Don't list the issues in the <BODY> section."
        "Don't give too much information in the <BODY> section, just enough to describe the epic issue."
        "Then provide a <TITLE> and <LABELS> for the epic issue."
        "Give your output in the format of <TITLE>: Epic Issue Title<BODY>: Epic issue body<LABELS>: Labels."
        "Give newlines when necessary."
    )

    # Make a request to the OpenAI API
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[{"role": "user", "content": query}],
        #prompt=query,
        max_tokens=1024
    )
    return response.choices[0].message.content 

In [30]:
def create_epics(clustered_data):
    #get repo name and owner
    repo_name, repo_owner = clustered_data.iloc[0][['Repo Name', 'Repo Owner']]

    #get number of clusters
    cluster_ids = clustered_data['Cluster'].unique()
    predicted_epics = pd.DataFrame(columns=['Cluster', 'Epic Title', 'Epic Body', 'Epic Labels', 'Task Section'])

    for id in cluster_ids:
        tasks_section = create_tasks_section(clustered_data, id, repo_name, repo_owner)
        # Assuming 'concatenated_issues' contains the issues data as a string
        issues_string = concatenated_issues[id]
        response = create_openai_query(issues_string)
        
        #parse the output
        title = response.split('<TITLE>')[1].split('<BODY>')[0]
        body = response.split('<BODY>')[1].split('<LABELS>')[0]
        labels = response.split('<LABELS>')[1].split('\n-[]')[0]

        predicted_epics = predicted_epics.append({'Cluster': id, 'Epic Title': title, 'Epic Body': body, 'Epic Labels': labels, 'Task Section': tasks_section}, ignore_index=True)
    return predicted_epics

In [None]:
predicted_epics = create_epics(clustered_data)

In [32]:
predicted_epics

Unnamed: 0,Cluster,Epic Title,Epic Body,Epic Labels,Task Section
0,6,Epic Issue: Software Project Management Impro...,This epic issue aims to address multiple soft...,"['type:epic', 'squad:core']",- [ ] [There should be an empty paragraph in t...
1,1,Epic Issue: TypeScript Typings and List Style...,This epic issue aims to address two areas of ...,['type:epic'],- [ ] [Typings for TypeScript](https://github....
2,23,Epic Issue: Table Feature Implementation\n,Implementing the table feature is a critical ...,"['type:epic', 'domain:accessibility']",- [ ] [Table feature](https://github.com/ckedi...
3,16,Styles Feature Epic Issue\n,This epic issue encompasses various issues re...,"['type:epic', 'domain:ui/ux', 'status:discuss...",- [ ] [Style feature](https://github.com/ckedi...
4,10,Epic: Improve CKEditor 5 Image Feature Compat...,This epic aims to improve the image feature i...,"['type:task', 'package:core', 'domain:compati...",- [ ] [[Proposal] Provide TS type checking bas...
5,21,Software Project Management Epic: Improve Sof...,This epic aims to improve the modularity and ...,"['type:epic', 'status:open', 'squad:core']",- [ ] [Conversion from simple model to complex...
6,22,"Epic Issue: Improve CKEditor5 Navigation, Tab...",This epic issue aims to address multiple impr...,": Epic, Improvement, Bug Fix, Performance, Nav...",- [ ] [Implement a custom navigation through t...
7,24,Epic Issue: Alignment Dropdown and Table Sele...,This epic issue aims to address several issue...,"['type:epic', 'squad:core']",- [ ] [The alignment dropdown has unnecessary ...
8,9,Epic Issue: Enhancements and Fixes for CKEdit...,This epic issue aims to address several enhan...,"['type:epic', 'squad:core']",- [ ] [Setting image height and width](https:/...
9,13,Epic Issue: Software Project Management\n,This epic issue encompasses multiple issues r...,": ['type:epic', 'squad:core']",- [ ] [Placeholders for non-restricted areas i...


In [35]:
predicted_epics.to_csv('data/predicted_epics_ckeditor5.csv', index=False)