In [87]:
import pandas as pd

#read and display the data
df = pd.read_csv('ckeditor5_issues.csv')
#drop repo owner and repo name columns
df.drop(['Repo Owner', 'Repo Name'], axis=1, inplace=True)

df.head()

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number
0,Introduce tests and Bender.js,The infrastructure for testing must be created...,fredck,['fredck'],[],['type:improvement'],CLOSED,1.0
1,Introduce the AMD API,The AMD API must be introduced according to [i...,fredck,[],[{'body': 'This includes the introduction of t...,['type:improvement'],CLOSED,2.0
2,Introduce CKEDITOR.create,Introduce the `CKEDITOR.create` method and the...,fredck,[],"[{'body': 'Because of #19, nothing must be don...",['type:improvement'],CLOSED,3.0
3,Plugins path resolution,"The RequireJS ""plugin"" plugin must be created ...",fredck,[],[{'body': 'This is prototyped here:\nhttps://g...,['type:improvement'],CLOSED,4.0
4,Introduce the build process,The build process must be introduced.\n,fredck,[],[{'body': 'This has been prototyped here:\nhtt...,['type:improvement'],CLOSED,5.0


In [88]:
#github issue link format is https://github.com/ckeditor/ckeditor5/issues/8666
#https://github.com/repo_owner/repo_name/issues/issue_number
#define a regular expression to extract issue number from a given text
import re
import json

def extract_issue_ref(text):
    if not isinstance(text, str):
        # If text is not a string, make it a string
        text = str(text)
    issue_url_regex = r'https://github\.com/([\w-]+)/([\w-]+)/issues/(\d+)'
    matches = re.findall(issue_url_regex, text)
    issue_numbers = [match[2] for match in matches]
    return issue_numbers


In [90]:
# Apply the function to each row for 'Issue Body' and 'Comments' and combine the results, ensuring uniqueness
df['References'] = df.apply(lambda row: str(list(set(extract_issue_ref(row['Issue Body'])) | 
                                             set(extract_issue_ref(row['Comments'])))), axis=1)

#TO-DO Remove the comment references for the epic issuesas

df

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
0,Introduce tests and Bender.js,The infrastructure for testing must be created...,fredck,['fredck'],[],['type:improvement'],CLOSED,1.0,[]
1,Introduce the AMD API,The AMD API must be introduced according to [i...,fredck,[],[{'body': 'This includes the introduction of t...,['type:improvement'],CLOSED,2.0,[]
2,Introduce CKEDITOR.create,Introduce the `CKEDITOR.create` method and the...,fredck,[],"[{'body': 'Because of #19, nothing must be don...",['type:improvement'],CLOSED,3.0,[]
3,Plugins path resolution,"The RequireJS ""plugin"" plugin must be created ...",fredck,[],[{'body': 'This is prototyped here:\nhttps://g...,['type:improvement'],CLOSED,4.0,[]
4,Introduce the build process,The build process must be introduced.\n,fredck,[],[{'body': 'This has been prototyped here:\nhtt...,['type:improvement'],CLOSED,5.0,[]
...,...,...,...,...,...,...,...,...,...
12451,Make the `UpcastEvent` definition visible,## Provide a description of the task\r\n\r\nht...,pomek,['pomek'],[],"['type:task', 'squad:devops', 'squad:core', 's...",OPEN,15424.0,[]
12452,Linter error when changing `augmentation.ts` f...,The linter rule forbids committing changes whe...,mremiszewski,[],"[{'body': 'For future reference, we figured ou...","['type:bug', 'squad:devops', 'domain:linters']",OPEN,15426.0,[]
12453,CKEditor In Vue3 - CKEditorError: ckeditor-dup...,"Here I am trying to use the CK Editor in vue3,...",rg225,[],[],['type:question'],OPEN,15427.0,['6296']
12454,ListProperties not showin in Toolbar,Does anyone have a clue why the List styles ar...,timtempel,[],"[{'body': 'Apologies, super stubborn browser c...",['type:question'],CLOSED,15428.0,[]


In [91]:
#check how many Nan values are there
print("Amount of nan values:", df.isnull().sum())

#display Nan Issue Body values
df[df['Issue Body'].isnull()].head()

Amount of nan values: Issue Title       0
Issue Body      127
Author           32
Assignees         0
Comments          0
Labels            0
Issue State       0
Issue Number      0
References        0
dtype: int64


Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
5,package.json should be indented with tabs,,Reinmar,[],[{'body': 'The current formatting is on purpos...,['type:bug'],CLOSED,7.0,[]
47,[Windows] Builder builds tests as source code,,Reinmar,[],[],[],CLOSED,64.0,[]
235,Check automated and manual tests compatibility...,,maxbarnas,['maxbarnas'],"[{'body': ""What about manual tests? It's worth...",['type:task'],CLOSED,329.0,['156']
318,Initial integration with Umberto. Prepare exam...,,m-turek,['m-turek'],[],['type:task'],CLOSED,427.0,[]
330,Add a gulp task for generating JSDoc output only,,m-turek,['m-turek'],[],['type:feature'],CLOSED,442.0,[]


In [92]:
#display Nan Author values
df[df['Author'].isnull()].head()

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
99,Feature request: Toolbar customization,Would be nice to provide a way to let users/de...,,[],"[{'body': ""Hey, thanks for the ideas!\n\nWe're...",['resolution:duplicate'],CLOSED,136.0,['143']
101,Question: CKE5 and React,Will the CKE5 release good to work with ReactJ...,,[],"[{'body': ""That's an interesting question, @Ze...","['resolution:duplicate', 'type:question', 'sta...",CLOSED,138.0,[]
328,Switch headings using dropdown causing unexpec...,I am using windows 10 with chrome 58.0.3029.11...,,[],"[{'body': ""Thanks for the ticket! We're aware ...",['resolution:duplicate'],CLOSED,440.0,['228']
494,Mobile examples not working for me,"Hi,\r\n\r\nCongrats on the big achievement wit...",,[],"[{'body': ""Please see here for information abo...",['resolution:invalid'],CLOSED,621.0,"['542', '603']"
843,Detect changes in the editor content,## Is this a bug report or feature request? (c...,,[],[{'body': 'CKEditor 5 provides an event which ...,['type:question'],CLOSED,996.0,[]


In [93]:
#check how many duplicate issues are there
print("\nAmount of duplicate issues:", df.duplicated().sum())



Amount of duplicate issues: 0


In [94]:
#filter out the issues with label epic in it
epic_df = df[df['Labels'].str.contains('Epic', na=False)]
df = df[~df['Labels'].str.contains('Epic', na=False)]

epic_df

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
298,Introduce a way to reach unaccessible places (...,Problem:\r\n\r\n![image](https://user-images.g...,Reinmar,['oleq'],[{'body': 'DUP reported in https://github.com/...,"['type:bug', 'type:improvement', 'type:feature...",CLOSED,407.0,"['1400', '407', '1703', '1621', '685', '1546',..."
568,Allow linking images,## 🐞 Is this a bug report or feature request? ...,davidpolberger,[],"[{'body': ""I thought there was a ticket for it...","['type:feature', 'package:link', 'support:2', ...",CLOSED,702.0,"['7519', '649', '85']"
876,Implement list styles,The user should be able to choose the style of...,Reinmar,['pomek'],[{'body': '@Reinmar \r\n\r\nWe also in an eed ...,"['type:feature', 'package:list', 'support:2', ...",CLOSED,1031.0,"['7736', '7801', '5752', '1028', '7803', '7804..."
877,Implement ordered lists start index and revers...,Beside [list styles](https://github.com/ckedit...,Reinmar,[],[{'body': 'Microsoft word has this feature use...,"['type:feature', 'package:list', 'support:2', ...",CLOSED,1032.0,['1031']
1227,Find and replace feature,🆕 Feature request\r\n\r\n## CKEditor 5\r\n\r\n...,addsimm,[],"[{'body': ""Hi @addsimm! There's no such featur...","['status:discussion', 'type:feature', 'support...",CLOSED,1430.0,['1257']
...,...,...,...,...,...,...,...,...,...
11943,Document list feature parity,The goal of this initiative is to make `Docume...,Witoso,[],[],"['type:task', 'Epic', 'squad:core']",CLOSED,14632.0,[]
12209,List v1 sunset,After the:\r\n\r\n* [ ] https://github.com...,Witoso,[],[],"['Epic', 'squad:core']",OPEN,15037.0,['14632']
12262,Further enhancements to the drag & drop: list ...,## 📝 Provide a description of the improvement\...,Witoso,[],[],"['type:improvement', 'Epic', 'squad:core']",OPEN,15110.0,"['14766', '14712', '15058', '14735', '14711', ..."
12280,Better entry point for image file managers and...,## 📝 Provide a description of the new feature\...,Witoso,[],[{'body': 'The idea would be to push the confi...,"['type:feature', 'package:image', 'Epic', 'squ...",OPEN,15149.0,[]


In [95]:
#lets examine issue bodies of epic issues
epic_df['Issue Body'].iloc[5]

"## Is this a bug report or feature request? (choose one)\n\n🆕 Feature request\n\n## 💻 Version of CKEditor\n\nCKEditor version 5, decoupled document build\n\n## 📋 Steps to reproduce\n\n1. insert one small image\n2. insert another small image\n\n## ✅ Expected result\nTwo small images should be in same line with space e.g. Image1   Image2\n\n## ❎ Actual result\n\nCurrently they are stacked up from top to bottom.\n\n---\n\n## Future work\nMVP step 2: https://github.com/ckeditor/ckeditor5/issues/8666\nNice to have features: https://github.com/ckeditor/ckeditor5/issues/8712\n\n---\n\nIf you'd like to see this feature implemented, add 👍 to this post."

In [96]:
df[df["Issue Number"] == 8666]

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
7860,Inline images - MVP step 2,- [ ] Linking images (we're fine with this: `<...,pkwasnik,[],"[{'body': 'Remember about link decorators.', '...","['type:feature', 'package:image', 'domain:ui/u...",CLOSED,8666.0,[]


In [97]:
df

Unnamed: 0,Issue Title,Issue Body,Author,Assignees,Comments,Labels,Issue State,Issue Number,References
0,Introduce tests and Bender.js,The infrastructure for testing must be created...,fredck,['fredck'],[],['type:improvement'],CLOSED,1.0,[]
1,Introduce the AMD API,The AMD API must be introduced according to [i...,fredck,[],[{'body': 'This includes the introduction of t...,['type:improvement'],CLOSED,2.0,[]
2,Introduce CKEDITOR.create,Introduce the `CKEDITOR.create` method and the...,fredck,[],"[{'body': 'Because of #19, nothing must be don...",['type:improvement'],CLOSED,3.0,[]
3,Plugins path resolution,"The RequireJS ""plugin"" plugin must be created ...",fredck,[],[{'body': 'This is prototyped here:\nhttps://g...,['type:improvement'],CLOSED,4.0,[]
4,Introduce the build process,The build process must be introduced.\n,fredck,[],[{'body': 'This has been prototyped here:\nhtt...,['type:improvement'],CLOSED,5.0,[]
...,...,...,...,...,...,...,...,...,...
12451,Make the `UpcastEvent` definition visible,## Provide a description of the task\r\n\r\nht...,pomek,['pomek'],[],"['type:task', 'squad:devops', 'squad:core', 's...",OPEN,15424.0,[]
12452,Linter error when changing `augmentation.ts` f...,The linter rule forbids committing changes whe...,mremiszewski,[],"[{'body': 'For future reference, we figured ou...","['type:bug', 'squad:devops', 'domain:linters']",OPEN,15426.0,[]
12453,CKEditor In Vue3 - CKEditorError: ckeditor-dup...,"Here I am trying to use the CK Editor in vue3,...",rg225,[],[],['type:question'],OPEN,15427.0,['6296']
12454,ListProperties not showin in Toolbar,Does anyone have a clue why the List styles ar...,timtempel,[],"[{'body': 'Apologies, super stubborn browser c...",['type:question'],CLOSED,15428.0,[]
