In [1]:
# Data comes from the script create_dataset.py
# DATA_FILENAME = "data/repos-cleaned-TypeScript-2023-06-05-1620.pkl"
# DATA_FILENAME = "data/repos-cleaned-JavaScript-2023-06-05-1933.pkl"
DATA_FILENAME = "data/repos-cleaned-Go-2023-06-05-1432.pkl"

# Languages a dataset file name may refer to. 'JavaScript' is listed before 'Java' so that
# the substring fallback in detect_language() never reports 'Java' for a JavaScript file.
KNOWN_LANGUAGES = ('Go', 'JavaScript', 'TypeScript', 'Java', 'Python')


def detect_language(filename, languages=KNOWN_LANGUAGES):
    """Return the entry of ``languages`` that ``filename`` refers to.

    The base name of the file is split on '-', '_' and '.'; a language that appears as a
    whole token wins (so 'Google-...' is not mistaken for 'Go'). If no token matches, the
    first language contained anywhere in ``filename`` is returned, in the order given by
    ``languages``.

    Args:
        filename: Path of the dataset file.
        languages: Candidate language names, checked in order.

    Returns:
        The matching language name.

    Raises:
        ValueError: If ``filename`` mentions none of ``languages``.
    """
    base_name = filename.replace('\\', '/').rsplit('/', 1)[-1]
    tokens = base_name.replace('.', '-').replace('_', '-').split('-')
    for language in languages:
        if language in tokens:
            return language
    # Fallback keeps file names without '-'/'_' separators working.
    for language in languages:
        if language in filename:
            return language
    raise ValueError(
        f'Cannot infer the language from {filename!r}; expected one of {list(languages)}'
    )


LANGUAGE = detect_language(DATA_FILENAME)

# Number of rows drawn for the manual review sample.
TOP_N = 50

In [2]:
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import pickle
# Import memoize cache (lru with size = 1)
from functools import lru_cache

@lru_cache(maxsize=1)
def get_comments_df(filename=None):
    """Load the pickled dataset and flatten it into a single DataFrame.

    The pickle must contain a 2-tuple whose first element is a mapping; each value of
    that mapping is an iterable of records, and every record becomes one row. The second
    tuple element is read but not used.

    The result is memoized (one entry), so repeated calls with the same argument return
    the very same DataFrame object; in-place changes made by callers are visible to
    later callers.

    Args:
        filename: Path of the pickle file. Defaults to the module-level DATA_FILENAME.

    Returns:
        A pandas DataFrame with one row per record.
    """
    path = DATA_FILENAME if filename is None else filename
    with open(path, 'rb') as f:
        # NOTE(security): pickle.load can execute arbitrary code; only open files that
        # were produced by a trusted run of the dataset-creation script.
        examples_by_group, _metadata = pickle.load(f)
    flat_examples = [example for group in examples_by_group.values() for example in group]
    return pd.DataFrame(flat_examples)

# Announce which dataset is about to be read, then load it (memoized by get_comments_df).
print(f'Loading data from {DATA_FILENAME} for language {LANGUAGE}...')
comments_df = get_comments_df()

# Report the row count and show the first five rows as the cell output.
total_comments = comments_df.shape[0]
print(f'Total comments: {total_comments}')
comments_df.head(5)

Loading data from data/repos-cleaned-TypeScript-2023-06-05-1620.pkl for language TypeScript...
Total comments: 3991


Unnamed: 0,old_entire_comment_raw,new_entire_comment_raw,old_code_raw,new_code_raw,old_comment_raw,new_comment_raw,old_code_start_line,old_code_start_char,new_code_start_line,new_code_start_char,...,new_comment_start_char,qualified_name,type,label,repo_url,path,commit,commit_msg,id,kind
0,/**\n * This method creates an observable that...,/**\n * This method creates an observable that...,waitForCollectionToLoad() {\n return this.sto...,waitForCollectionToLoad() {\n return this.sto...,This method creates an observable that waits f...,This method creates an observable that waits f...,33,2,34,2,...,2,waitForCollectionToLoad,summary,1,https://github.com/ngrx/example-app.git,src/guards/book-exists.ts,5c706a33a9f44f4cde06c7b624c03353afa39938,"Add more documentation, fix a few bugs",ngrx/example-app.git#0,jsdoc
1,/**\n * This method parses all the fields in t...,/**\n * This method parses all the fields in t...,prepareMessage(): string {\n let messag...,prepareMessage(): string {\n let messag...,This method parses all the fields in the objec...,This method parses all the fields in the objec...,152,1,153,1,...,1,prepareMessage,summary,1,https://github.com/spruceid/siwe.git,packages/siwe/lib/client.ts,eb64c77e7d0f77f9b36eecb0f7f078797337b729,Re-introduce validate() and check for invalid ...,spruceid/siwe.git#1,jsdoc
2,/**\n * Verify (asynchronously) an already dec...,/**\n * Verify (asynchronously) an already dec...,protected verifyDecomposedJwt(\n decomposedJw...,protected verifyDecomposedJwt(\n decomposedJw...,Verify (asynchronously) an already decomposed ...,Verify (asynchronously) an already decomposed ...,590,2,600,2,...,2,verifyDecomposedJwt,summary,1,https://github.com/awslabs/aws-jwt-verify.git,src/jwt-rsa.ts,80636886e4d9019dadc7ac7a94c174e06a31f913,Include RS512 support for JWT signing algorith...,awslabs/aws-jwt-verify.git#0,jsdoc
3,/**\n * Verify (synchronously) an already deco...,/**\n * Verify (synchronously) an already deco...,protected verifyDecomposedJwtSync(\n decompos...,protected verifyDecomposedJwtSync(\n decompos...,Verify (synchronously) an already decomposed J...,Verify (synchronously) an already decomposed J...,551,2,561,2,...,2,verifyDecomposedJwtSync,summary,1,https://github.com/awslabs/aws-jwt-verify.git,src/jwt-rsa.ts,80636886e4d9019dadc7ac7a94c174e06a31f913,Include RS512 support for JWT signing algorith...,awslabs/aws-jwt-verify.git#1,jsdoc
4,/**\n * Verify (synchronously) a JWT that is s...,/**\n * Verify (synchronously) a JWT that is s...,"public verifySync(\n ...[jwt, properties]: Ve...","public verifySync(\n ...[jwt, properties]: Ve...",Verify (synchronously) a JWT that is signed us...,Verify (synchronously) a JWT that is signed us...,531,2,541,2,...,2,verifySync,summary,1,https://github.com/awslabs/aws-jwt-verify.git,src/jwt-rsa.ts,80636886e4d9019dadc7ac7a94c174e06a31f913,Include RS512 support for JWT signing algorith...,awslabs/aws-jwt-verify.git#2,jsdoc


In [3]:
# Build a browsable commit link per row:
#   <repo_url without a trailing '.git'>/commit/<first 12 characters of the commit hash>
# The suffix is removed with an anchored pattern, so a repo_url that does not end in
# '.git' is left intact instead of losing its last four characters.
repo_base_url = comments_df['repo_url'].str.replace(r'\.git$', '', regex=True)
comments_df['commit_url'] = repo_base_url + '/commit/' + comments_df['commit'].str[:12]

In [4]:
# The commit message must mention documentation in some form. The pattern is applied as a
# case-insensitive regular expression, so e.g. 'documenta' also matches 'Documentation'.
comment_pattern = 'javadoc|jsdoc|docstr|godoc|tsdoc|documenta|comment'

# Verbs signalling that a comment was corrected. Only 'fix' is active; the broader list
# that was tried is:
#   ['fix', 'update', 'address', 'revise', 'modify', 'amend', 'outdate', 'change']
# The verbs are joined into a regex alternation unescaped, so keep them free of regex
# metacharacters. Matching is by substring ('fix' also matches 'prefix').
comment_update_verbs = ['fix']
verb_pattern = '|'.join(comment_update_verbs)

# na=False treats a missing commit message as "no match" so the masks stay boolean.
commit_msgs = comments_df['commit_msg']
mentions_comment = commit_msgs.str.contains(comment_pattern, case=False, na=False)
mentions_update = commit_msgs.str.contains(verb_pattern, case=False, na=False)
# Rows documenting a constructor are excluded.
not_constructor = comments_df['qualified_name'] != 'constructor'

comments_df = comments_df[mentions_comment & mentions_update & not_constructor]

# Print size
print('Total comments after filtering: {}'.format(len(comments_df)))

Total comments after filtering: 295


In [5]:
# Show up to 200 characters per cell so comments and code stay readable in the table.
pd.set_option('display.max_colwidth', 200)

# Fixed seed so that Restart & Run All draws the same review sample every time.
SAMPLE_SEED = 42

# One candidate per repository: the row with the shortest commit message.
# A stable sort keeps the original row order among messages of equal length.
one_per_repo_df = (
    comments_df
    .sort_values(by='commit_msg', key=lambda col: col.str.len(), kind='stable')
    .groupby('repo_url')
    .head(1)
)

# Draw at most TOP_N rows; capping at the number of candidates avoids the ValueError that
# sample() raises when asked for more rows than exist.
# (Swap .sample(...) for .head(TOP_N) to take the shortest commit messages instead.)
top_df = one_per_repo_df.sample(
    n=min(TOP_N, len(one_per_repo_df)),
    random_state=SAMPLE_SEED,
)
top_df

Unnamed: 0,old_entire_comment_raw,new_entire_comment_raw,old_code_raw,new_code_raw,old_comment_raw,new_comment_raw,old_code_start_line,old_code_start_char,new_code_start_line,new_code_start_char,...,qualified_name,type,label,repo_url,path,commit,commit_msg,id,kind,commit_url
3288,/**\n * Returns the service endpoints that exist in provider.\n * @param {any} endpoints\n * @return {Promise<ServiceEndpoint[]>}\n */,/**\n * This function returns an array of service endpoints for a given provider endpoint.\n * @param {string} providerEndpoint - The provider endpoint\n * @param {any} endpoints - The endpoints o...,"public async getServiceEndpoints(providerEndpoint: string, endpoints: any) {\n const serviceEndpoints: ServiceEndpoint[] = []\n for (const i in endpoints.serviceEndpoints) {\n const endpoint:...","public async getServiceEndpoints(providerEndpoint: string, endpoints: any) {\n const serviceEndpoints: ServiceEndpoint[] = []\n for (const i in endpoints.serviceEndpoints) {\n const endpoint:...",Returns the service endpoints that exist in provider.,This function returns an array of service endpoints for a given provider endpoint.,49,2,57,2,...,getServiceEndpoints,summary,1,https://github.com/oceanprotocol/ocean.js.git,src/services/Provider.ts,9bf71ba4f078e121d333d74164ff8b6229f9d6e3,Features/ethers (#1696)\n\n* add ethers\r\n\r\n* global updates\r\n\r\n* ve updates\r\n\r\n* ve tests\r\n\r\n* contract updates\r\n\r\n* first test passing\r\n\r\n* downgrade to ethers 5.7.2\r\n\r...,oceanprotocol/ocean.js.git#67,jsdoc,https://github.com/oceanprotocol/ocean.js/commit/9bf71ba4f078
3767,/**\n * Override this in a derived class to provide logic for when a team is deleted.\n * @param handler\n * @returns A promise that represents the work queued.\n */,"/**\n * Registers a handler for TeamsTeamDeleted events, such as for when a team is deleted.\n * @param handler\n * @returns A promise that represents the work queued.\n */","public onTeamsTeamDeletedEvent(\n handler: (teamInfo: TeamInfo, context: TurnContext, next: () => Promise<void>) => Promise<void>\n): this {\n return this.on('TeamsTeamDeleted', async (conte...","public onTeamsTeamDeletedEvent(\n handler: (teamInfo: TeamInfo, context: TurnContext, next: () => Promise<void>) => Promise<void>\n): this {\n return this.on('TeamsTeamDeleted', async (conte...",Override this in a derived class to provide logic for when a team is deleted.,"Registers a handler for TeamsTeamDeleted events, such as for when a team is deleted.",775,4,802,4,...,onTeamsTeamDeletedEvent,summary,1,https://github.com/microsoft/botbuilder-js.git,libraries/botbuilder/src/teamsActivityHandler.ts,6e1009b256f3b9b17ade20dc1ea6551b4855ba6c,adjusted override/callback docstrings (#2902)\n\n* adjusted override/callback docstrings\r\n\r\n* typo fix\r\n\r\nCo-authored-by: Michael Richardson <v-micric@microsoft.com>,microsoft/botbuilder-js.git#315,jsdoc,https://github.com/microsoft/botbuilder-js/commit/6e1009b256f3
1083,"/**\n * Optionally configure various options, such as response parsing, that the SDK allows.\n *\n * @param config Object of supported SDK options and toggles.\n * @param config.parseResponse If r...",/**\n * Optionally configure various options that the SDK allows.\n *\n * @param config Object of supported SDK options and toggles.\n */,config(config: ConfigOptions) {\n this.core.setConfig(config);\n},config(config: ConfigOptions) {\n this.core.setConfig(config);\n},"Optionally configure various options, such as response parsing, that the SDK allows.",Optionally configure various options that the SDK allows.,20,2,19,2,...,config,summary,1,https://github.com/readmeio/api.git,packages/api/test/__fixtures__/sdk/operationid-quirks/index.ts,18ddbfb223b5e514c14caf89d284b988a4c84e6d,feat: completely overhauling how we're handling response data (#539)\n\n* feat: allowing `accept` headers to always be sent through as metadata\r\n\r\n* chore: removing some unnecessary comments\r...,readmeio/api.git#0,jsdoc,https://github.com/readmeio/api/commit/18ddbfb223b5
140,"/**\n * Randomly samples from the distribution represented by the bayesian network,\n * making sure the sample is consistent with the provided restrictions on value possibilities.\n * Returns fals...","/**\n * Randomly samples values from the distribution represented by the bayesian network,\n * making sure the sample is consistent with the provided restrictions on value possibilities.\n * Retur...","generateConsistentSampleWhenPossible(valuePossibilities: Record<string, string[]>) {\n return this.recursivelyGenerateConsistentSampleWhenPossible({}, valuePossibilities, 0);\n}","generateConsistentSampleWhenPossible(valuePossibilities: Record<string, string[]>) {\n return this.recursivelyGenerateConsistentSampleWhenPossible({}, valuePossibilities, 0);\n}","Randomly samples from the distribution represented by the bayesian network, making sure the sample is consistent with the provided restrictions on value possibilities.","Randomly samples values from the distribution represented by the bayesian network, making sure the sample is consistent with the provided restrictions on value possibilities.",46,4,45,4,...,generateConsistentSampleWhenPossible,summary,1,https://github.com/apify/fingerprint-suite.git,packages/generative-bayesian-network/src/bayesian-network.ts,9d32a69b3ecab44c4a9200e0aadf7802efcf0edf,"JSDoc update, TS fixups",apify/fingerprint-suite.git#0,jsdoc,https://github.com/apify/fingerprint-suite/commit/9d32a69b3eca
3185,/**\n * Get image tiles urls for a single tile or\n * a tile level.\n *\n * @param {ImageTilesRequestContract} tiles - Tiles to request\n * @returns {Promise<ImageTilesContract>} Promise to the\n ...,/**\n * Get image tiles urls for a tile level.\n *\n * @param {ImageTilesRequestContract} tiles - Tiles to request\n * @returns {Promise<ImageTilesContract>} Promise to the\n * image tiles respons...,"public getImageTiles(\n tiles: ImageTilesRequestContract): Promise<ImageTilesContract> {\n return Promise.reject(new MapillaryError(""Not implemented""));\n}","public getImageTiles(\n tiles: ImageTilesRequestContract): Promise<ImageTilesContract> {\n return Promise.reject(new MapillaryError(""Not implemented""));\n}",Get image tiles urls for a single tile or a tile level.,Get image tiles urls for a tile level.,196,4,192,4,...,getImageTiles,summary,1,https://github.com/mapillary/mapillary-js.git,src/api/DataProviderBase.ts,99f65410ee953265f6087ac929c339481a42860f,docs: fix links and event tags\n\nTag event types with event jsdoc tag.\nRemove event tag from event interfaces\nto enable full info in the docs.,mapillary/mapillary-js.git#90,jsdoc,https://github.com/mapillary/mapillary-js/commit/99f65410ee95
3339,/**\n * Returns a translated string from a key and a locale\n * @param {string} key\n * @return {string}\n */,/**\n * Returns a translated string from a key in the current {@link Language.locale}\n * @param key - Translation key\n * @returns Translated string\n */,"get(key: string): string {\n const translation = getTranslation(this.translations, key);\n if (translation !== null) {\n return translation;\n }\n\n return key;\n}","get(key: string): string {\n const translation = getTranslation(this.translations, key);\n if (translation !== null) {\n return translation;\n }\n\n return key;\n}",Returns a translated string from a key and a locale,Returns a translated string from a key in the current Language.locale,28,4,28,4,...,get,summary,1,https://github.com/Adyen/adyen-web.git,src/language/Language.ts,d5828f2d98c61f3efd6f3d6349986ec29353466a,Add eslint-plugin-tsdoc (#50)\n\n* Add TSDoc linting\r\n\r\n* Fix linting issues on TSDoc comments,Adyen/adyen-web.git#2,jsdoc,https://github.com/Adyen/adyen-web/commit/d5828f2d98c6
2648,/**\n * Returns url for a `new` action in given Resource. Uses {@link resourceActionUrl}\n *\n * @param {string} resourceId id to the resource\n * @param {string} [search] optional query s...,/**\n * Returns url for a `list` action in given Resource. Uses {@link resourceActionUrl}\n *\n * @param {string} resourceId id to the resource\n * @param {string} [search] optional query ...,"listUrl(resourceId: string, search?: string): string {\n return this.resourceActionUrl({ resourceId, actionName: 'list', search })\n}","listUrl(resourceId: string, search?: string): string {\n return this.resourceActionUrl({ resourceId, actionName: 'list', search })\n}",Returns url for a `new` action in given Resource.,Returns url for a `list` action in given Resource.,193,2,193,2,...,listUrl,summary,1,https://github.com/SoftwareBrothers/adminjs.git,src/backend/utils/view-helpers/view-helpers.ts,44f3999ef7f6913aa217664cbeedb70b8d3b3807,Collective resolve of GitHub issues (#989)\n\n* fix: fixed filters autocomplete from url on initial page enter (#988)\r\n\r\n* fix: update doc comment for listUrl method\r\n\r\n* fix: look over fo...,SoftwareBrothers/adminjs.git#14,jsdoc,https://github.com/SoftwareBrothers/adminjs/commit/44f3999ef7f6
1930,// NOTE: positions of achievements are 1-indexed.,/**\n * Changes the position of the achievement\n *\n * Note: positions of achievements are 1-indexed\n *\n * @param achievement the AchievementItem\n * @param newPosition the new position\n */,"public changeAchievementPosition(achievement: AchievementItem, newPosition: number) {\n const achievements = this.getAchievements()\n .filter(achievement => achievement.isTask)\n .sort((tas...","public changeAchievementPosition(achievement: AchievementItem, newPosition: number) {\n const achievements = this.getAchievements()\n .filter(achievement => achievement.isTask)\n .sort((tas...",NOTE: positions of achievements are 1-indexed.,Changes the position of the achievement,297,2,375,2,...,changeAchievementPosition,summary,1,https://github.com/source-academy/frontend.git,src/commons/achievement/utils/AchievementInferencer.ts,4320231ee4170ae326b27f14fbe0d962931591d8,"Achievement: Refactor (Part 1) (#1447)\n\n* Edit mocks\r\n\r\n* Update view style\r\n\r\n* Code formatting, documentation\r\n\r\n* Update control handlers\r\n\r\n* Restructure achievement commons\...",source-academy/frontend.git#2,md,https://github.com/source-academy/frontend/commit/4320231ee417
0,"/**\n * This method creates an observable that waits for the `loaded` property\n * of the collection to turn `true`, emitting one time once loading\n * has finished.\n */","/**\n * This method creates an observable that waits for the `loaded` property\n * of the collection state to turn `true`, emitting one time once loading\n * has finished.\n */",waitForCollectionToLoad() {\n return this.store.let(getCollectionLoaded())\n .filter(loaded => loaded)\n .take(1);\n},waitForCollectionToLoad() {\n return this.store.let(getCollectionLoaded())\n .filter(loaded => loaded)\n .take(1);\n},"This method creates an observable that waits for the `loaded` property of the collection to turn `true`, emitting one time once loading has finished.","This method creates an observable that waits for the `loaded` property of the collection state to turn `true`, emitting one time once loading has finished.",33,2,34,2,...,waitForCollectionToLoad,summary,1,https://github.com/ngrx/example-app.git,src/guards/book-exists.ts,5c706a33a9f44f4cde06c7b624c03353afa39938,"Add more documentation, fix a few bugs",ngrx/example-app.git#0,jsdoc,https://github.com/ngrx/example-app/commit/5c706a33a9f4
853,"// For positive fragments (Has/HasValue) we need to find any passing entity up the proxy chain\n// so as soon as passes is true, we can early return. For negative fragments (Not/NotValue) every en...",/**\n * Helper function to check whether the result of a query pass check is a breaking state.\n *\n * @remarks\n * For positive fragments (Has/HasValue) we need to find any passing entity up the ...,"function isBreakingPassState(passes: boolean, fragment: EntityQueryFragment<Schema>) {\n return (passes && isPositiveFragment(fragment)) || (!passes && isNegativeFragment(fragment));\n}","function isBreakingPassState(passes: boolean, fragment: EntityQueryFragment<Schema>) {\n return (passes && isPositiveFragment(fragment)) || (!passes && isNegativeFragment(fragment));\n}","For positive fragments (Has/HasValue) we need to find any passing entity up the proxy chain so as soon as passes is true, we can early return.",Helper function to check whether the result of a query pass check is a breaking state.,103,0,239,0,...,isBreakingPassState,summary,1,https://github.com/latticexyz/mud.git,packages/recs/src/Query.ts,43575821dd13447649659d488e897c7a0f4745f5,docs: add documentation (#129)\n\n* build: change docs action to use alvrs/realdocs as base branch\r\n\r\n* docs(recs): add documentation\r\n\r\n* docs(recs): add documentation\r\n\r\n* docs(recs)...,latticexyz/mud.git#0,fd,https://github.com/latticexyz/mud/commit/43575821dd13


In [6]:
# Dump every sampled row as a numbered plain-text record:
#   ###### <1-based position> ######
#   URL:         commit_url
#   Review:      commit_msg
#   Old version: old_entire_comment_raw followed by old_code_raw
#   New version: new_entire_comment_raw followed by new_code_raw
# followed by one blank line.
for position, (row_label, row) in enumerate(top_df.iterrows(), start=1):
    record_lines = [
        f'###### {position} ######',
        f'URL: {row.commit_url}',
        f'Review: {row.commit_msg}',
        f'Old version:\n{row.old_entire_comment_raw}\n{row.old_code_raw}',
        f'New version:\n{row.new_entire_comment_raw}\n{row.new_code_raw}',
        '',  # yields the blank separator line between records
    ]
    print('\n'.join(record_lines))

###### 1 ######
URL: https://github.com/oceanprotocol/ocean.js/commit/9bf71ba4f078
Review: Features/ethers (#1696)

* add ethers

* global updates

* ve updates

* ve tests

* contract updates

* first test passing

* downgrade to ethers 5.7.2

* add log

* add tx wait

* full NftFactory.test

* add wait

* add Router tests

* fix dispenser

* Nft test

* add dispenser tests

* add fre tests part 1

* WIP datatoken unit tests

* add DFRewards test

* increase gas estimate

* increase gas estimate

* Work datatoken unit tests

* datatoken test more tests

* finished datatoken tests

* fix nft get data

* fix nft transfer tests

* Provider int tests

* Updating CodeExamples.md

* update provider & fix publish flow int test

* wip publish edit consume integration test

* more work on  publish edit consume integration test

* fix edit  publish edit consume integration test

* add 3 int tests

* Updating ComputeExamples.md

* fix signature and download

* fix compute flow integration test

