Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/actions/spelling/allow.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
gpgarmor
github
https
leetcode
pgn
scm
shas
ssh
ubuntu
yargsparser
7 changes: 3 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
"twemoji-parser": "^14.0.0",
"vue": "^2.7.1",
"vue-prism-component": "^1.2.0",
"xml-formatter": "^2.6.1"
"xml-formatter": "^2.6.1",
"yargs-parser": "^21.1.1"
},
"devDependencies": {
"eslint": "^8.25.0",
Expand Down
29 changes: 18 additions & 11 deletions source/app/metrics/utils.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -225,17 +225,19 @@ export async function language({filename, patch}) {
}

/**Run command (use this to execute commands and process whole output at once, may not be suitable for large outputs) */
export async function run(command, options, {prefixed = true, log = true} = {}) {
export async function run(command, options, {prefixed = true, log = true, debug = true} = {}) {
const prefix = {win32: "wsl"}[process.platform] ?? ""
command = `${prefixed ? prefix : ""} ${command}`.trim()
return new Promise((solve, reject) => {
console.debug(`metrics/command/run > ${command}`)
if (debug)
console.debug(`metrics/command/run > ${command}`)
const child = processes.exec(command, options)
let [stdout, stderr] = ["", ""]
child.stdout.on("data", data => stdout += data)
child.stderr.on("data", data => stderr += data)
child.on("close", code => {
console.debug(`metrics/command/run > ${command} > exited with code ${code}`)
if (debug)
console.debug(`metrics/command/run > ${command} > exited with code ${code}`)
if (log) {
console.debug(stdout)
console.debug(stderr)
Expand All @@ -246,7 +248,7 @@ export async function run(command, options, {prefixed = true, log = true} = {})
}

/**Spawn command (use this to execute commands and process output on the fly) */
export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout} = {}) { //eslint-disable-line max-params
export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout, debug = true} = {}) { //eslint-disable-line max-params
const prefix = {win32: "wsl"}[process.platform] ?? ""
if ((prefixed) && (prefix)) {
args.unshift(command)
Expand All @@ -255,15 +257,18 @@ export async function spawn(command, args = [], options = {}, {prefixed = true,
if (!stdout)
throw new Error("`stdout` argument was not provided, use run() instead of spawn() if processing output is not needed")
return new Promise((solve, reject) => {
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`)
if (debug)
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`)
const child = processes.spawn(command, args, {...options, shell: true, timeout})
const reader = readline.createInterface({input: child.stdout})
reader.on("line", stdout)
const closed = new Promise(close => reader.on("close", close))
child.on("close", async code => {
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`)
if (debug)
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`)
await closed
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`)
if (debug)
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`)
return code === 0 ? solve() : reject()
})
})
Expand Down Expand Up @@ -372,7 +377,7 @@ export const filters = {
return result
},
/**Repository filter*/
repo(repository, patterns) {
repo(repository, patterns, {debug = true} = {}) {
//Disable filtering when no pattern is provided
if (!patterns.length)
return true
Expand All @@ -390,11 +395,12 @@ export const filters = {

//Basic pattern matching
const include = (!patterns.includes(repo)) && (!patterns.includes(`${user}/${repo}`))
console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`)
if (debug)
console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`)
return include
},
/**Text filter*/
text(text, patterns) {
text(text, patterns, {debug = true} = {}) {
//Disable filtering when no pattern is provided
if (!patterns.length)
return true
Expand All @@ -404,7 +410,8 @@ export const filters = {

//Basic pattern matching
const include = !patterns.includes(text)
console.debug(`metrics/filters/text > filter ${text} (${include ? "included" : "excluded"})`)
if (debug)
console.debug(`metrics/filters/text > filter ${text} (${include ? "included" : "excluded"})`)
return include
},
}
Expand Down
45 changes: 42 additions & 3 deletions source/plugins/languages/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,7 @@ It will be automatically hidden if empty.</p>

## 🔎 `indepth` mode

The default algorithm use the top languages provided of each repository you contributed to.
When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.
The default algorithm uses the top languages from each repository you contributed to using GitHub GraphQL API (which is similar to the displayed languages bar on github.com). When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.

The `plugin_languages_indepth` option lets you use a more advanced algorithm for more accurate statistics.
Under the hood, it will clone your repositories, run [linguist-js](https://github.com/Nixinova/Linguist) (a JavaScript port of [GitHub linguist](https://github.com/github/linguist)) and iterate over patches matching your `commits_authoring` setting.
Expand All @@ -257,12 +256,52 @@ Since git lets you use any email and username for commits, *metrics* may not be

> ⚠️ This feature significantly increases workflow time

> ⚠️ Since this mode iterates over **each commit of each repository**, it is not suited for large code base, especially those with a large amount of commits and the ones containing binaries. While `plugin_languages_analysis_timeout` can be used to increase the default timeout for analysis, please be responsible and keep this feature disabled if it cannot work on your account to save GitHub resources and our planet 🌏
> ⚠️ Since this mode iterates over **each matching commit of each repository**, it is not suited for large code bases, especially those with a large number of commits or those containing binaries. While `plugin_languages_analysis_timeout` and `plugin_languages_analysis_timeout_repositories` can be used to increase the default timeout for analysis, please be responsible and keep this feature disabled if it cannot work on your account to save GitHub resources and our planet 🌏

> ⚠️ Although *metrics* does not send any code to external sources, repositories are temporarily cloned on the GitHub Action runner. It is advised to keep this option disabled when working with sensitive data or company code. Use at your own risk, *metrics* and its authors **cannot** be held responsible for any resulting code leaks. Source code is available for auditing at [analyzers.mjs](/source/plugins/languages/analyzers.mjs).

> 🌐 Web instances must enable this feature in `settings.json`

Below is a summary of the process used to compute indepth statistics:

## Most used mode

1. Fetch GPG keys linked to your GitHub account
- automatically add attached emails to `commits_authoring`
- *web-flow* (GitHub's public key for changes made through web-ui) is also fetched
2. Import GPG keys so they can be used to verify commits later
3. Iterate through repositories
- early break if `plugin_languages_analysis_timeout` is reached
- skip repository if it matches `plugin_languages_skipped`
- include repositories from `plugin_languages_indepth_custom`
- a specific branch and commit range can be used
- a source other than github.com can be used
4. Clone repository
- target branch is checked out
5. List of authored commits is computed
- using `git log --author` and `commits_authoring` to search in commit headers
- using `git log --grep` and `commits_authoring` to search in commit body
- ensure these are within the range specified by `plugin_languages_indepth_custom` (if applicable)
6. Process authored commits
- early break if `plugin_languages_analysis_timeout_repositories` is reached
- using `git verify-commit` to check authenticity against imported GPG keys
- using `git log --patch` to extract added/deleted lines/bytes from each file
- using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
- respect `plugin_languages_categories` option
- if a file has since been deleted or moved, check out the last commit in which the file was present and run linguist again
7. Aggregate results

## Recently used mode

1. Fetch push events linked to your account (or target repository)
- matching `plugin_languages_recent_load` and `plugin_languages_recent_days` options
- matching committer emails from `commits_authoring`
2. Process authored commits
- using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
- respect `plugin_languages_recent_categories` option
- directly pass file content rather than performing I/O and simulating a git repository
3. Aggregate results

## 📅 Recently used languages

This feature uses a similar algorithm as `indepth` mode, but uses patches from your events feed instead.
Expand Down
182 changes: 182 additions & 0 deletions source/plugins/languages/analyzer/analyzer.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
//Imports
import crypto from "crypto"
import fs from "fs/promises"
import os from "os"
import paths from "path"
import git from "simple-git"
import {filters} from "../../../app/metrics/utils.mjs"

/**
 * Analyzer
 *
 * Base class holding the shared state and helpers used to compute languages statistics:
 * repository pattern parsing, cloning into a temporary directory, per-commit aggregation,
 * cleanup and logging.
 *
 * `analyze()` calls `this.linguist(path, {commit, cache})`, which is not defined in this class
 * and must be provided by the concrete analyzer.
 */
export class Analyzer {

  /**
   * Constructor
   *
   * @param {string} login - user login (used in log prefix)
   * @param {object} [options]
   * @param {string} [options.account="bypass"] - account type
   * @param {string[]} [options.authoring=[]] - authoring identifiers
   * @param {string|number} [options.uid] - prefix of temporary directories names (defaults to a random UUID)
   * @param {*} [options.shell] - shell utilities (stored as-is)
   * @param {*} [options.rest=null] - rest client (stored as-is)
   * @param {object} [options.context={mode:"user"}] - execution context
   * @param {string[]} [options.skipped=[]] - patterns passed to the repository filter by `ignore()`
   * @param {string[]} [options.categories=["programming", "markup"]] - languages types kept in results
   * @param {{global:number, repositories:number}} [options.timeout] - timeouts in minutes (NaN disables them)
   */
  constructor(login, {account = "bypass", authoring = [], uid = crypto.randomUUID(), shell, rest = null, context = {mode:"user"}, skipped = [], categories = ["programming", "markup"], timeout = {global:NaN, repositories:NaN}} = {}) {
    //User informations
    this.login = login
    this.account = account
    this.authoring = authoring
    //Used to build temporary directories names, hence it should not be predictable
    this.uid = uid
    this.gpg = []

    //Utilities
    this.shell = shell
    this.rest = rest
    this.context = context
    this.markers = {
      hash:/\b[0-9a-f]{40}\b/,
      file:/^[+]{3}\sb[/](?<file>[\s\S]+)$/,
      line:/^(?<op>[-+])\s*(?<content>[\s\S]+)$/,
    }
    //Matches "login/name", optionally followed by "@branch" and ":ref"
    this.parser = /^(?<login>[\s\S]+?)\/(?<name>[\s\S]+?)(?:@(?<branch>[\s\S]+?)(?::(?<ref>[\s\S]+))?)?$/
    this.consumed = false

    //Options
    this.skipped = skipped
    this.categories = categories
    this.timeout = timeout

    //Results
    this.results = {partial: {global:false, repositories:false}, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, elapsed:0}
    this.debug(`instantiated a new ${this.constructor.name}`)
  }

  /**
   * Run analyzer
   *
   * Executes `runner` and resolves with `this.results` once it completes, or as soon as
   * `timeout.global` (minutes) is reached, whichever comes first.
   * On return, `results.partial` is collapsed into a single boolean.
   *
   * @param {Function} runner - function (possibly async) performing the analysis
   * @returns {Promise<object>} results
   * @throws {Error} if this analyzer was already consumed, or if `runner` fails
   */
  async run(runner) {
    if (this.consumed)
      throw new Error("This analyzer has already been consumed, another instance needs to be created to perform a new analysis")
    this.consumed = true
    let timer = null
    try {
      const results = await new Promise((solve, reject) => {
        if (Number.isFinite(this.timeout.global)) {
          this.debug(`timeout set to ${this.timeout.global}m`)
          timer = setTimeout(() => {
            try {
              this.debug(`reached maximum execution time of ${this.timeout.global}m for analysis`)
              this.results.partial.global = true
              solve(this.results)
            }
            catch {
              //Ignore errors (an exception thrown from a timer callback would be uncaught)
            }
          }, this.timeout.global * 60 * 1000)
        }
        //Runner is started synchronously, and its failure (sync or async) is forwarded to caller instead of being left unhandled
        const pending = (async () => runner())()
        pending.then(() => solve(this.results), reject)
      })
      results.partial = (results.partial.global)||(results.partial.repositories)
      return results
    }
    finally {
      //Release timer so it does not keep the event loop alive once analysis is settled
      clearTimeout(timer)
    }
  }

  /**
   * Parse repository
   *
   * @param {string|{owner:{login:string}, name:string}} repository - "login/name[@branch[:ref]]" pattern or repository object
   * @returns {{repo:string, path:string, branch:?string, ref:?string}} repository handle, temporary path, and optional branch/ref (only set for string patterns)
   * @throws {TypeError} if a string pattern is not supported
   */
  parse(repository) {
    let target = repository, branch = null, ref = null
    if (typeof repository === "string") {
      const groups = repository.match(this.parser)?.groups
      if (!groups)
        throw new TypeError(`"${repository}" pattern is not supported`)
      target = {owner:{login:groups.login}, name:groups.name}
      branch = groups.branch ?? null
      ref = groups.ref ?? null
    }
    const repo = `${target.owner.login}/${target.name}`
    //Non-word characters are replaced to get a single flat directory name
    const path = paths.join(os.tmpdir(), `${this.uid}-${repo.replace(/[^\w]/g, "_")}`)
    return {repo, path, branch, ref}
  }

  /**
   * Clone a repository
   *
   * Target directory is reset before cloning, and cleaned if cloning fails.
   *
   * @param {string|object} repository - see `parse()`
   * @returns {Promise<boolean>} whether the clone succeeded
   */
  async clone(repository) {
    const {repo, branch, path} = this.parse(repository)
    const url = /^https?:\/\//.test(repo) ? repo : `https://github.com/${repo}`
    try {
      this.debug(`cloning ${url} to ${path}`)
      await fs.rm(path, {recursive: true, force: true})
      await fs.mkdir(path, {recursive: true})
      await git(path).clone(url, ".", ["--single-branch"]).status()
      this.debug(`cloned ${url} to ${path}`)
      if (branch) {
        this.debug(`switching to branch ${branch} for ${repo}`)
        await git(path).branch(branch)
      }
      return true
    }
    catch (error) {
      this.debug(`failed to clone ${url} (${error})`)
      //Wait for cleanup so no partial clone is left behind when this method returns
      await this.clean(path)
      return false
    }
  }

  /**
   * Analyze a repository
   *
   * Runs `this.linguist()` on each commit and aggregates its output into `this.results`.
   * Only languages whose cached type is listed in `this.categories` are added to `lines` and `stats`.
   * Commits for which `this.linguist()` throws are counted in `results.missed.commits`.
   * Stops early (and flags `results.partial.repositories`) once `timeout.repositories` minutes are exceeded.
   *
   * @param {string} path - repository path
   * @param {object} [options]
   * @param {object[]} [options.commits=[]] - commits to process
   */
  async analyze(path, {commits = []} = {}) {
    const cache = {files:{}, languages:{}}
    const start = Date.now()
    //Minutes spent since the beginning of this call
    const minutes = () => (Date.now() - start)/1000/60
    let elapsed = 0, processed = 0
    if (this.timeout.repositories)
      this.debug(`timeout for repository analysis set to ${this.timeout.repositories}m`)
    for (const commit of commits) {
      elapsed = minutes()
      if ((this.timeout.repositories)&&(elapsed > this.timeout.repositories)) {
        this.results.partial.repositories = true
        this.debug(`reached maximum execution time of ${this.timeout.repositories}m for repository analysis (${elapsed}m elapsed)`)
        break
      }
      try {
        const {total, files, missed, lines, stats} = await this.linguist(path, {commit, cache})
        this.results.commits++
        this.results.total += total
        this.results.files += files
        this.results.missed.lines += missed.lines
        this.results.missed.bytes += missed.bytes
        for (const [language, value] of Object.entries(lines)) {
          if (this.categories.includes(cache.languages[language]?.type))
            this.results.lines[language] = (this.results.lines[language] ?? 0) + value
        }
        for (const [language, value] of Object.entries(stats)) {
          if (this.categories.includes(cache.languages[language]?.type))
            this.results.stats[language] = (this.results.stats[language] ?? 0) + value
        }
      }
      catch (error) {
        this.debug(`skipping commit ${commit.sha} (${error})`)
        this.results.missed.commits++
      }
      finally {
        processed++
        if ((processed%50 === 0)||(processed === commits.length))
          this.debug(`at commit ${processed}/${commits.length} (${(100*processed/commits.length).toFixed(2)}%, ${elapsed.toFixed(2)}m elapsed)`)
      }
    }
    //Duration is added once per call (adding the running total at each commit would count the same time several times)
    this.results.elapsed += minutes()
    //Colors are merged so the ones collected by previous calls are kept
    this.results.colors = {...this.results.colors, ...Object.fromEntries(Object.entries(cache.languages).map(([lang, {color}]) => [lang, color]))}
  }

  /**
   * Clean a path
   *
   * @param {string} path - path to remove (recursively)
   * @returns {Promise<boolean>} whether the removal succeeded (errors are logged, never thrown)
   */
  async clean(path) {
    try {
      this.debug(`cleaning ${path}`)
      await fs.rm(path, {recursive: true, force: true})
      this.debug(`cleaned ${path}`)
      return true
    }
    catch (error) {
      this.debug(`failed to clean (${error})`)
      return false
    }
  }

  /**
   * Whether to skip a repository or not
   *
   * @param {string|object} repository - repository handle or object
   * @returns {boolean} true when the repository filter rejects it against `this.skipped`
   */
  ignore(repository) {
    const ignored = !filters.repo(repository, this.skipped)
    if (ignored)
      this.debug(`skipping ${typeof repository === "string" ? repository : `${repository?.owner?.login}/${repository?.name}`} as it matches skipped repositories`)
    return ignored
  }

  /**
   * Debug log
   *
   * Message is prefixed with login and the lowercased class name (a space is inserted at the first lower/upper case boundary).
   *
   * @param {string} message - message to log
   */
  debug(message) {
    return console.debug(`metrics/compute/${this.login}/plugins > languages > ${this.constructor.name.replace(/([a-z])([A-Z])/, (_, a, b) => `${a} ${b.toLocaleLowerCase()}`).toLocaleLowerCase()} > ${message}`)
  }

}
Loading