Skip to content

Commit

Permalink
Merge pull request #10 from wcj617/main
Browse files Browse the repository at this point in the history
Store a list of current repos which use the JSON Schema topic and when they were created #4
  • Loading branch information
Relequestual committed Jun 21, 2024
2 parents ccb62cd + f02866c commit 200735c
Show file tree
Hide file tree
Showing 14 changed files with 5,362 additions and 795 deletions.
5 changes: 3 additions & 2 deletions projects/initial-data/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,6 @@ dist
# SvelteKit build / generate output
.svelte-kit

# End of https://www.toptal.com/developers/gitignore/api/node

# intellij idea
.idea/
# End of https://www.toptal.com/developers/gitignore/api/node
12 changes: 12 additions & 0 deletions projects/initial-data/babel.config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"presets": [
[
"@babel/preset-env",
{
"targets": {
"node": "current"
}
}
]
]
}
2 changes: 1 addition & 1 deletion projects/initial-data/dataRecorder.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export class DataRecorder {
if (!fs.existsSync(this.fileName)) {
fs.writeFileSync(
this.fileName,
'repo,repo_topics,creation,archive_url_creation,topic_present_creation,release,archive_url_release,topic_present_release\n',
'repo,repo_topics,date_first_commit,creation,date_first_release\n',
'utf8',
);
}
Expand Down
404 changes: 404 additions & 0 deletions projects/initial-data/initialTopicRepoData-1711533629611.csv

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions projects/initial-data/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Jest configuration for the initial-data project.
export default {
// Loaded after the test framework is installed in the environment; this is
// the file that registers the beforeAll/afterEach/afterAll hooks.
setupFilesAfterEnv: ['./jest.setup.js'],
// Tests run in Jest's Node environment.
testEnvironment: 'jest-environment-node',
transform: {
// Files ending in .js or .jsx are transformed with babel-jest.
'^.+\\.(js|jsx)$': 'babel-jest',
},
};
8 changes: 8 additions & 0 deletions projects/initial-data/jest.setup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/* eslint-disable no-undef */
import { server } from './mocks/server';

// Start the mock server (imported from ./mocks/server) before any test runs.
beforeAll(() => {
server.listen();
});
// After every test, reset the server's handlers.
afterEach(() => server.resetHandlers());
// Once all tests have finished, close the mock server.
afterAll(() => server.close());
191 changes: 100 additions & 91 deletions projects/initial-data/main.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,53 @@
import { Octokit } from 'octokit';
import cheerio from 'cheerio';
import { getInput } from './setup.js';

import { DataRecorder } from './dataRecorder.js';

const WAYBACK_API_URL = 'http://archive.org/wayback/available';
const CSV_FILE_NAME = `initialTopicRepoData-${Date.now()}.csv`;

/**
 * Fetches the creation time of a GitHub repository.
 *
 * Issues `GET /repos/{owner}/{repo}` through the supplied Octokit instance
 * and converts the `created_at` field of the response to a numeric timestamp.
 *
 * @param {object} octokit - Client exposing `request(route, params)`.
 * @param {string} owner - Repository owner login.
 * @param {string} repo - Repository name.
 * @returns {Promise<number>} `Date.parse(created_at)`: milliseconds since the
 *   Unix epoch, or `NaN` if the field cannot be parsed.
 */
export async function fetchRepoCreationDate(octokit, owner, repo) {
  console.log(`Fetching creation date for repository: ${owner}/${repo}`);
  const response = await octokit.request('GET /repos/{owner}/{repo}', {
    owner,
    repo,
  });
  // Numeric timestamp, consistent with the other date fetchers in this file.
  return Date.parse(response.data.created_at);
}

/**
 * Fetches the author date of a repository's first commit.
 *
 * Requests the commit list one item per page. If the response carries a
 * `rel="last"` pagination link, that page is fetched and its single commit is
 * used (GitHub lists commits newest-first, so the last page holds the
 * earliest commit). If there is no such link, the whole history fits on the
 * first page and that commit is used directly.
 *
 * @param {object} octokit - Client exposing `request(route, params)`.
 * @param {string} owner - Repository owner login.
 * @param {string} repo - Repository name.
 * @returns {Promise<number>} `Date.parse` of the commit's author date.
 * @throws {Error} "Could not find any commits for owner/repo" when the request
 *   fails or no commit is returned; the underlying error is kept as `cause`.
 */
async function fetchFirstCommitDate(octokit, owner, repo) {
  console.log(`Fetching first commit date for repository: ${owner}/${repo}`);
  try {
    const response = await octokit.request('GET /repos/{owner}/{repo}/commits', {
      owner,
      repo,
      per_page: 1,
    });

    const lastPageUrl = response.headers.link?.match(
      /<([^>]+)>;\s*rel="last"/,
    )?.[1];

    if (!lastPageUrl) {
      // Single page: the only commit (if any) is already in this response.
      if (response.data.length === 0) {
        throw new Error(`No commits found for ${owner}/${repo}`);
      }
      // Previously this value was evaluated but never returned, so execution
      // fell through and requested an undefined URL.
      return Date.parse(response.data[0].commit.author.date);
    }

    const lastPageResponse = await octokit.request(lastPageUrl);

    if (lastPageResponse.data.length === 0) {
      console.error('Error occured');
      throw new Error(`No commits found ${owner}/${repo}`);
    }
    return Date.parse(lastPageResponse.data[0].commit.author.date);
  } catch (err) {
    // Keep the original failure reachable for debugging instead of dropping it.
    throw new Error(`Could not find any commits for ${owner}/${repo}`, {
      cause: err,
    });
  }
}

async function fetchRepoTopics(octokit, owner, repo) {
Expand All @@ -27,103 +61,77 @@ async function fetchRepoTopics(octokit, owner, repo) {

/**
 * Fetches the creation time of a repository's first release.
 *
 * Requests the release list one item per page. If the response carries a
 * `rel="last"` pagination link, that page is fetched and its single release
 * is used; otherwise the release on the first (only) page is used.
 *
 * @param {object} octokit - Client exposing `request(route, params)`.
 * @param {string} owner - Repository owner login.
 * @param {string} repo - Repository name.
 * @returns {Promise<number>} `Date.parse` of the release's `created_at`.
 * @throws {Error} "Unable to get releases for owner/repo" when the request
 *   fails or the repository has no releases; the underlying error is kept as
 *   `cause`. Note this function rejects rather than resolving to `null` when
 *   there are no releases.
 */
async function fetchFirstReleaseDate(octokit, owner, repo) {
  console.log(`Fetching first release date for repository: ${owner}/${repo}`);
  try {
    const response = await octokit.request('GET /repos/{owner}/{repo}/releases', {
      owner,
      repo,
      per_page: 1,
    });
    const lastPageUrl = response.headers.link?.match(
      /<([^>]+)>;\s*rel="last"/,
    )?.[1];

    if (!lastPageUrl) {
      // Single page: the only release (if any) is already in this response.
      if (response.data.length === 0) {
        throw new Error(`No releases found for ${owner}/${repo}`);
      }
      // Previously this value was evaluated but never returned, so execution
      // fell through and requested an undefined URL.
      return Date.parse(response.data[0].created_at);
    }

    const lastPageResponse = await octokit.request(lastPageUrl);
    if (lastPageResponse.data.length === 0) {
      throw new Error(`No releases found for ${owner}/${repo}`);
    }
    return Date.parse(lastPageResponse.data[0].created_at);
  } catch (err) {
    console.error('Error occured');
    // Keep the original failure reachable for debugging instead of dropping it.
    throw new Error(`Unable to get releases for ${owner}/${repo}`, {
      cause: err,
    });
  }
}

/**
 * Queries the Wayback availability endpoint for a URL at a given timestamp.
 *
 * @param {string} url - Page URL to look up.
 * @param {number|string} timestamp - Timestamp sent as the `timestamp` query value.
 * @returns {Promise<object>} The `archived_snapshots` member of the JSON reply.
 */
async function fetchWaybackSnapshot(url, timestamp) {
  console.log(
    `Fetching Wayback Machine snapshot for URL: ${url} at timestamp: ${timestamp}`,
  );
  const requestUrl = `${WAYBACK_API_URL}?url=${url}&timestamp=${timestamp}`;
  console.log(requestUrl);
  const reply = await fetch(requestUrl);
  const payload = await reply.json();
  return payload.archived_snapshots;
}

/**
 * Downloads a page and reports whether it contains a topic tag link whose
 * text contains the given topic.
 *
 * @param {string} url - Page to download.
 * @param {string} topic - Topic text to search for.
 * @returns {Promise<boolean>} True when at least one matching link is found.
 */
async function checkTopicInPage(url, topic) {
  console.log(`Checking if topic "${topic}" exists in page: ${url}`);
  const page = await fetch(url);
  const markup = await page.text();
  const query = cheerio.load(markup);
  const matches = query(`a.topic-tag-link:contains('${topic}')`);
  return matches.length > 0;
}

/**
 * Gathers the data for one CSV row describing a repository.
 *
 * Fetches, in order: the repository creation date, the first release date,
 * the repository topics and the first commit date.
 *
 * @param {object} octokit - Client passed through to the fetch helpers.
 * @param {string} owner - Repository owner login.
 * @param {string} repo - Repository name.
 * @returns {Promise<object>} Row object with keys `repository`, `repoTopics`,
 *   `date_first_commit`, `creation`, `date_first_release`, in that order. The
 *   caller writes `Object.values(row)`, so key order is significant.
 * @throws {Error} When the release or commit lookup rejects; the underlying
 *   error is kept as `cause`. Errors from the creation-date and topics
 *   lookups propagate unchanged.
 */
export async function processRepository(octokit, owner, repo) {
  console.log(`Processing repository: ${owner}/${repo}`);
  const githubRepoURL = `https://github.com/${owner}/${repo}`;

  const creationDate = await fetchRepoCreationDate(octokit, owner, repo);

  let firstReleaseDate;
  try {
    firstReleaseDate = await fetchFirstReleaseDate(octokit, owner, repo);
  } catch (err) {
    throw new Error(`Unable to get releases for ${owner}/${repo}`, {
      cause: err,
    });
  }

  const repoTopics = await fetchRepoTopics(octokit, owner, repo);

  let firstCommitDate;
  try {
    firstCommitDate = await fetchFirstCommitDate(octokit, owner, repo);
  } catch (err) {
    throw new Error(`Error trying to find first commit for ${owner}/${repo}`, {
      cause: err,
    });
  }
  console.log({ firstReleaseDate });

  // NOTE(review): these branches only run if a fetcher resolves to null. As
  // written in this file the fetchers throw instead, so a repository without
  // releases produces no row at all — confirm that is the intended handling.
  if (firstReleaseDate === null) {
    console.log(`First release date: of ${githubRepoURL} unknown`);
  }

  if (firstCommitDate === null) {
    console.log(`First commit date: of ${githubRepoURL} unknown`);
  }

  // Key order mirrors the CSV header: repo, repo_topics, date_first_commit,
  // creation, date_first_release.
  const singleRowData = {
    repository: `${owner}/${repo}`,
    // Wrapped in double quotes so the comma-separated topics stay one CSV field.
    repoTopics: `"${repoTopics.join(', ')}"`,
    date_first_commit: firstCommitDate,
    creation: creationDate,
    date_first_release: firstReleaseDate,
  };

  return singleRowData;
}
Expand All @@ -149,17 +157,18 @@ async function main(token, topic, numRepos) {
repo.owner.login,
repo.name,
);

dataRecorder.appendToCSV(Object.values(dataRow));
processedRepos++;
console.log(`processed ${processedRepos}`);
} catch (err) {
console.error(err);
}
}

if (numRepos !== -1 && processedRepos >= numRepos) break;
}
}

};


export function runMain() {
const { token, topic, numRepos } = getInput();
Expand Down
13 changes: 13 additions & 0 deletions projects/initial-data/mocks/handlers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { http, HttpResponse } from 'msw';

// Catch-all msw handler: any GET request under https://api.github.com/ is
// answered with the same fixed JSON object.
// NOTE(review): the payload is one flat object, while the fetchers in main.js
// read list responses (e.g. `data[0]`) for commits and releases — confirm the
// fixture shape is what the tests rely on.
export const releaseDateHandler = http.get('https://api.github.com/*', () => {
return HttpResponse.json({
repository: 'octocat/hello-world',
names: 'python, json, json-schema, docker, postgresql',
date_first_commit: '2019-02-09T15:42:36Z',
created_at: '2019-02-23T15:08:34Z',
date_first_release: '2019-03-11T09:49:01Z',
});
});

// Handler list consumed by ./server.js when building the mock server.
export const handlers = [releaseDateHandler];
4 changes: 4 additions & 0 deletions projects/initial-data/mocks/server.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { setupServer } from 'msw/node';
import { handlers } from './handlers';

// msw Node server built from the handlers exported by ./handlers; jest.setup.js
// imports it to call listen / resetHandlers / close around the tests.
export const server = setupServer(...handlers);
10 changes: 8 additions & 2 deletions projects/initial-data/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
"main": "index.js",
"engines": {},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node start.js",
"test": "jest --watchAll --detectOpenHandles",
"eslint": "eslint . --ext js",
"eslint:fix": "pnpm run eslint -- --fix"
"eslint:fix": "pnpm run eslint --fix"
},
"keywords": [],
"author": "Ben Hutton",
Expand All @@ -21,8 +21,14 @@
"octokit": "^3.1.2"
},
"devDependencies": {
"@babel/preset-env": "^7.24.3",
"@testing-library/jest-dom": "^6.4.2",
"babel-jest": "^29.7.0",
"eslint": "^8.54.0",
"eslint-config-prettier": "^9.1.0",
"jest": "^29.7.0",
"memfs": "^4.8.0",
"msw": "^2.2.10",
"prettier": "^3.1.0",
"prettier-eslint": "^16.1.2"
}
Expand Down
Loading

0 comments on commit 200735c

Please sign in to comment.