Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert to using AMO API instead of scraping website #2

Merged
Merged 2 commits on Apr 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
111 changes: 66 additions & 45 deletions README.md
Expand Up @@ -44,51 +44,72 @@ Assuming the following [addons.json](./addons.json) input file:
```sh
$ npx pdehaan/curated-addons

{
"category": "Privacy/Security - Tier 1",
"addons": [
{
"slug": "ghostery",
"title": "Ghostery – Privacy Ad Blocker",
"author": "Ghostery",
"version": "8.3.1",
"summary": "Ghostery is a powerful privacy extension. Block ads, stop trackers and speed up websites.",
"isFeatured": true,
"homepage": "http://www.ghostery.com/",
"metadata": {
[
{
"category": "Privacy/Security - Tier 1",
"addons": [
{
"name": "Ghostery – Privacy Ad Blocker",
"version": "8.3.1",
"guid": "firefox@ghostery.com",
"author": "Ghostery",
"summary": "Ghostery is a powerful privacy extension. \n\nBlock ads, stop trackers and speed up websites.",
"type": "extension",
"size": 7528085,
"createdDate": "2008-11-15T11:19:34.000Z",
"lastUpdated": "2019-01-31T21:25:09.000Z",
"license": "Mozilla Public License, version 2.0",
"isFeatured": true,
"isSourcePublic": true,
"ratings": 4.4436,
"reviewCount": 2888,
"users": 1146072,
"reviews": 2888,
"rating": 4.4
"weeklyDownloads": 71438,
"homepage": "http://www.ghostery.com/",
"url": "https://addons.mozilla.org/firefox/addon/ghostery/",
"slug": "ghostery"
},
{
"name": "Firefox Multi-Account Containers",
"version": "6.1.0",
// ...
},
{
"name": "AdBlocker Ultimate",
"version": "2.37",
// ...
}
},
{
"slug": "multi-account-containers",
"title": "Firefox Multi-Account Containers",
"author": "Mozilla",
"version": "6.1.0",
"summary": "Firefox Multi-Account Containers lets you keep parts of your online life separated into color-coded tabs that preserve your privacy. Cookies are separated by container, allowing you to use the web with multiple identities or accounts simultaneously.",
"isFeatured": true,
"homepage": "https://github.com/mozilla/multi-account-containers/#readme",
"metadata": {
"users": 186461,
"reviews": 2638,
"rating": 4.5
}
},
{
"slug": "adblocker-ultimate",
"title": "AdBlocker Ultimate",
"author": "AdBlocker Ultimate",
"version": "2.37",
"summary": "Free and improved ad blocker. Completely remove ALL ads. No \"acceptable\" ads or whitelisted advertisers, block tracking, block malware!",
"isFeatured": true,
"homepage": "https://adblockultimate.net",
"metadata": {
"users": 415361,
"reviews": 5309,
"rating": 4.7
}
}
]
}
]
},
{
"category": "Privacy/Security - Tier 2",
"addons": [
{
"name": "AdBlocker for YouTube™",
"version": "0.2.9",
"guid": "jid1-q4sG8pYhq8KGHs@jetpack",
"author": "AdblockLite",
"summary": "Removes all annoying Ads from YouTube",
"type": "extension",
"size": 38990,
"createdDate": "2015-03-29T02:01:10.000Z",
"lastUpdated": "2018-08-31T15:52:30.000Z",
"license": "Mozilla Public License, version 2.0",
"isFeatured": true,
"isSourcePublic": true,
"ratings": 4.0864,
"reviewCount": 220,
"users": 228258,
"weeklyDownloads": 7661,
"homepage": "http://mybrowseraddon.com/clean-youtube.html",
"url": "https://addons.mozilla.org/firefox/addon/adblock-for-youtube/",
"slug": "adblock-for-youtube"
},
{
"name": "Forget Me Not - Forget cookies & other data",
"version": "2.2.1",
// ...
]
}
]
```
4 changes: 2 additions & 2 deletions addons.json
@@ -1,14 +1,14 @@
[
{
- "category": "Privacy/Security - Tier 1",
+ "category": "Privacy/Security - A",
"addons": [
"ghostery",
"multi-account-containers",
"adblocker-ultimate"
]
},
{
- "category": "Privacy/Security - Tier 2",
+ "category": "Privacy/Security - B",
"addons": [
"adblock-for-youtube",
"forget_me_not",
Expand Down
3 changes: 1 addition & 2 deletions cli.js
Expand Up @@ -11,6 +11,5 @@ main(addons)

async function main(addons) {
const res = await lib.fetchAddons(addons);
- console.log(JSON.stringify(res, null, 2));
- process.exit(0);
+ console.log(JSON.stringify(res, null, 2));
}
73 changes: 33 additions & 40 deletions lib.js
@@ -1,56 +1,49 @@
const puppeteer = require("puppeteer");
const axios = require("axios");

const amoClient = axios.create({
baseURL: "https://addons.mozilla.org/api/v4/"
});

module.exports = {
fetchAddons
};

async function fetchAddons(addons) {
const browser = await puppeteer.launch();
const page = await browser.newPage();

const data = [];
for (const item of addons) {
const res = [];
const categoryAddons = [];
for (const name of item.addons) {
res.push(await getAddon(page, name));
categoryAddons.push(getAddon(name));
}

return {
data.push({
category: item.category,
addons: res
};
addons: await Promise.all(categoryAddons)
});
}

await browser.close();
return data;
}

async function getAddon(page, slug) {
await page.goto(`https://addons.mozilla.org/firefox/addon/${slug}/`);
const title = await page.$eval(".AddonTitle", el => el.textContent);
const author = await page.$eval(".AddonTitle-author", el => el.textContent);
const summary = await page.$eval(".Addon-summary", el => el.textContent);
const isFeatured = await page.$(".Badge-featured");
const [users, reviews, rating] = await page.$$eval(".MetadataCard-list", nodes => nodes.map(n => n.innerText));
const version = await page.$eval(".AddonMoreInfo-version", el => el.textContent);

const homepage = await page.$eval(".AddonMoreInfo-homepage-link", el => decodeURIComponent(el.getAttribute("href").replace(/^.*(https?)%3A/, "$1:")));

async function getAddon(slug) {
const {data: addon} = await amoClient.get(`/addons/addon/${slug}`);
return {
slug,
title: title.replace(author, "").trim(),
author: author.replace(/^by /, "").trim(),
version,
summary,
isFeatured: !!isFeatured,
homepage,
metadata: {
users: numberParser(users),
reviews: numberParser(reviews),
rating: parseFloat(rating.split("\n").pop())
}
name: addon.name["en-US"],
version: addon.current_version.version,
guid: addon.guid,
author: addon.authors[0].name,
summary: addon.summary["en-US"],
type: addon.type,
size: addon.current_version.files[0].size,
createdDate: new Date(addon.created),
lastUpdated: new Date(addon.last_updated),
license: addon.current_version.license.name["en-US"],
isFeatured: addon.is_featured,
isSourcePublic: addon.is_source_public,
ratings: addon.ratings.average,
reviewCount: addon.ratings.count,
users: addon.average_daily_users,
weeklyDownloads: addon.weekly_downloads,
homepage: addon.homepage["en-US"],
url: addon.url.replace("/en-US/", "/"),
slug
};
}

function numberParser(value) {
const _value = value.replace("\n", " ").replace(/,/g, "");
return parseInt(_value, 10);
}