Skip to content

Commit

Permalink
Merge pull request #2 from pdehaan/amo-api
Browse files: browse the repository at this point in the history
Convert to using AMO API instead of scraping website
  • Loading branch information
pdehaan committed Apr 11, 2019
2 parents 84a5991 + 7e33335 commit 5e0fa43
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 384 deletions.
111 changes: 66 additions & 45 deletions README.md
Expand Up @@ -44,51 +44,72 @@ Assuming the following [addons.json](./addons.json) input file:
```sh
$ npx pdehaan/curated-addons

{
"category": "Privacy/Security - Tier 1",
"addons": [
{
"slug": "ghostery",
"title": "Ghostery – Privacy Ad Blocker",
"author": "Ghostery",
"version": "8.3.1",
"summary": "Ghostery is a powerful privacy extension. Block ads, stop trackers and speed up websites.",
"isFeatured": true,
"homepage": "http://www.ghostery.com/",
"metadata": {
[
{
"category": "Privacy/Security - Tier 1",
"addons": [
{
"name": "Ghostery – Privacy Ad Blocker",
"version": "8.3.1",
"guid": "firefox@ghostery.com",
"author": "Ghostery",
"summary": "Ghostery is a powerful privacy extension. \n\nBlock ads, stop trackers and speed up websites.",
"type": "extension",
"size": 7528085,
"createdDate": "2008-11-15T11:19:34.000Z",
"lastUpdated": "2019-01-31T21:25:09.000Z",
"license": "Mozilla Public License, version 2.0",
"isFeatured": true,
"isSourcePublic": true,
"ratings": 4.4436,
"reviewCount": 2888,
"users": 1146072,
"reviews": 2888,
"rating": 4.4
"weeklyDownloads": 71438,
"homepage": "http://www.ghostery.com/",
"url": "https://addons.mozilla.org/firefox/addon/ghostery/",
"slug": "ghostery"
},
{
"name": "Firefox Multi-Account Containers",
"version": "6.1.0",
// ...
},
{
"name": "AdBlocker Ultimate",
"version": "2.37",
// ...
}
},
{
"slug": "multi-account-containers",
"title": "Firefox Multi-Account Containers",
"author": "Mozilla",
"version": "6.1.0",
"summary": "Firefox Multi-Account Containers lets you keep parts of your online life separated into color-coded tabs that preserve your privacy. Cookies are separated by container, allowing you to use the web with multiple identities or accounts simultaneously.",
"isFeatured": true,
"homepage": "https://github.com/mozilla/multi-account-containers/#readme",
"metadata": {
"users": 186461,
"reviews": 2638,
"rating": 4.5
}
},
{
"slug": "adblocker-ultimate",
"title": "AdBlocker Ultimate",
"author": "AdBlocker Ultimate",
"version": "2.37",
"summary": "Free and improved ad blocker. Completely remove ALL ads. No \"acceptable\" ads or whitelisted advertisers, block tracking, block malware!",
"isFeatured": true,
"homepage": "https://adblockultimate.net",
"metadata": {
"users": 415361,
"reviews": 5309,
"rating": 4.7
}
}
]
}
]
},
{
"category": "Privacy/Security - Tier 2",
"addons": [
{
"name": "AdBlocker for YouTube™",
"version": "0.2.9",
"guid": "jid1-q4sG8pYhq8KGHs@jetpack",
"author": "AdblockLite",
"summary": "Removes all annoying Ads from YouTube",
"type": "extension",
"size": 38990,
"createdDate": "2015-03-29T02:01:10.000Z",
"lastUpdated": "2018-08-31T15:52:30.000Z",
"license": "Mozilla Public License, version 2.0",
"isFeatured": true,
"isSourcePublic": true,
"ratings": 4.0864,
"reviewCount": 220,
"users": 228258,
"weeklyDownloads": 7661,
"homepage": "http://mybrowseraddon.com/clean-youtube.html",
"url": "https://addons.mozilla.org/firefox/addon/adblock-for-youtube/",
"slug": "adblock-for-youtube"
},
{
"name": "Forget Me Not - Forget cookies & other data",
"version": "2.2.1",
// ...
]
}
]
```
4 changes: 2 additions & 2 deletions addons.json
@@ -1,14 +1,14 @@
[
{
"category": "Privacy/Security - Tier 1",
"category": "Privacy/Security - A",
"addons": [
"ghostery",
"multi-account-containers",
"adblocker-ultimate"
]
},
{
"category": "Privacy/Security - Tier 2",
"category": "Privacy/Security - B",
"addons": [
"adblock-for-youtube",
"forget_me_not",
Expand Down
3 changes: 1 addition & 2 deletions cli.js
Expand Up @@ -11,6 +11,5 @@ main(addons)

/**
 * Fetches the add-on details for the given input and prints them to stdout
 * as pretty-printed (2-space indented) JSON.
 *
 * @param {Array<Object>} addons - Input list handed unchanged to
 *   `lib.fetchAddons` (presumably the parsed contents of addons.json).
 * @returns {Promise<void>} Settles after the JSON has been passed to
 *   `console.log`; rejects if `lib.fetchAddons` rejects.
 */
async function main(addons) {
  const res = await lib.fetchAddons(addons);
  // Print exactly once and let the process end naturally. Forcing
  // `process.exit(0)` here can cut off output when stdout is a pipe, and it
  // made the second, duplicate `console.log` unreachable.
  console.log(JSON.stringify(res, null, 2));
}
73 changes: 33 additions & 40 deletions lib.js
@@ -1,56 +1,49 @@
const puppeteer = require("puppeteer");
const axios = require("axios");

const amoClient = axios.create({
baseURL: "https://addons.mozilla.org/api/v4/"
});

module.exports = {
fetchAddons
};

async function fetchAddons(addons) {
const browser = await puppeteer.launch();
const page = await browser.newPage();

const data = [];
for (const item of addons) {
const res = [];
const categoryAddons = [];
for (const name of item.addons) {
res.push(await getAddon(page, name));
categoryAddons.push(getAddon(name));
}

return {
data.push({
category: item.category,
addons: res
};
addons: await Promise.all(categoryAddons)
});
}

await browser.close();
return data;
}

async function getAddon(page, slug) {
await page.goto(`https://addons.mozilla.org/firefox/addon/${slug}/`);
const title = await page.$eval(".AddonTitle", el => el.textContent);
const author = await page.$eval(".AddonTitle-author", el => el.textContent);
const summary = await page.$eval(".Addon-summary", el => el.textContent);
const isFeatured = await page.$(".Badge-featured");
const [users, reviews, rating] = await page.$$eval(".MetadataCard-list", nodes => nodes.map(n => n.innerText));
const version = await page.$eval(".AddonMoreInfo-version", el => el.textContent);

const homepage = await page.$eval(".AddonMoreInfo-homepage-link", el => decodeURIComponent(el.getAttribute("href").replace(/^.*(https?)%3A/, "$1:")));

async function getAddon(slug) {
const {data: addon} = await amoClient.get(`/addons/addon/${slug}`);
return {
slug,
title: title.replace(author, "").trim(),
author: author.replace(/^by /, "").trim(),
version,
summary,
isFeatured: !!isFeatured,
homepage,
metadata: {
users: numberParser(users),
reviews: numberParser(reviews),
rating: parseFloat(rating.split("\n").pop())
}
name: addon.name["en-US"],
version: addon.current_version.version,
guid: addon.guid,
author: addon.authors[0].name,
summary: addon.summary["en-US"],
type: addon.type,
size: addon.current_version.files[0].size,
createdDate: new Date(addon.created),
lastUpdated: new Date(addon.last_updated),
license: addon.current_version.license.name["en-US"],
isFeatured: addon.is_featured,
isSourcePublic: addon.is_source_public,
ratings: addon.ratings.average,
reviewCount: addon.ratings.count,
users: addon.average_daily_users,
weeklyDownloads: addon.weekly_downloads,
homepage: addon.homepage["en-US"],
url: addon.url.replace("/en-US/", "/"),
slug
};
}

/**
 * Reads the leading integer out of a display string such as
 * "1,146,072\nUsers".
 *
 * The first newline is turned into a space and every comma is removed before
 * the text is parsed in base 10, so a trailing label is ignored.
 *
 * @param {string} value - Raw text that begins with a comma-grouped number.
 * @returns {number} The parsed integer, or NaN when no leading integer can be read.
 */
function numberParser(value) {
  const singleLine = value.replace("\n", " ");
  const withoutCommas = singleLine.split(",").join("");
  return Number.parseInt(withoutCommas, 10);
}

0 comments on commit 5e0fa43

Please sign in to comment.