Political news scraping & NLP parsing from web pages.
To use this module, either install it from npm using the command below, or clone this repository and import the .js files directly from source.
npm i poli-parse
Imports can be done through the aggregating index.js file or via individual members.
const PoliParse = require('./index.js'); // from source
const PoliParse = require('poli-parse') // from npm
// es6
import * as PoliParse from "../src"; // from source
import * as PoliParse from "poli-parse"; // from npm
// from source
const Parse = require("./Parse");
const { Parse } = require("poli-parse"); // from npm
import { Parse } from "poli-parse"; // es6
The collection below is just a sample of methods and may be out of date. For the most recent examples, please see the samples folder in the root directory of this repository.
Basic scrape/parse example.
const PP = require("../dist/index"); // import library
// hit the homepage of wsj.com
PP.Scrape.AllText("https://www.wsj.com/").then(data => {
// filter out text that isn't longer than two words (likely not a sentence)
const filtered = PP.Parse.FilterLength(data, 2);
// split monolithic text from the html into more useful chunks
const split = PP.Parse.SplitMonolithic(filtered);
// find all texts which mention a specific subject
const subject = PP.Parse.FilterSubject(split, [
"Elizabeth",
"Warren"
]);
// see what headlines were found regarding the subject
console.log(subject);
});
More advanced scrape/parse example, with analysis logic added.
const PP = require("../dist/index"); // import library
PP.Scrape.AllText("https://www.wsj.com/").then(data => {
let newData = PP.Parse.FilterLength(data, 2);
newData = PP.Parse.SplitMonolithic(newData);
newData = PP.Parse.FilterSubject(newData, ["Donald", "Trump"]);
const results = newData.map(async headline => {
const sentiment = await PP.Sentiment.Compute(headline);
const pos = await PP.Language.ComputePOS(headline);
return {
headline,
sentiment,
pos
};
});
Promise.all(results).then(completed => {
console.log(completed);
});
});
The following commands are available during development.
npm test # run tests with Jest
npm run coverage # run tests with coverage and open the report in the browser
npm run lint # lint code
npm run docs # generate docs
npm run build # transpile code