Skip to content

Commit

Permalink
Merge pull request #9 from jshemas/rebuild
Browse files Browse the repository at this point in the history
rebuild library in typescript and no longer makes HTTP requests
  • Loading branch information
jshemas committed Oct 22, 2023
2 parents ae69de6 + 112932e commit 85d7581
Show file tree
Hide file tree
Showing 71 changed files with 8,797 additions and 15,039 deletions.
2 changes: 1 addition & 1 deletion .eslintignore
@@ -1,3 +1,3 @@
coverage/
dist/
node_modules/
example.js
15 changes: 5 additions & 10 deletions .eslintrc.json
@@ -1,22 +1,17 @@
{
"settings": {
"react": {
"version": "latest"
}
},
"extends": [
"airbnb",
"airbnb-base",
"airbnb-typescript/base",
"plugin:promise/recommended"
],
"plugins": [
"mocha",
"promise"
],
"env": {
"commonjs": true,
"node": true
"parserOptions": {
"project": "./tsconfig.json"
},
"rules": {
"import/no-named-as-default": 0,
"max-len": ["error", {
"code": 120,
"ignoreStrings": true,
Expand Down
30 changes: 30 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,30 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:

**Expected behavior**
A clear and concise description of what you expected to happen.

**Actual behavior**
A clear and concise description of what is happening.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Additional context**
Add any other context about the problem here.

- OS: [e.g. iOS/Windows/Linux]
- Node Version: [e.g. 18]
- openGraphScraperLite Version:
20 changes: 20 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
16 changes: 10 additions & 6 deletions .github/workflows/node.js.yml
Expand Up @@ -13,19 +13,23 @@ jobs:
buildAndTest:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
node-version: [10.x, 12.x, 14.x]
node-version:
- 16
- 18
- 20
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v1
uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
- name: Install Dependencies
run: npm ci
run: npm install
- name: Linting
run: npm run eslint
- name: Running Typescript
run: npm run build
- name: Unit Testing
run: npm run mocha:unit
- name: Integration Testing
run: npm run mocha:int
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,10 +1,17 @@
# Change Log

## 2.0.0

- This library no longer makes HTTP requests; it now just pulls Open Graph and Twitter Card info from the HTML passed into it.

## 1.1.1

- Fixing issues with ky import

## 1.1.0

- Setting up the openGraphScraperLite repo

## 1.0.0

- First version of openGraphScraperLite!
95 changes: 37 additions & 58 deletions README.md
@@ -1,103 +1,82 @@
# This module was meant to be a proof of concept. You will probably run into a bunch of cors issues using this since it's making the requests from the browser. Using a reverse proxy libraries like https://github.com/Rob--W/cors-anywhere might help solve the problem, but you are probably better off making the requests in a backend service then pass in the html into openGraphScraper.

# openGraphScraperLite

[![Node.js CI](https://github.com/jshemas/openGraphScraperLite/workflows/Node.js%20CI/badge.svg?branch=master)](https://github.com/jshemas/openGraphScraperLite/actions?query=branch%3Amaster)
[![Known Vulnerabilities](https://snyk.io/test/github/jshemas/openGraphScraperLite/badge.svg)](https://snyk.io/test/github/jshemas/openGraphScraperLite)

A simple javascript module for scraping Open Graph and Twitter Card info off a site. For Node.js usage, we recommend `open-graph-scraper` by the same people.
A simple JavaScript module for scraping Open Graph and Twitter Card info from given HTML. For Node.js usage, we recommend `open-graph-scraper`, which is maintained by the same people and can make HTTP requests.

## Installation

```bash
npm install open-graph-scraper-lite
npm install open-graph-scraper-lite --save
```

## Usage

Callback Example:
```javascript
const ogs = require('open-graph-scraper-lite');
const options = { url: 'http://ogp.me/' };
ogs(options, (error, results, response) => {
console.log('error:', error); // This is returns true or false. True if there was a error. The error it self is inside the results object.
console.log('results:', results); // This contains all of the Open Graph results
console.log('response:', response); // This contains the HTML of page
});
```

Promise Example:
```javascript
const ogs = require('open-graph-scraper-lite');
const options = { url: 'http://ogp.me/' };
const ogs = require('open-graph-scraper-lite');
const options = {
html: `<html><head>
<link rel="icon" type="image/png" href="https://bar.com/foo.png" />
<meta charset="utf-8" />
<meta property="og:description" name="og:description" content="html description example" />
<meta property="og:image" name="og:image" content="https://www.foo.com/bar.jpg" />
<meta property="og:title" name="og:title" content="foobar" />
<meta property="og:type" name="og:type" content="website" />
</head></html>`
};
ogs(options)
.then((data) => {
const { error, result, response } = data;
console.log('error:', error); // This is returns true or false. True if there was a error. The error it self is inside the results object.
console.log('result:', result); // This contains all of the Open Graph results
console.log('response:', response); // This contains the HTML of page
const { result } = data;
console.log('result:', result);
})
```

## Results JSON

Check the return for a ```success``` flag. If success is set to true, then the url input was valid. Otherwise it will be set to false. The above example will return something like...
```javascript
{
ogTitle: 'Open Graph protocol',
result: {
ogDescription: 'html description example',
ogTitle: 'foobar',
ogType: 'website',
ogUrl: 'http://ogp.me/',
ogDescription: 'The Open Graph protocol enables any web page to become a rich object in a social graph.',
ogImage: {
url: 'http://ogp.me/logo.png',
width: '300',
height: '300',
type: 'image/png'
},
requestUrl: 'http://ogp.me/',
ogImage: [ { url: 'https://www.foo.com/bar.jpg', type: 'jpg' } ],
favicon: 'https://bar.com/foo.png',
charset: 'utf-8',
success: true
}
```

## Options

| Name | Info | Default Value | Required |
|----------------------|----------------------------------------------------------------------------|---------------|----------|
| url | URL of the site. | | x |
| timeout | Timeout of the request | 2000 ms | |
| html | You can pass in an HTML string to run ogs on it. (use without options.url) | | |
| blacklist | Pass in an array of sites you don't want ogs to run on. | [] | |
| html | You can pass in an HTML string to run ogs on it. | | x |
| onlyGetOpenGraphInfo | Only fetch open graph info and don't fall back on anything else. | false | |
| ogImageFallback | Fetch other images if no open graph ones are found. | true | |
| customMetaTags | Here you can define custom meta tags you want to scrape. | [] | |
| allMedia | By default, OGS will only send back the first image/video it finds | false | |
| retry | Number of times ogs will retry the request. | 2 | |
| headers | An object containing request headers. Useful for setting the user-agent | {} | |
| peekSize | Sets the peekSize for the request | 1024 | |
| urlValidatorSettings | Sets the options used by validator.js for testing the URL | [Here](https://github.com/jshemas/openGraphScraper/blob/master/lib/openGraphScraper.js#L21-L36) | |

Note: `open-graph-scraper-lite` uses [ky](https://github.com/sindresorhus/ky) for requests and most of [ky's options](https://github.com/sindresorhus/ky#api) should work as `open-graph-scraper-lite` options.
## Custom Meta Tag Example

Custom Meta Tag Example:
```javascript
const ogs = require('open-graph-scraper-lite');
const ogs = require('open-graph-scraper-lite');
const options = {
url: 'https://github.com/jshemas/openGraphScraper',
html: `<html><head>
<link rel="icon" type="image/png" href="https://bar.com/foo.png" />
<meta charset="utf-8" />
<meta property="og:description" name="og:description" content="html description example" />
<meta property="og:image" name="og:image" content="https://www.foo.com/bar.jpg" />
<meta property="og:title" name="og:title" content="foobar" />
<meta property="og:type" name="og:type" content="website" />
<meta name="hostname" content="github.com">
</head></html>`,
customMetaTags: [{
multiple: false, // is there more then one of these tags on a page (normally this is false)
multiple: false, // is there more than one of these tags on a page (normally this is false)
property: 'hostname', // meta tag name/property attribute
fieldName: 'hostnameMetaTag', // name of the result variable
}],
};
ogs(options)
.then((data) => {
const { error, result, response } = data;
console.log('hostnameMetaTag:', result.hostnameMetaTag); // hostnameMetaTag: github.com
const { result } = data;
console.log('hostnameMetaTag:', result.customMetaTags.hostnameMetaTag); // hostnameMetaTag: github.com
})
```

## Tests

Then you can run the tests by running...
```bash
npm run test
```
22 changes: 22 additions & 0 deletions dist/index.d.ts
@@ -0,0 +1,22 @@
import type { OpenGraphScraperOptions, OgObject } from './lib/types';
/**
 * Scrapes Open Graph and Twitter Card info out of an HTML string supplied by the caller.
 * No URL option is documented here; the HTML to inspect is passed in via `options.html`.
 *
 * @param {object} options - The options used by Open Graph Scraper
 * @param {boolean} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else.
 * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape.
 * @param {string} options.html - The HTML string to run ogs on.
 * @returns {Promise} Promise Object with the Open Graph results
 */
export default function run(options: OpenGraphScraperOptions): Promise<ErrorResult | SuccessResult>;
/**
 * Shape of a successful outcome. `error` is the literal `false`, which lets callers
 * discriminate this type from `ErrorResult` by checking the `error` field.
 */
export type SuccessResult = {
/** Always `false` for a successful outcome. */
error: false;
/** HTML string returned alongside the scraped data. */
html: string;
/** The scraped Open Graph data. */
result: OgObject;
};
/**
 * Shape of a failed outcome. `error` is the literal `true` and `html` is always `undefined`.
 * NOTE(review): the compiled `run` in dist/index.js throws an object of this shape (the
 * promise rejects) rather than resolving with it — confirm callers handle it in a catch.
 */
export type ErrorResult = {
/** Always `true` for a failed outcome. */
error: true;
/** No HTML is reported on failure. */
html: undefined;
/** Failure details, carried in the same object type as a successful result. */
result: OgObject;
};
45 changes: 45 additions & 0 deletions dist/index.js
@@ -0,0 +1,45 @@
"use strict";
// Interop helper: reuse an already-installed `__importDefault` if one is present on `this`;
// otherwise wrap modules that are not flagged `__esModule` so they expose a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        // Already an ES-module-shaped object: hand it back untouched.
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* eslint-disable max-len, import/no-import-module-exports */
const openGraphScraper_1 = __importDefault(require("./lib/openGraphScraper"));
/**
 * Entry point: passes the supplied options (including the HTML string to inspect) to the
 * scraper and wraps the outcome in a uniform result object.
 *
 * @param {object} options - The options used by Open Graph Scraper
 * @param {boolean} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else.
 * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape.
 * @param {string} options.html - The HTML string to run ogs on.
 * @returns {Promise} Resolves with `{ error: false, result, html }`. On failure the promise
 * rejects with `{ error: true, result: { success: false, error, errorDetails }, html: undefined }`.
 */
async function run(options) {
    let scraped;
    try {
        scraped = await (0, openGraphScraper_1.default)(options);
    }
    catch (exception) {
        // Failures are reported as a plain result-shaped object rather than an Error instance,
        // so the caught exception is attached as `errorDetails` and its message as `error`.
        // eslint-disable-next-line @typescript-eslint/no-throw-literal
        throw {
            error: true,
            result: {
                success: false,
                error: exception.message,
                errorDetails: exception,
            },
            html: undefined,
        };
    }
    // Property reads stay outside the try block so that a malformed scraper return value
    // is not converted into the wrapped error shape.
    return {
        error: false,
        result: scraped.ogObject,
        html: scraped.html,
    };
}
// Expose `run` both as the ES-style default export and as the CommonJS module value.
// NOTE(review): the second assignment replaces the whole exports object, so `require()` of this
// file yields the `run` function itself; the `default` property set on the previous exports
// object is presumably no longer reachable by consumers — confirm this is intended.
exports.default = run;
module.exports = run;
10 changes: 10 additions & 0 deletions dist/lib/extract.d.ts
@@ -0,0 +1,10 @@
import type { OgObjectInteral, OpenGraphScraperOptions } from './types';
/**
 * extract all of the meta tags needed for ogs
 *
 * @param {string} body - the HTML string to extract meta tags from
 * @param {object} options - options for ogs
 * @return {object} object with ogs results
 *
 */
export default function extractMetaTags(body: string, options: OpenGraphScraperOptions): OgObjectInteral;

0 comments on commit 85d7581

Please sign in to comment.