Skip to content

Commit

Permalink
init 2.0 update to promise api and add global ocr
Browse files Browse the repository at this point in the history
  • Loading branch information
netpi committed Jul 20, 2016
1 parent ae58fa6 commit d558875
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 118 deletions.
74 changes: 43 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,51 +1,63 @@
## [Baidu-OCR-API](https://bce.baidu.com/doc/OCR/ProductDescription.html#.E4.BB.8B.E7.BB.8D) for nodejs


[![NPM version][npm-image]][npm-url] [![Build Status][travis-image]][travis-url] [![Coveralls Status][coveralls-image]][coveralls-url]

[![Downloads][downloads-image]][npm-url]

### Install
```
npm install baidu-ocr-api --save
### Install & Usage

#### 1. Global

```sh
npm install baidu-ocr-api -g


ocr --help

# 远程图片
ocr http://7pun4e.com1.z0.glb.clouddn.com/test.jpg

# 本地图片
ocr ./test.jpg

```
### Usage
##### 效果图

> 登录 [百度 BCE 控制台](https://console.bce.baidu.com/iam/#/iam/accesslist) 申请 access key
>
参看 [examples](https://github.com/netpi/baidu-ocr-api/tree/master/examples)
![](http://7pun4e.com1.z0.glb.clouddn.com/test.jpg)

```js
var ak = 'your ak';
var sk = 'your sk';
var ocr = require('baidu-ocr-api').create(ak,sk);
var opt= {
// url can be a cdn url, or a local url like : __dirname+'/test.jpg'
url:'http://7xod3k.com1.z0.glb.clouddn.com/fbuguhlemsgeilpkxykeluenbjkozzne',
// type: line,text,character default:line
type:'line',
language:'CHN_ENG'
}
ocr.scan(opt,function (err,result) {
if(err){
return console.log(err);
}
console.log(result); // 参看 examples

})

的早期世界观是建立在《魔兽争霸3:冰封王座》的基础上的,因此与现在暴雪公司的《魔兽世界》的背景设定有一定的联系,但由于版本更迭又略有不同。整个地图中地形名费伍德森林,费伍德森林是网络游戏《魔兽世界》中的游戏地图,位于卡利姆多境内的一片森林。这片由森林和草场构成的繁荣动荡的土地曾经由卡尔多雷掌管,并曾经处于半神塞纳留斯的保护下。燃烧军团的铁蹄践踏了这片土地,没有被毁灭的树木和生物则被恶魔的暴行永远的诅咒着


#### 2. nodejs
```sh
npm install baidu-ocr-api --save

```
### opt
| 字段名 || 选项 | 描述 |
| -------- | ---------------------------------------- | ----------- | ------ |
| url | `cdn` 地址 <br/> 本地地址: __dirname+'/test.jpg' | 必选 | 目标地址 |
| type | `text`:识别某张图中的所有文字<br>`line`: 将结果作为单行文字去解析<br>`character`:识别某张图中的单个文字 | 可选(默认:line) | 返回结果结构 |
| language | , <br/>可选 : `CHN_ENG`/`CHN`/`ENG` | 可选<br/> 默认:`CHN_ENG` | 返回语言类型 |
FYI [examples](https://github.com/netpi/baidu-ocr-api/tree/master/examples)

```js
/**
登陆 百度bcs控制台中心 申请access key
https://console.bce.baidu.com/iam/#/iam/accesslist
**/
var ak = 'your ak';
var sk = 'your sk';
var ocr = require('baidu-ocr-api').create(ak,sk);
// 外部图片
ocr.scan({
url:'http://7pun4e.com1.z0.glb.clouddn.com/test.jpg', // 支持本地路径
type:'text',
}).then(function (result) {
return console.log(result)
}).catch(function (err) {
console.log('err', err);
})

```

### test
```sh
Expand Down
34 changes: 34 additions & 0 deletions bin/ocr
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env node

/**
 * Command-line front end for the OCR client.
 *
 *   ocr [url] [options]
 *
 * The image location is the first positional argument (or -u) and may be a
 * remote http(s) URL or a local file path. The recognised text is printed
 * to stdout.
 */

// NOTE(review): the fallback access key / secret key below are committed to
// source control. Prefer supplying BAIDU_AK / BAIDU_SK through the
// environment, and consider rotating these demo credentials.
var ak = process.env.BAIDU_AK||'b7d11214c8fc452db3de12028cf46daa';
var sk = process.env.BAIDU_SK||'64631fe987f4423bb0a117101bf90a45';
var ocr = require('../lib/ocr').create(ak,sk);
var argv = require("minimist")(process.argv.slice(2));
// Required for its side effect: the help text below uses the `.yellow`
// getter on strings.
var colors = require('colors');

// Prefix test used as url.startWith(...) by lib/ocr.js.
// lastIndexOf(x, 0) can only match at index 0, so the rest of the string is
// never scanned.
String.prototype.startWith = function(compareStr){
  return this.lastIndexOf(compareStr, 0) === 0;
};

var opt = {};
opt.url = argv._[0]||argv.u;
opt.type = argv.type||argv.t||'text';
// `-l` is the short form advertised in the help text; the default must match
// the language code lib/ocr.js uses ('CHN_ENG').
opt.language = argv.language||argv.l||'CHN_ENG';
opt.merge = argv.m||argv.merge;

// Print usage when no image was given or help was requested.
if(!opt.url||argv.h||argv.help){
  console.log('\n');
  console.log('Example usage:\n');
  console.log(" ocr", "[url] [options] ");
  console.log(" ocr","http://7pun4e.com1.z0.glb.clouddn.com/test.jpg", "-m -l CHN_ENG ".yellow);
  console.log('\n');
  console.log("Options:\n");

  console.log(" -t, --type".yellow," text line character ");
  console.log(" -l, --language".yellow," CHN_ENG CHN ENG");
  console.log(" -m, --merge".yellow," merge result");
  process.exit(0);
}

ocr.scan(opt).then(function (result) {
  // When merging is on, scan() resolves with {results: {words, rectangles}}.
  // When it is off, `results` is the raw array returned by the service and
  // has no `words` property, so print the array itself instead of `undefined`.
  var words = result.results.words;
  console.log(words !== undefined ? words : result.results);
}).catch(function (err) {
  console.log('err', err);
  // Report the failure to the calling shell.
  process.exitCode = 1;
});
19 changes: 19 additions & 0 deletions examples/cdnurl.ocr.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Example: recognise the text in a remotely hosted image.
 *
 * Log in to the Baidu BCE console to apply for an access key:
 * https://console.bce.baidu.com/iam/#/iam/accesslist
 */

// Credentials can be overridden with BAIDU_AK / BAIDU_SK, the same
// environment variables bin/ocr reads. The literals are the demo keys the
// example shipped with.
// NOTE(review): these keys are committed to source control; consider
// rotating them.
var ak = process.env.BAIDU_AK || 'b7d11214c8fc452db3de12028cf46daa';
var sk = process.env.BAIDU_SK || '64631fe987f4423bb0a117101bf90a45';
var ocr = require('../').create(ak,sk);

// Remote image
ocr.scan({
  url:'http://7pun4e.com1.z0.glb.clouddn.com/test.jpg',
  type:'text'
}).then(function (result) {
  console.log(result);
}).catch(function (err) {
  console.log('err', err);
});
11 changes: 1 addition & 10 deletions examples/ocr.js → examples/localurl.ocr.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,7 @@ https://console.bce.baidu.com/iam/#/iam/accesslist
var ak = 'b7d11214c8fc452db3de12028cf46daa';
var sk = '64631fe987f4423bb0a117101bf90a45'
var ocr = require('../').create(ak,sk);
// 外部图片
ocr.scan({
url:'https://raw.githubusercontent.com/netpi/baidu-ocr-api/master/test/test.jpg',
type:'text',
},function (err,result) {
if(err){
return console.log(result)
}
console.log(result);
})

// 本地图片
ocr.scan({
url:__dirname+'/test.jpg',
Expand Down
1 change: 1 addition & 0 deletions examples/readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
### start


```js
node localurl.ocr.js
```
165 changes: 96 additions & 69 deletions lib/ocr.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
var crypto = require('crypto');
var urllib = require('urllib');
var Promise = require('bluebird');
var fs = require('fs');
var request = require('request-promise');
var urllib = require('urllib');
Promise.promisifyAll(fs);

var headersToSign = [];
function OCR(ak,sk) {
this.ak = ak;
Expand All @@ -17,9 +21,15 @@ OCR.prototype.scan = function scan(opt,cb) {
line:'/v1/recognize/line', // 将结果作为单行文字去解析
character:'/v1/recognize/character' // 识别某张图中的单个文字
}
var merge = opt.merge||true;
var merge = opt.merge;
if(merge === 'false'||merge ===false)
{
merge = false;
}else{
merge = true;
}
// init data
var type = opt.type||'line';
var type = opt.type||'text';
var path = pathOpt[type];
var url = opt.url;
var language = opt.language||'CHN_ENG';
Expand All @@ -34,53 +44,66 @@ OCR.prototype.scan = function scan(opt,cb) {
'host': 'ocr.bj.baidubce.com',
'x-bce-date': requestDate
};
getImgBase64(url,function (err,result) {
if(err){
return cb(err);
}
var data = {
base64:result,
language:language
};
// get Authorization
var databuffer = new Buffer(JSON.stringify(data));
headers['Content-Type'] = 'application/json';
headers['Content-Length'] = databuffer.length;
String.prototype.startWith = function(compareStr){
return this.indexOf(compareStr) == 0;
}
return new Promise(function (resolve,reject) {
getImgBase64(url).then(function(result) {
var data = {
base64:result,
language:language
};
// get Authorization
var databuffer = new Buffer(JSON.stringify(data));
headers['Content-Type'] = 'clarapplication/json';
headers['Content-Length'] = databuffer.length;

var content = 'bce-auth-v1/'+ accessKeyId +'/'+ requestDate +'/' + expire;
// get SigningKey
var SigningKey = crypto.createHmac('sha256', secretAccessKey).update(content).digest('hex');
var CanonicalURI = path;
var CanonicalQueryString = getCanonicalQueryString(params);
var CanonicalHeaders = getCanonicalHeaders(headers);
var CanonicalRequest = [httpMethod.toUpperCase(), CanonicalURI, CanonicalQueryString, CanonicalHeaders].join('\n');
// get Signature
var Signature = crypto.createHmac('sha256', SigningKey).update(CanonicalRequest).digest('hex');
// Mosaic Authorization
headers.Authorization = [content, headersToSign.join(';'), Signature].join('/');
var url = 'http://'+headers.host+path;
var options = {
host: headers.host,
path: path+'?'+getCanonicalQueryString(params),
method: httpMethod,
headers: headers,
timeout:15000,
data:data
};
// send request
urllib.request(url,options,function (err,data,resp) {
if(err){
return cb(err)
}
var resultJson = JSON.parse(data.toString());
return cb(null,resultJson);
var content = 'bce-auth-v1/'+ accessKeyId +'/'+ requestDate +'/' + expire;
// get SigningKey
var SigningKey = crypto.createHmac('sha256', secretAccessKey).update(content).digest('hex');
var CanonicalURI = path;
var CanonicalQueryString = getCanonicalQueryString(params);
var CanonicalHeaders = getCanonicalHeaders(headers);
var CanonicalRequest = [httpMethod.toUpperCase(), CanonicalURI, CanonicalQueryString, CanonicalHeaders].join('\n');
// get Signature
var Signature = crypto.createHmac('sha256', SigningKey).update(CanonicalRequest).digest('hex');
// Mosaic Authorization
headers.Authorization = [content, headersToSign.join(';'), Signature].join('/');
var url = 'http://'+headers.host+path;
var options = {

json:data,
host: headers.host,
path: path+'?'+getCanonicalQueryString(params),
headers: headers,
method:httpMethod,
encoding:'UTF-8'
};
request(url,options).then(function (result) {
if(!result.results){
return reject(result)
}
if(merge){
var words = '';
var rectangles = [];
result.results.forEach(function (result) {
words+= result.word;
rectangles.push(result.rectangle)
})
return resolve({results:{
words:words,
rectangles:rectangles
}})
}
return resolve(result);
}).catch(function (err) {
reject(err);
})
}).catch(function (err) {
reject(err);
})
})
}

/**
 * Reports whether this string begins with `compareStr`.
 *
 * Uses lastIndexOf(compareStr, 0), which can only match at index 0, so a
 * non-matching string is rejected without scanning its full length.
 *
 * @param {string} compareStr - prefix to look for (coerced to a string)
 * @returns {boolean} true when the string starts with `compareStr`
 */
String.prototype.startWith = function(compareStr){
  return this.lastIndexOf(compareStr, 0) === 0;
};
function getCanonicalQueryString(params) {
var result = [];
for(var key in params) {
Expand Down Expand Up @@ -129,28 +152,32 @@ function getCanonicalQueryString(params) {
return result;
}
// Get the image contents as a base64 string.
// `url` is treated as a remote address when it starts with 'http'; otherwise
// it is read as a local file path.
// NOTE(review): this span is rendered from a diff, so the callback-style
// lines (using `cb`) and the Promise-style lines (using resolve/reject) are
// presumably the before/after versions of the same function — confirm
// against the repository.
function getImgBase64(url,cb) {
function getImgBase64(url) {
return new Promise(function (resolve,reject) {
// remote URL
// (the startWith('https') test is redundant: any string that starts with
// 'https' also starts with 'http')
if(url.startWith('http')||url.startWith('https')){
urllib.request(url,{
headers:{
'User-Agent': 'Paw/2.1 (Macintosh; OS X/10.10.5) GCDHTTPRequest',
'Referer':'http://baidu.com'
}
},function(err,data,resp) {

if(err){
return cb(err)
}
return cb(null,data.toString('base64'));
})
}else{ // local file path
fs.readFile(url,function (err,data) {
if(err){
return cb(err)
}else{
return cb(null,data.toString('base64'))
}
})
}
// remote URL (Promise-style variant of the branch above)
if(url.startWith('http')||url.startWith('https')){
request({
method:'GET',
url:url,
headers:{
'User-Agent': 'Paw/2.1 (Macintosh; OS X/10.10.5) GCDHTTPRequest',
'Referer':'http://baidu.com'
},
// presumably makes `request` hand back the raw body as a Buffer, which
// toString('base64') below relies on — confirm in the request docs
encoding:null
}).then(function (result) {
resolve(result.toString('base64'));
}).catch(function (err) {
reject(err);
})
}else{ // local file path
// readFileAsync is the promisified fs.readFile added by
// Promise.promisifyAll(fs) near the top of lib/ocr.js
fs.readFileAsync(url)
.then(function (data) {
resolve(data.toString('base64'))
})
.catch(function (err) {
reject(err);
})
}
})
}
};

0 comments on commit d558875

Please sign in to comment.