Skip to content

Commit

Permalink
feat(pkgs): crawler
Browse files Browse the repository at this point in the history
Signed-off-by: sam <sam@freighttrust.com>
  • Loading branch information
sam committed Apr 5, 2021
1 parent 6e0cec2 commit f3f8db1
Show file tree
Hide file tree
Showing 26 changed files with 7,699 additions and 248 deletions.
50 changes: 25 additions & 25 deletions .github/workflows/linter.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name: linter

on:
push:

jobs:
build:
# Name the Job
name: Lint Code Base
# Set the agent to run on
runs-on: ubuntu-latest

steps:
- name: Checkout Code
uses: actions/checkout@v2
with:
# Full git history is needed to get a proper list of changed files within `super-linter`
fetch-depth: 0

- name: Lint Code Base
uses: github/super-linter@v3
env:
VALIDATE_ALL_CODEBASE: true
DEFAULT_BRANCH: master
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
name: linter

on:
push:

jobs:
build:
# Name the Job
name: Lint Code Base
# Set the agent to run on
runs-on: ubuntu-latest

steps:
- name: Checkout Code
uses: actions/checkout@v2
with:
# Full git history is needed to get a proper list of changed files within `super-linter`
fetch-depth: 0

- name: Lint Code Base
uses: github/super-linter@v3
env:
VALIDATE_ALL_CODEBASE: true
DEFAULT_BRANCH: master
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57 changes: 57 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
### Changelog

All notable changes to this project will be documented in this file. Dates are displayed in UTC.

Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).

#### [v0.0.8](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.7...v0.0.8)

> 23 March 2021
- fix(defects): json [`4aa8f43`](https://github.com/manifoldfinance/mev-corpus/commit/4aa8f43ac4ad478e27112d3a576d73c3091c81d3)
- feat(pkgs): verified contracts [`c397223`](https://github.com/manifoldfinance/mev-corpus/commit/c39722319a39a10d89d132164248b9dd695a5487)
- etherscan registry export [`9b815b1`](https://github.com/manifoldfinance/mev-corpus/commit/9b815b1b5a48493f5ff8d539df24d0cd43d1283e)

#### [v0.0.7](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.6...v0.0.7)

> 13 March 2021
- feat(mev-bundles): init [`fe1e626`](https://github.com/manifoldfinance/mev-corpus/commit/fe1e6263072f4928380d8ae570a6784c8e4c01c1)
- move to dune/ [`579559f`](https://github.com/manifoldfinance/mev-corpus/commit/579559f3b979d23cd23a4fec42969b4effe921a2)
- Create coinbase.sql [`e26b348`](https://github.com/manifoldfinance/mev-corpus/commit/e26b3489c757e96083bce32d1ac1517a8e1e7f4b)

#### [v0.0.6](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.5...v0.0.6)

> 6 March 2021
- chore(update): log update [`b777ef6`](https://github.com/manifoldfinance/mev-corpus/commit/b777ef68ba6543c76f6cf4943bb3d7460a6c23fd)
- fix(config): data configuration [`125afa7`](https://github.com/manifoldfinance/mev-corpus/commit/125afa7e2c98d4d47f994134b38d6cba78109361)
- feat(pending): pending classification subpackage [`c6db2ca`](https://github.com/manifoldfinance/mev-corpus/commit/c6db2cad35bbf8de16f98bb63acb6f26d028c1d2)

#### [v0.0.5](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.4...v0.0.5)

> 29 January 2021
- data(add): updated list [`4892f0a`](https://github.com/manifoldfinance/mev-corpus/commit/4892f0a59b897ffda4dc9b28b4d8dd9d0f607e7c)
- update [`8c1076a`](https://github.com/manifoldfinance/mev-corpus/commit/8c1076a6068a3ce2572969982617822c5ccae462)
- feat(lists): known bots [`afa9b1d`](https://github.com/manifoldfinance/mev-corpus/commit/afa9b1d6eee27a40076361ac98521f63c90fe5a4)

#### [v0.0.4](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.3...v0.0.4)

> 27 January 2021
- chore(repo): gitignore [`1ea05f3`](https://github.com/manifoldfinance/mev-corpus/commit/1ea05f3e359f6ec34554fca611405e602b088ced)
- feat(pkgs): packages update [`a3a587c`](https://github.com/manifoldfinance/mev-corpus/commit/a3a587cb6326f313ada09bda9aca3dca962af3ff)

#### [v0.0.3](https://github.com/manifoldfinance/mev-corpus/compare/v0.0.1...v0.0.3)

> 27 January 2021
- feat(packages): new packages and data update [`66a3c09`](https://github.com/manifoldfinance/mev-corpus/commit/66a3c09374b50d841639339383dcc3769312fe33)
- feat(corpus): exchanges list [`b91bb15`](https://github.com/manifoldfinance/mev-corpus/commit/b91bb1529fc8cbacc7512352977803049be237c5)

#### v0.0.1

> 27 January 2021
- feat(init): initial commit [`8c6892a`](https://github.com/manifoldfinance/mev-corpus/commit/8c6892addfc0608ce975c6e78de8096f9e9968d7)
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

> Miner Extracted Value Data Corpus

<!-- START pkgtoc, keep to allow update -->

**Packages**
Expand Down
8 changes: 4 additions & 4 deletions data.config.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
delimiter: "",
newline: "",
quoteChar: '"',
escapeChar: '"',
delimiter: " ",
newline: " ",
quoteChar: "''",
escapeChar: "",
header: false,
transformHeader: undefined,
dynamicTyping: false,
Expand Down
7 changes: 7 additions & 0 deletions packages/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.tgz
*.tar.gz
*.tar
*.log
node_modules
.DS_Store

1 change: 0 additions & 1 deletion packages/chain-utils/lib/well-known/multicall.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

const multicallAddresses = {
1: '0xeefba1e63905ef1d7acba5a8513c70307c1ce441',
3: '0xF24b01476a55d635118ca848fbc7Dab69d403be3',
Expand Down
6 changes: 6 additions & 0 deletions packages/crawler/.prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
output/
build/
archive/
downloads/
*.csv
*.sol
15 changes: 15 additions & 0 deletions packages/crawler/.prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$schema": "http://json.schemastore.org/prettierrc",
"arrowParens": "always",
"bracketSpacing": true,
"jsxBracketSameLine": false,
"jsxSingleQuote": false,
"printWidth": 80,
"proseWrap": "always",
"quoteProps": "as-needed",
"semi": true,
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "all",
"useTabs": false
}
11 changes: 11 additions & 0 deletions packages/crawler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# `crawler`

> TODO: description
## Usage

```
const crawler = require('crawler');
// TODO: DEMONSTRATE API
```
7 changes: 7 additions & 0 deletions packages/crawler/__tests__/crawler.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
'use strict';

// Loads the package entry point (the parent directory of __tests__).
// The binding is not used by the placeholder suite below.
const crawler = require('..');

// Placeholder suite: `it` is given a title but no test body, so nothing is
// asserted yet.
describe('crawler', () => {
it('needs tests');
});
6 changes: 6 additions & 0 deletions packages/crawler/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
crawler.js
filter.js
getContracts.js
retryGetContracts.js
*/
97 changes: 97 additions & 0 deletions packages/crawler/lib/crawler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
'use strict';

// web3.js client library; the shared `web3` instance below is used for every
// chain query in this file.
const Web3 = require('web3');
// NOTE(review): no provider is passed to the constructor here. Presumably a
// provider (e.g. an HTTP/WebSocket endpoint) has to be configured before the
// web3.eth calls below can reach a node — confirm how this is meant to be set.
const web3 = new Web3();
// Node file-system module; saveToCSV uses it to append results to disk.
const fs = require('fs');

/**
 * @function getTransaction
 * @summary Fetches the receipt of one transaction, addressed by block and position.
 *
 * Looks the transaction up with `web3.eth.getTransactionFromBlock` and then
 * asks `web3.eth.getTransactionReceipt` for the receipt of its hash.
 *
 * @param {number} block_no - Block to read the transaction from.
 * @param {number} index - Position of the transaction inside the block.
 * @returns {Promise<Object>} Resolves with whatever `getTransactionReceipt`
 *   yields for the transaction hash.
 * @throws Rejects when either web3 call fails, or when no transaction is
 *   returned for the position (reading `hash` of the missing value throws).
 */
async function getTransaction(block_no, index) {
  // Use the `index` parameter; the previous code passed an undeclared `i`,
  // which is a ReferenceError in strict mode.
  const tx = await web3.eth.getTransactionFromBlock(block_no, index);
  return web3.eth.getTransactionReceipt(tx.hash);
}

/**
 * Calls getTransaction and, on failure, waits one second and tries again until
 * the retry budget is used up.
 *
 * @param {number} block_no - Block to read the transaction from.
 * @param {number} index - Position of the transaction inside the block.
 * @param {number} [retries=10] - Attempts remaining. When it reaches 0 the
 *   returned promise rejects with `err`.
 * @param {*} [err=null] - The most recent failure, carried through the
 *   recursion so it can be reported once the budget is exhausted.
 * @returns {Promise<Object>} Resolves with the result of getTransaction.
 */
async function retryGetTransaction(block_no, index, retries = 10, err = null) {
  if (!retries) {
    throw err;
  }
  try {
    return await getTransaction(block_no, index);
  } catch (failure) {
    // The message previously referenced the undefined names `from` and `i`,
    // which made this handler throw instead of retrying.
    console.log(
      `Retrying ${block_no} block ${index}th transaction, ${retries - 1} try left`,
    );
    await sleep(1000);
    return retryGetTransaction(block_no, index, retries - 1, failure);
  }
}
/**
 * Collects the addresses of all contracts created in a single block.
 *
 * Requests the receipt of every transaction in the block concurrently (each
 * request goes through retryGetTransaction) and keeps the receipts that carry
 * a `contractAddress`.
 *
 * @param {number} block_no - Block to inspect.
 * @returns {Promise<string[]>} Contract addresses found in the block; empty
 *   when the block has no contract-creating transactions.
 */
async function getContractsByBlock(block_no) {
  const tx_count = await web3.eth.getBlockTransactionCount(block_no);
  // The loop index is scoped locally; the previous `for (i = 0; ...)` assigned
  // to an undeclared variable, which throws in strict mode.
  const pending = [];
  for (let i = 0; i < tx_count; i++) {
    pending.push(retryGetTransaction(block_no, i));
  }
  const receipts = await Promise.all(pending);
  return receipts
    // `!= null` deliberately matches both null and undefined.
    .filter((receipt) => receipt != null && receipt.contractAddress != null)
    .map((receipt) => receipt.contractAddress);
}

/**
 * Pauses for the given duration.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<undefined>} Resolves, without a value, once the timer fires.
 */
function sleep(ms) {
  const startTimer = (done) => {
    setTimeout(done, ms);
  };
  return new Promise(startTimer);
}

/**
 * Appends contract addresses to a text file, one address per line.
 *
 * Despite the name, no CSV columns are written: every inner list is joined
 * with newlines and appended to the file.
 *
 * @param {string[][]} arr - Lists of addresses (one list per block).
 * @param {string} [file_path='contracts.txt'] - File to append to.
 * @returns {fs.WriteStream} The stream being written, already ended; listen
 *   for its `finish` event to know when the data has been flushed.
 */
function saveToCSV(arr, file_path = 'contracts.txt') {
  // 'a' = append, so earlier batches are preserved.
  const file = fs.createWriteStream(file_path, { flags: 'a' });
  file.on('error', (err) => {
    // Report the underlying error instead of dropping it.
    console.error('ERROR while writing to file', err);
  });
  for (const addresses of arr) {
    // Skip empty lists so no blank lines end up in the output.
    if (addresses.length > 0) {
      file.write(`${addresses.join('\n')}\n`);
    }
  }
  file.end();
  return file;
}

/**
 * Runs get100blocks for a batch and, if it fails, waits 15 seconds and tries
 * again until the retry budget is used up.
 *
 * @param {number} from - First block number handed to get100blocks.
 * @param {number} [retries=10] - Attempts remaining; at 0 the promise rejects
 *   with `err`.
 * @param {*} [err=null] - Most recent failure, reported once retries run out.
 * @returns {Promise<number>} Resolves with the value returned by get100blocks.
 */
async function retryGetBlocks(from, retries = 10, err = null) {
  if (!retries) {
    throw err;
  }
  try {
    return await get100blocks(from);
  } catch (failure) {
    console.log(`Retrying from ${from}th block ${retries - 1} try left`);
    await sleep(15000);
    return retryGetBlocks(from, retries - 1, failure);
  }
}

/**
 * Crawls one batch of 100 consecutive blocks and appends the contracts found
 * to the output file.
 *
 * All blocks of the batch are queried concurrently. Results are written only
 * after every block has succeeded, so a failed (and later retried) batch does
 * not leave partial output behind.
 *
 * @param {number} from - Number of the first block in the batch (inclusive).
 * @returns {Promise<number>} Total number of contract addresses found in the
 *   batch.
 */
async function get100blocks(from) {
  console.log(`Crawling from ${from}th block`);
  const pending = [];
  // Start at offset 0: the previous loop began at 1, which skipped block
  // `from` itself and read only 99 blocks per batch.
  for (let offset = 0; offset < 100; offset++) {
    pending.push(getContractsByBlock(from + offset));
  }
  const results = (await Promise.all(pending)).filter((l) => l.length !== 0);
  saveToCSV(results);
  // Brief pause between batches.
  await sleep(1000);
  // Count addresses, not blocks: the caller accumulates this as a contract
  // total, and one block can create several contracts.
  return results.reduce((total, l) => total + l.length, 0);
}

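// CSV format
// block# index# txHash contractAddress
// NOTE(review): saveToCSV currently writes only contract addresses, one per
// line; the columns listed above are not produced by the code in this file.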
/**
 * Crawls a range of 100-block batches sequentially and logs the running total
 * of contracts found.
 *
 * Each "base" stands for the batch starting at block `base * 100`. Batches are
 * processed one after another through retryGetBlocks.
 *
 * @param {number} [startBase=35000] - First batch index (inclusive).
 * @param {number} [endBase=93380] - Batch index to stop before (exclusive).
 * @returns {Promise<number>} Sum of the values returned by retryGetBlocks for
 *   the whole range.
 */
async function crawl(startBase = 35000, endBase = 93380) {
  let contractCount = 0;
  for (let base = startBase; base < endBase; base++) {
    contractCount += await retryGetBlocks(base * 100);
    console.log(`Found ${contractCount} contracts`);
  }
  return contractCount;
}

// Start crawling as soon as this file is executed. The promise is handled
// explicitly so that a crawl that gives up (for example after its retries are
// exhausted) is reported and reflected in the exit code instead of surfacing
// as an unhandled rejection.
crawl().catch((err) => {
  console.error('Crawl aborted', err);
  process.exitCode = 1;
});

/** module.exports = crawl; */
32 changes: 32 additions & 0 deletions packages/crawler/lib/filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//https://api.etherscan.io/api?module=contract&action=getsourcecode&address=${address}
const axios = require('axios');
var fs = require('fs');
const csv = require('fast-csv');
const path = require('path');

var results = [];

/**
 * Writes the rows collected in the module-level `results` array to
 * `addresses_filtered.csv`, located in the same directory as this file.
 */
function saveResult() {
  const outputPath = path.resolve(__dirname, 'addresses_filtered.csv');
  const sink = fs.createWriteStream(outputPath);
  csv.writeToStream(sink, results);
}

/**
 * Reads `results.csv` (next to this file), collects the address of every row
 * marked as failed into the module-level `results` array, and writes the
 * collected rows out via saveResult once the input has been fully parsed.
 *
 * The returned promise settles as soon as the stream is set up; it does not
 * wait for parsing to finish.
 */
async function filter() {
  const inputPath = path.resolve(__dirname, 'results.csv');
  const input = fs.createReadStream(inputPath);

  const onRow = (row) => {
    // NOTE(review): the column key has a leading space (' status'); presumably
    // the header row of results.csv is written with a space after the comma —
    // confirm against the file that produces it.
    if (row[' status'] != 'failed') {
      return;
    }
    console.log('Found failed');
    results.push([row.address]);
  };

  const onEnd = async (rowCount) => {
    console.log(`Parsed ${rowCount} rows`);
    saveResult();
  };

  const parser = csv.parseStream(input, { headers: true });
  parser
    .on('error', (error) => console.error(error))
    .on('data', onRow)
    .on('end', onEnd);
}
// Run the filter as soon as this file is loaded.
filter();
// Expose `filter` as the `default` property of the CommonJS exports object.
exports.default = filter;
Loading

0 comments on commit f3f8db1

Please sign in to comment.