Skip to content
This repository has been archived by the owner on Dec 2, 2021. It is now read-only.

Commit

Permalink
Merge branch '39/packagist'
Browse files Browse the repository at this point in the history
  • Loading branch information
brew committed Jul 7, 2017
2 parents 4e5ccd9 + 11e09e0 commit 41df667
Show file tree
Hide file tree
Showing 7 changed files with 378 additions and 2 deletions.
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Potentially, we'd love to see interest from other non-profits who receive funds
- [NPM](#npm)
- [PyPI](#pypi)
- [RubyGems](#rubygems)
- [Packagist](#packagist)
- [Social Media](#social-media)
- [Twitter](#twitter)
- [Facebook](#facebook)
Expand Down Expand Up @@ -161,7 +162,7 @@ The PyPI processor requires a Google API account with generated credential to ma

#### RubyGems

The RubyGems processor collects gem package download data from the rubygems.org API.
The RubyGems processor collects ruby gem download data from the rubygems.org API.

- **total_downloads**: collected directly from the API
- **downloads**: daily downloads calculated from yesterday's `total_downloads` value, if present.
Expand All @@ -177,6 +178,26 @@ config:

No historical download data is collected for RubyGems.

#### Packagist

The Packagist processor collects daily download data for PHP packages from the packagist.org API.

- **downloads**: daily downloads collected directly from the API.

```yaml
config:
code-packaging:
packagist:
packages:
- "frictionlessdata/tableschema"
- "frictionlessdata/datapackage"
```

Note: `packages` defined in the config must include their owner organization in the form `organization_name/package_name`.

Note: results from the packagist.org API appear to lag a couple of days behind the current date.


### Social Media

#### Twitter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def add_steps(steps: list, pipeline_id: str,
'gem_id': gem
}))

if 'packagist' in config:
for package in config['packagist']['packages']:
steps.append(('measure.add_packagist_resource', {
'package': package
}))

steps.append(('measure.remove_resource', {
'name': 'latest-project-entries'
}))
Expand Down
108 changes: 108 additions & 0 deletions datapackage_pipelines_measure/processors/add_packagist_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import dateutil
from collections import OrderedDict

import simplejson
import requests

from datapackage_pipelines.generators import slugify
from datapackage_pipelines.wrapper import ingest, spew

import logging
log = logging.getLogger(__name__)


def _request_data_from_packagist(endpoint, timeout=30):
    '''Request data and handle errors from packagist.org REST API.

    :param endpoint: path (starting with `/`) appended to
        `https://packagist.org`.
    :param timeout: seconds passed to `requests.get` so that a stalled
        connection cannot block the pipeline indefinitely.
    :returns: the decoded JSON body of the response.
    :raises Exception: if the response status code is not 200.
    :raises ValueError: if the response body is not valid JSON.
    '''
    packagist_url = 'https://packagist.org{endpoint}' \
        .format(endpoint=endpoint)

    packagist_response = requests.get(packagist_url, timeout=timeout)

    if packagist_response.status_code != 200:
        log.error('An error occurred fetching Packagist data: {}'
                  .format(packagist_response.text))
        raise Exception(packagist_response.text)

    try:
        return packagist_response.json()
    except ValueError:
        # Every JSON decode error `requests` can raise (stdlib json,
        # simplejson, or its own wrapper) derives from ValueError, so this
        # does not depend on which JSON backend `requests` selected.
        log.error('Expected JSON in response from: {}'.format(packagist_url))
        raise


def _request_package_stats_from_packagist(package):
    '''Fetch the `stats/all.json` payload for `package` from packagist.org.

    `package` is interpolated into the endpoint path as given; the decoded
    JSON response is returned unchanged.
    '''
    return _request_data_from_packagist(
        '/packages/{package}/stats/all.json'.format(package=package))


def packagist_collector(package, latest_row):
    '''Build download rows for `package` from the Packagist stats API.

    :param package: package identifier; the part after the last `/` is used
        as the `package` value of each row.
    :param latest_row: most recent stored row for this package, or a falsy
        value. When given, entries dated before `latest_row['date']` are
        dropped (the entry for that date itself is kept).
    :returns: list of dicts with keys `package`, `source`, `date` and
        `downloads`, ordered by date label.
    '''
    # A bare `import dateutil` does not load the `parser` submodule, so
    # import it explicitly instead of relying on another module having
    # imported it first.
    from dateutil import parser as date_parser

    package_info = _request_package_stats_from_packagist(package)
    download_by_date = dict(zip(package_info['labels'],
                                package_info['values']))

    if latest_row:
        # If there's a latest_row, reject all items in download_by_date
        # before latest_row date.
        # NOTE(review): the string comparison assumes labels are
        # 'YYYY-MM-DD' strings -- confirm against the API.
        latest_row_date_str = latest_row['date'].strftime('%Y-%m-%d')
        download_by_date = {k: v for k, v in download_by_date.items()
                            if k >= latest_row_date_str}

    package_name = package.split('/')[-1]

    # Sorting the items orders the rows by date label.
    return [
        {
            'package': package_name,
            'source': 'packagist',
            'date': date_parser.parse(label).date(),
            'downloads': downloads
        }
        for label, downloads in sorted(download_by_date.items())
    ]


# Read this step's parameters, the incoming descriptor and the resource
# iterator from the pipeline.
parameters, datapackage, res_iter = ingest()
package = parameters['package']

# Every column of the new resource is declared as a string field.
headers = ['package', 'source', 'date', 'downloads']
resource = {
    'name': slugify(package),
    'path': 'data/{}.csv'.format(slugify(package)),
    'schema': {
        'fields': [{'name': header, 'type': 'string'} for header in headers]
    }
}

# Register the new resource at the end of the descriptor's resource list.
datapackage['resources'].append(resource)


def process_resources(res_iter, datapackage, package):
    '''Yield the incoming resources followed by the new Packagist resource.

    If the first resource in `datapackage` is named
    `latest-project-entries`, it is read to find the latest stored row for
    `package` and then re-emitted with all of its rows. All remaining
    resources are forwarded untouched, and the rows returned by
    `packagist_collector` are yielded last.

    :param res_iter: iterator of resources (each an iterable of row dicts).
    :param datapackage: descriptor dict with a `resources` list.
    :param package: package identifier passed to `packagist_collector`.
    '''

    def get_latest_row(first):
        # Find the last row for this package while buffering every row, so
        # the consumed resource can be re-emitted in full.
        latest_row = None
        package_name = package.split('/')[-1]
        my_rows = []
        for row in first:
            if row['package'] == package_name and row['source'] == 'packagist':
                latest_row = row
            my_rows.append(row)
        return latest_row, iter(my_rows)

    # Default up front so `latest_row` is always bound, including when the
    # descriptor has no resources at all.
    latest_row = None
    resources = datapackage['resources']
    if resources and resources[0]['name'] == 'latest-project-entries':
        latest_row, latest_iter = get_latest_row(next(res_iter))
        yield latest_iter

    # Forward whatever has not been consumed yet, then the new resource.
    yield from res_iter
    yield packagist_collector(package, latest_row)


# Pass the updated descriptor and the resource generator to `spew`.
spew(datapackage, process_resources(res_iter, datapackage, package))
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@
}
},
"required": ["gems"]
},
"packagist": {
"type": "object",
"properties": {
"packages": {
"type": "array"
}
},
"required": ["packages"]
}
}
},
Expand Down
4 changes: 4 additions & 0 deletions projects/frictionlessdata/measure.source-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ config:
gems:
- 'tableschema'
- 'datapackage'
packagist:
packages:
- 'frictionlessdata/tableschema'
- 'frictionlessdata/datapackage'

code-hosting:
github:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def read(*paths):
PACKAGE = 'datapackage_pipelines_measure'
NAME = PACKAGE.replace('_', '-')
INSTALL_REQUIRES = [
'datapackage-pipelines',
'datapackage-pipelines==1.0.19',
'psycopg2',
'tweepy',
'facebook-sdk',
Expand Down
Loading

0 comments on commit 41df667

Please sign in to comment.