Skip to content

Commit

Permalink
Merge 8eafa84 into 379c18b
Browse files Browse the repository at this point in the history
  • Loading branch information
Wooble committed May 13, 2019
2 parents 379c18b + 8eafa84 commit d658cc7
Show file tree
Hide file tree
Showing 18 changed files with 535 additions and 23 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Expand Up @@ -6,6 +6,7 @@ recursive-include docs *.gitkeep
recursive-include docs *.py
recursive-include docs *.rst
recursive-include pycounter *.csv
recursive-include pycounter *.json
recursive-include pycounter *.tsv
recursive-include pycounter *.xlsx
recursive-include pycounter *.xml
Expand Down
16 changes: 16 additions & 0 deletions README.rst
Expand Up @@ -38,6 +38,8 @@ Licensed under the MIT license. See the file LICENSE for details.
pycounter is tested on Python 2.7, 3.4, 3.5, 3.6, 3.7 and pypy2 (if you're still
stuck on Python 2.6 or 3.3, please use version 0.16.1 of pycounter)

pycounter 2.x will be the last version with support for Python 2.

Documentation is on `Read the Docs <http://pycounter.readthedocs.io>`_.


Expand All @@ -55,6 +57,19 @@ From inside the source distribution:
Probably do all of this in a virtualenv. `The PyPA <https://packaging.python.org/tutorials/installing-packages/>`_
has a good explanation of how to get started.)


COUNTER 5 Note
--------------

In this alpha release, reports are output in COUNTER 4 format with COUNTER 5 data.
This is wrong, and the results are probably not a valid apples-to-apples comparison:
for example, TR_J1 excludes Gold Open Access counts that would be included in JR1, and
the HTML and PDF columns will always be 0 because these are no longer reported.

Before the final 2.0 release, pycounter will be capable of producing actual COUNTER 5
reports, probably with an API for getting COUNTER 4 style data, for compatibility with
scripts that make assumptions about the data they receive before passing it into
another system.

Usage
-----

Expand Down Expand Up @@ -100,3 +115,4 @@ Our code is automatically styled using black. To install the pre-commit hook:
pip install pre-commit

pre-commit install

10 changes: 10 additions & 0 deletions docs/source/index.rst
Expand Up @@ -18,7 +18,16 @@ API Docs

pycounter.report
pycounter.sushi
pycounter.exceptions

Internal APIs
=============

.. autosummary::
pycounter.sushi5
pycounter.constants
pycounter.csvhelper
pycounter.helpers

Indices and tables
==================
Expand All @@ -34,4 +43,5 @@ Contents
.. toctree::

pycounter
internal_apis
sushiclient
30 changes: 30 additions & 0 deletions docs/source/internal_apis.rst
@@ -0,0 +1,30 @@
pycounter Internal APIs
=======================

pycounter.sushi5 module
-----------------------

.. automodule:: pycounter.sushi5
:members:
:undoc-members:

pycounter.constants module
--------------------------

.. automodule:: pycounter.constants
:members:
:undoc-members:

pycounter.csvhelper module
--------------------------

.. automodule:: pycounter.csvhelper
:members:
:undoc-members:

pycounter.helpers module
------------------------

.. automodule:: pycounter.helpers
:members:
:undoc-members:
5 changes: 5 additions & 0 deletions docs/source/pycounter.rst
Expand Up @@ -58,3 +58,8 @@ Commonly-used function
Other functions
^^^^^^^^^^^^^^^
.. autofunction:: get_sushi_stats_raw

pycounter.exceptions module
---------------------------

.. module:: pycounter.exceptions
10 changes: 10 additions & 0 deletions docs/source/sushiclient.rst
Expand Up @@ -66,6 +66,16 @@ Options:

Path to write output file to. If file already exists, it will be overwritten.

.. option:: -d, --dump

Dump raw request and response to logger.

.. option:: --no_ssl_verify

Skip SSL certificate verification.

.. option:: --no-delay

Do not wait 60 seconds before retrying a request in case of failure. This is
provided mainly for testing; it's not recommended to skip the delay when
talking to someone else's server...
14 changes: 14 additions & 0 deletions pycounter/constants.py
Expand Up @@ -90,6 +90,7 @@
u"and Page-Type (formatted for normal browsers/delivered "
u"to mobile devices and for mobile devices/delivered to "
u"mobile devices)",
u"TR_J1": u'Journal Requests (Excluding "OA_Gold")',
}

HEADER_FIELDS = {
Expand Down Expand Up @@ -173,6 +174,19 @@
u"Access denied category",
u"Reporting Period Total",
),
# FIXME: this is outputting counter 5 reports in 4 format for... reasons.
"TR_J1": (
u"Journal",
u"Publisher",
u"Platform",
u"Journal DOI",
u"Proprietary Identifier",
u"Print ISSN",
u"Online ISSN",
u"Reporting Period Total",
u"Reporting Period HTML",
u"Reporting Period PDF",
),
}

TOTAL_TEXT = {
Expand Down
67 changes: 47 additions & 20 deletions pycounter/report.py
Expand Up @@ -643,40 +643,63 @@ def parse_generic(report_reader):
"""
report = CounterReport()

report.report_type, report.report_version = _get_type_and_version(
six.next(report_reader)[0]
)
first_line = six.next(report_reader)
if first_line[0] == "Report_Name": # COUNTER 5 report
second_line = six.next(report_reader)
third_line = six.next(report_reader)
report.report_type, report.report_version = _get_c5_type_and_version(
first_line, second_line, third_line
)
else:
report.report_type, report.report_version = _get_type_and_version(first_line[0])

# noinspection PyTypeChecker
report.metric = METRICS.get(report.report_type)
if report.report_version != 5:
# noinspection PyTypeChecker
report.metric = METRICS.get(report.report_type)

report.customer = six.next(report_reader)[0]
report.customer = six.next(report_reader)[1 if report.report_version == 5 else 0]

if report.report_version == 4:
if report.report_version >= 4:
inst_id_line = six.next(report_reader)
if inst_id_line:
report.institutional_identifier = inst_id_line[0]
report.institutional_identifier = inst_id_line[
1 if report.report_version == 5 else 0
]
if report.report_type == "BR2":
report.section_type = inst_id_line[1]

six.next(report_reader)
if report.report_version == 5:
for _ in range(3):
six.next(report_reader)

covered_line = six.next(report_reader)
report.period = convert_covered(covered_line[0])
report.period = convert_covered(
covered_line[1 if report.report_version == 5 else 0]
)

six.next(report_reader)
if report.report_version < 5:
six.next(report_reader)

date_run_line = six.next(report_reader)
report.date_run = convert_date_run(date_run_line[0])
report.date_run = convert_date_run(
date_run_line[1 if report.report_version == 5 else 0]
)

if report.report_version == 5:
for _ in range(2):
# Skip Created_By and blank line
six.next(report_reader)

header = six.next(report_reader)

try:
report.year = _year_from_header(header, report)
except AttributeError:
warnings.warn("Could not determine year from malformed header")
if report.report_version < 5:
try:
report.year = _year_from_header(header, report)
except AttributeError:
warnings.warn("Could not determine year from malformed header")

if report.report_version == 4:
if report.report_version >= 4:
countable_header = header[0:8]
for col in header[8:]:
if col:
Expand All @@ -693,7 +716,7 @@ def parse_generic(report_reader):
end_date = last_day(convert_date_column(header[last_col - 1]))
report.period = (start_date, end_date)

if report.report_type != "DB1":
if report.report_type != "DB1" and report.report_version != 5:
six.next(report_reader)

if report.report_type == "DB2":
Expand Down Expand Up @@ -723,8 +746,8 @@ def _parse_line(line, report, last_col):
doi = ""
prop_id = ""

if report.report_version == 4:
if report.report_type.startswith("JR1"):
if report.report_version >= 4:
if report.report_type.startswith("JR1") or report.report_type == "TR_J1":
old_line = line
line = line[0:3] + line[5:7] + line[10:last_col]
doi = old_line[3]
Expand Down Expand Up @@ -761,7 +784,7 @@ def _parse_line(line, report, last_col):
for data in line[5:]:
month_data.append((curr_month, format_stat(data)))
curr_month = next_month(curr_month)
if report.report_type.startswith("JR"):
if report.report_type.startswith("JR") or report.report_type == "TR_J1":
return CounterJournal(
metric=report.metric,
month_data=month_data,
Expand Down Expand Up @@ -809,6 +832,10 @@ def _get_type_and_version(specifier):
return report_type, report_version


def _get_c5_type_and_version(first_line, second_line, third_line):
return second_line[1], int(third_line[1])


def _year_from_header(header, report):
"""Get the year for the report from the header.
Expand Down
7 changes: 6 additions & 1 deletion pycounter/sushi.py
Expand Up @@ -14,11 +14,13 @@
import requests
import six

from pycounter import sushi5
import pycounter.constants
import pycounter.exceptions
from pycounter.helpers import convert_date_run
import pycounter.report


logger = logging.getLogger(__name__)
NS = pycounter.constants.NS

Expand Down Expand Up @@ -60,7 +62,7 @@ def get_sushi_stats_raw(
:param release: report release number (should generally be `4`.)
:param sushi_dump: produces dump of XML to DEBUG logger
:param sushi_dump: produces dump of XML (or JSON, for COUNTER 5) to DEBUG logger
:param verify: bool: whether to verify SSL certificates
Expand Down Expand Up @@ -130,6 +132,9 @@ def get_report(*args, **kwargs):
:param no_delay: don't delay in retrying Report Queued
"""
if kwargs.get("release") == 5:
return sushi5.get_report(*args, **kwargs)

no_delay = kwargs.pop("no_delay", False)
delay_amount = 0 if no_delay else 60
while True:
Expand Down

0 comments on commit d658cc7

Please sign in to comment.