diff --git a/pycounter/constants.py b/pycounter/constants.py index 6801b9a..dcb1208 100644 --- a/pycounter/constants.py +++ b/pycounter/constants.py @@ -59,11 +59,11 @@ u"DB1": u"Total Searches, Result Clicks and Record Views by Month and " u"Database", u"DB2": u"Access Denied by Month, Database and Category", u"JR1": u"Number of Successful Full-Text Article Requests by Month and " u"Journal", - u"JR1GOA": u"Number of Successful Gold Open Access Full-Text Article " + u"JR1 GOA": u"Number of Successful Gold Open Access Full-Text Article " u"Requests by Month and Journal", u"JR1a": u"Number of Successful Full-Text Article Requests from an " u"Archive by Month and Journal", - u"JR2": u"Access Denied to Full Text Articles by Month, Journal, and " u"Category", + u"JR2": u"Access Denied to Full Text Articles by Month, Journal, and Category", u"JR3": u"Number of Successful Item Requests and Turnaways by Month, " u"Journal, and Page-Type", u"JR3mobile": u"Number of Successful Item Requests by Month, Journal, " @@ -106,7 +106,7 @@ u"Reporting Period HTML", u"Reporting Period PDF", ), - "JR2": ( + "JR1 GOA": ( u"Journal", u"Publisher", u"Platform", @@ -118,6 +118,17 @@ u"Reporting Period HTML", u"Reporting Period PDF", ), + "JR2": ( + u"Journal", + u"Publisher", + u"Platform", + u"Journal DOI", + u"Proprietary Identifier", + u"Print ISSN", + u"Online ISSN", + u"Access Denied Category", + u"Reporting Period Total", + ), "JR3": ( u"Journal", u"Publisher", @@ -158,6 +169,7 @@ u"Proprietary Identifier", u"ISBN", u"ISSN", + u"Access Denied Category", u"Reporting Period Total", ), "DB1": ( @@ -192,7 +204,9 @@ TOTAL_TEXT = { "JR1": u"Total for all journals", + "JR2": u"Total for all journals", "BR1": u"Total for all titles", "BR2": u"Total for all titles", + "BR3": u"Total for all titles", "DB2": u"Total for all databases", } diff --git a/pycounter/report.py b/pycounter/report.py index b66ff4f..5a7a2c7 100644 --- a/pycounter/report.py +++ b/pycounter/report.py @@ -170,7 +170,7 @@ def as_generic(self): output_lines.append([u"Date run:"]) output_lines.append([self.date_run.strftime("%Y-%m-%d")]) output_lines.append(self._table_header()) - if self.report_type in ("JR1", "BR1", "BR2", "DB2"): + if self.report_type in ("JR1", "BR1", "BR2", "DB2", "JR2", "BR3"): output_lines.extend(self._totals_lines()) elif self.report_type.startswith("DB"): self._ensure_required_metrics() @@ -207,9 +207,9 @@ def _totals_line(self, metric): total_cells.append(platforms.pop()) else: total_cells.append(u"") - if self.report_type in ("JR1", "BR1", "BR2"): + if self.report_type in ("JR1", "BR1", "BR2", "JR2", "BR3"): total_cells.extend([u""] * 4) - elif self.report_type == "DB2": + if self.report_type in ("DB2", "JR2", "BR3"): total_cells.append(metric) total_usage = 0 pdf_usage = 0 @@ -426,9 +426,12 @@ def as_generic(self): for data in self: total_usage += data[2] month_data.append(six.text_type(data[2])) + if self.metric.startswith("Access"): + data_line.append(self.metric) data_line.append(six.text_type(total_usage)) - data_line.append(six.text_type(self.html_total)) - data_line.append(six.text_type(self.pdf_total)) + if not self.metric.startswith("Access"): + data_line.append(six.text_type(self.html_total)) + data_line.append(six.text_type(self.pdf_total)) data_line.extend(month_data) return data_line @@ -523,6 +526,8 @@ def as_generic(self): for data in self: total_usage += data[2] month_data.append(six.text_type(data[2])) + if self.metric and self.metric.startswith("Access"): + data_line.append(self.metric) data_line.append(six.text_type(total_usage)) data_line.extend(month_data) return data_line @@ -803,7 +808,11 @@ def _parse_line(line, report, last_col): metric = report.metric if report.report_version >= 4: - if report.report_type.startswith("JR1") or report.report_type == "TR_J1": + if ( + report.report_type.startswith("JR1") + or report.report_type == "TR_J1" + or report.report_type == "TR_J2" + ): old_line = line line = line[0:3] + line[5:7] + line[10:last_col] doi = old_line[3] @@ -812,6 +821,8 @@ def _parse_line(line, report, last_col): pdf_total = format_stat(old_line[9]) issn = line[3].strip() eissn = line[4].strip() + if report.report_type == "TR_J2": + metric = old_line[9] elif report.report_type in ("BR1", "BR2"): line = line[0:3] + line[5:7] + line[8:last_col] @@ -851,7 +862,11 @@ def _parse_line(line, report, last_col): for data in line[months_start_idx:]: month_data.append((curr_month, format_stat(data))) curr_month = next_month(curr_month) - if report.report_type.startswith("JR") or report.report_type == "TR_J1": + if ( + report.report_type.startswith("JR") + or report.report_type == "TR_J1" + or report.report_type == "TR_J2" + ): return CounterJournal( metric=metric, month_data=month_data, diff --git a/pycounter/sushi.py b/pycounter/sushi.py index c91f575..71717d5 100644 --- a/pycounter/sushi.py +++ b/pycounter/sushi.py @@ -275,7 +275,7 @@ def raw_to_full(raw_report): html_usage = 0 pdf_usage = 0 - metrics_for_db = collections.defaultdict(list) + metrics_for_db = collections.OrderedDict() for perform_item in item.ItemPerformance: item_date = convert_date_run(perform_item.Period.Begin.text) @@ -289,18 +289,19 @@ def raw_to_full(raw_report): pdf_usage += int(inst.Count) elif inst.MetricType == "ft_html": html_usage += int(inst.Count) - elif ( - report.report_type.startswith("DB") - or report.report_type == "PR1" + elif report.report_type.startswith("DB") or report.report_type in ( + "PR1", + "JR2", + "BR3", ): - metrics_for_db[inst.MetricType].append( + metrics_for_db.setdefault(inst.MetricType, []).append( (item_date, int(inst.Count)) ) if usage is not None: month_data.append((item_date, int(usage))) if report.report_type: - if report.report_type.startswith("JR"): + if report.report_type == "JR1": report.pubs.append( pycounter.report.CounterJournal( title=title, @@ -317,7 +318,26 @@ def raw_to_full(raw_report): pdf_total=pdf_usage, ) ) + elif report.report_type == "BR3": + for metric_code, month_data in six.iteritems(metrics_for_db): + metric = pycounter.constants.DB_METRIC_MAP[metric_code] + report.pubs.append( + pycounter.report.CounterBook( + title=title, + platform=platform, + publisher=publisher_name, + period=report.period, + metric=metric, + issn=issn, + print_isbn=print_isbn, + online_isbn=online_isbn, + doi=doi, + proprietary_id=prop_id, + month_data=month_data, + ) + ) elif report.report_type.startswith("BR"): + # BR1, BR2 report.pubs.append( pycounter.report.CounterBook( title=title, @@ -358,4 +378,21 @@ def raw_to_full(raw_report): month_data=month_data, ) ) + elif report.report_type == "JR2": + for metric_code, month_data in six.iteritems(metrics_for_db): + metric = pycounter.constants.DB_METRIC_MAP[metric_code] + report.pubs.append( + pycounter.report.CounterJournal( + title=title, + platform=platform, + publisher=publisher_name, + period=report.period, + metric=metric, + issn=issn, + eissn=eissn, + doi=doi, + proprietary_id=prop_id, + month_data=month_data, + ) + ) return report diff --git a/pycounter/test/conftest.py b/pycounter/test/conftest.py index cff0976..95040a3 100644 --- a/pycounter/test/conftest.py +++ b/pycounter/test/conftest.py @@ -65,6 +65,8 @@ def parse_sushi_file(filename): "sushi_simple_br1.xml", "sushi_simple_db1.xml", "sushi_db1_missing_record_view.xml", + "sushi_br3.xml", + "sushi_jr2.xml", ] ) def sushi_report_all(request): @@ -97,6 +99,18 @@ def sushi_report_jr1(request): return parse_sushi_file(request.param) +@pytest.fixture +def sushi_report_jr2(): + """Journal turnaways.""" + return parse_sushi_file("sushi_jr2.xml") + + +@pytest.fixture +def sushi_report_br3(): + """Book turnaways.""" + return parse_sushi_file("sushi_br3.xml") + + @pytest.fixture( params=[ "C4BR1.tsv", @@ -155,3 +169,28 @@ def br3_report(): def jr2_report(): """Journal report 2 (turnaways).""" return parsedata("C4JR2.csv") + + +@pytest.fixture( + params="""C4BR1.tsv +C4BR2.tsv +C4BR3.csv +C4DB1.tsv +C4DB1_split_year.tsv +C4DB2.tsv +C4JR1.csv +C4JR1_bad.csv +C4JR1big.csv +C4JR1GOA.csv +C4JR1mul.csv +C4JR1my.csv +C4JR2.csv +PR1.tsv +simpleBR1.csv +simpleJR1.csv +simpleJR1.tsv +""".split() +) +def all_reports(request): + """All COUNTER 4 reports.""" + return parsedata(request.param) diff --git a/pycounter/test/counter5/conftest.py b/pycounter/test/counter5/conftest.py index 26bac33..df5c599 100644 --- a/pycounter/test/counter5/conftest.py +++ b/pycounter/test/counter5/conftest.py @@ -33,6 +33,14 @@ def trj1_report(): ) +@pytest.fixture +def trj2_report(): + """Tab-separated title turnaways report.""" + return pycounter.report.parse( + os.path.join(os.path.dirname(__file__), "data", "tr_j2.tsv") + ) + + @pytest.fixture def sushi5_report_trb1(): """JSON SUSHI report.""" diff --git a/pycounter/test/counter5/data/tr_j2.tsv b/pycounter/test/counter5/data/tr_j2.tsv new file mode 100644 index 0000000..50b0bac --- /dev/null +++ b/pycounter/test/counter5/data/tr_j2.tsv @@ -0,0 +1,18 @@ +Report_Name "Journal Access Denied " +Report_ID TR_J2 +Release 5 +Institution_Name Sample University +Institution_ID isni=1234567890 +Metric_Types Limit_Exceeded; No_License +Report_Filters Data_Type=Journal; Access_Method=Regular +Report_Attributes +Exceptions +Reporting_Period 2017-01-01 to 2017-06-30 +Created 2017-05-25 +Created_By Platform X + +Title Publisher Publisher_ID Platform DOI Proprietary_ID Print_ISSN Online_ISSN URI Metric_Type Reporting_ Period_Total Jan-2017 Feb-2017 Mar-2017 Apr-2017 May-2017 Jun-2017 +Journal A Publisher X isni=1234123412341234 PlatformX "/12.1.0.1/1111.2.222 " pubx:jnlA 1111-22222 1111-1223 Limit_Exceeded 3 3 +Journal A Publisher X isni=1234123412341234 PlatformX /12.1.0.1/1111.2.222 pubx:jnlA 1111-22222 1111-1223 No_License +Journal B Publisher X isni=1234123412341234 PlatformX /12.1.0.1/1111.2.211 pubx:jnlB 1111-22211 1111-1213 Limit_Exceeded +Journal B Publisher X isni=1234123412341234 PlatformX /12.1.0.1/1111.2.211 pubx:jnlB 1111-22211 1111-1213 No_License \ No newline at end of file diff --git a/pycounter/test/counter5/test_trj2.py b/pycounter/test/counter5/test_trj2.py new file mode 100644 index 0000000..d7e97be --- /dev/null +++ b/pycounter/test/counter5/test_trj2.py @@ -0,0 +1,18 @@ +"""Test parsing of COUNTER 5 TRJ2 report (turnaways)""" + +import datetime + + +def test_metric(trj2_report): + assert trj2_report.metric is None # Multiple metrics per report + + +def test_type(trj2_report): + assert trj2_report.report_type == u"TR_J2" + + +def test_data(trj2_report): + i = iter(trj2_report) + row = next(i) + item = next(iter(row)) + assert item == (datetime.date(2017, 1, 1), u"Limit_Exceeded", 3) diff --git a/pycounter/test/data/C4JR2.csv b/pycounter/test/data/C4JR2.csv index 0442c4d..649fd16 100644 --- a/pycounter/test/data/C4JR2.csv +++ b/pycounter/test/data/C4JR2.csv @@ -5,7 +5,7 @@ "2011-01-01 to 2011-12-31" "Date run:" 2012-02-21 -Journal,Publisher,Platform,"Journal DOI","Proprietary Identifier","Print ISSN","Online ISSN","Access Denied Category","Reporting Period Total","Reporting Period HTML","Reporting Period PDF",Jan-2011,Feb-2011,Mar-2011,Apr-2011,May-2011,Jun-2011,Jul-2011,Aug-2011,Sep-2011,Oct-2011,Nov-2011,Dec-2011 +Journal,Publisher,Platform,"Journal DOI","Proprietary Identifier","Print ISSN","Online ISSN","Access Denied Category","Reporting Period Total",Jan-2011,Feb-2011,Mar-2011,Apr-2011,May-2011,Jun-2011,Jul-2011,Aug-2011,Sep-2011,Oct-2011,Nov-2011,Dec-2011 "Total for all journals","Maximegalon University Press",MJO,,,,,16,0,16,2,1,0,0,0,5,1,1,0,5,1,0 "Abstracts of Working Papers in Economics","Maximegalon University Press",MJO,,,0951-0079,0951-0079,Access denied: concurrent/simultaneous user license limit exceeded,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0 "Acta Numerica","Maximegalon University Press",MJO,,,0962-4929,1474-0508,Access denied: concurrent/simultaneous user license limit exceeded,16,0,16,2,1,0,0,0,5,1,1,0,5,1,0 diff --git a/pycounter/test/data/sushi_br3.xml b/pycounter/test/data/sushi_br3.xml new file mode 100644 index 0000000..12a79a1 --- /dev/null +++ b/pycounter/test/data/sushi_br3.xml @@ -0,0 +1,76 @@ + + + + + + + + exampleRequestor + + + + + exampleReference + + + + + + 2013-01-01 + 2013-01-31 + + + + + + + Example Vendor + example + + Vendor Contact + vendor@example.com + + + + exampleLibrary + + + Print_ISBN + 9780011234569 + + + Online_ISBN + 9780011234549 + + + Proprietary + FD + + ExamplePlatform + Fake data + Book + + + 2013-01-01 + 2013-01-31 + + Access_denied + + turnaway + 6 + + + no_license + 8 + + + + + + + + + diff --git a/pycounter/test/data/sushi_jr2.xml b/pycounter/test/data/sushi_jr2.xml new file mode 100644 index 0000000..68bb475 --- /dev/null +++ b/pycounter/test/data/sushi_jr2.xml @@ -0,0 +1,83 @@ + + + + + + + + exampleRequestor + + + + + exampleReference + + + + + + 2013-01-01 + 2013-01-31 + + + + + + + Example Vendor + example + + Vendor Contact + vendor@example.com + + + + Example Library + exampleLibrary + + + Print_ISSN + 0737-1764 + + + Online_ISSN + 1234-5678 + + + Proprietary + JFD + + + DOI + 10.5555/12345678 + + ExamplePlatform + Example Publisher + Journal of fake data + Journal + + + 2013-01-01 + 2013-01-31 + + Access_denied + + turnaway + 6 + + + no_license + 8 + + + + + + + + + diff --git a/pycounter/test/test_output_common.py b/pycounter/test/test_output_common.py index 73aef01..bfbfe2c 100644 --- a/pycounter/test/test_output_common.py +++ b/pycounter/test/test_output_common.py @@ -65,3 +65,8 @@ def test_totals_sparse_data(tmp_path): assert book_line.startswith("Book 1") numbers = [int(num) for num in book_line.split("\t")[-3:]] assert numbers == [0, 3, 5], "check counts for book 1" + + +def test_roundtrippable(all_reports, tmp_path): + """Test that all of our parsable reports can also be output.""" + all_reports.write_tsv(str(tmp_path / "output.tsv")) diff --git a/pycounter/test/test_sushi.py b/pycounter/test/test_sushi.py index 1572901..884fef0 100644 --- a/pycounter/test/test_sushi.py +++ b/pycounter/test/test_sushi.py @@ -86,6 +86,28 @@ def test_title_jr1(sushi_report_jr1): assert publication.title == u"Journal of fake data" +def test_data_jr2(sushi_report_jr2): + assert [next(iter(line)) for line in sushi_report_jr2] == [ + ( + datetime.date(2013, 1, 1), + u"Access denied: concurrent/simultaneous user license exceeded", + 6, + ), + (datetime.date(2013, 1, 1), u"Access denied: content item not licensed", 8), + ] + + +def test_data_br3(sushi_report_br3): + assert [next(iter(line)) for line in sushi_report_br3] == [ + ( + datetime.date(2013, 1, 1), + u"Access denied: concurrent/simultaneous user license exceeded", + 6, + ), + (datetime.date(2013, 1, 1), u"Access denied: content item not licensed", 8), + ] + + class TestConvertRawBook(unittest.TestCase): """Test converting simple BR1 SUSHI response"""