diff --git a/app.db b/app.db
new file mode 100644
index 0000000..0049e46
Binary files /dev/null and b/app.db differ
diff --git a/app/models.py b/app/models.py
index 064f8e0..ccf3f3a 100644
--- a/app/models.py
+++ b/app/models.py
@@ -2,8 +2,9 @@
 Database models for monitoring researchers who have used the facility and their
 outputs
 """
+import os.path
 from sqlalchemy import orm
-from app import db
+from app import db, PKG_DIR
 from app.exceptions import NifReportingException
 
 
@@ -66,6 +67,8 @@ class Publication(db.Model):
 
     __tablename__ = 'publications'
 
+    CONTENT_DIR = os.path.join(PKG_DIR, 'publication-content')
+
     id = db.Column(db.Integer, primary_key=True)
     date = db.Column(db.Date)
     scopus_id = db.Column(db.String(100), unique=True)
@@ -81,7 +84,7 @@ class Publication(db.Model):
     nif_assoc = db.Column(db.Integer)
     access_status = db.Column(db.Integer)
     abstract = orm.deferred(db.Column(db.Text))
-    content = orm.deferred(db.Column(db.Text))
+    #content = orm.deferred(db.Column(db.Text))
 
     scopus_authors = db.relationship(
         'ScopusAuthor', secondary='scopusauthor_publication_assoc')
@@ -108,6 +111,36 @@ def __init__(self, doi, title, scopus_id=None, pii=None, date=None,
         self.access_status = access_status
         # self.author_ids = author_ids
 
+    @property
+    def content(self):
+        """Text stored at `content_path`, or None if no file exists yet."""
+        if not self.has_content:
+            return None
+        with open(self.content_path) as f:
+            content = f.read()
+        return content
+
+    @content.setter
+    def content(self, content):
+        """Write `content` to `content_path`; a None value is ignored."""
+        if content is not None:
+            # The script no longer creates this directory, so do it here
+            os.makedirs(self.CONTENT_DIR, exist_ok=True)
+            with open(self.content_path, 'w') as f:
+                f.write(str(content))
+
+    @property
+    def content_path(self):
+        """Content file path ('.txt' if pii is set, else '.html')."""
+        path = os.path.join(self.CONTENT_DIR, str(self.scopus_id))
+        path += ('.txt' if self.pii else '.html')
+        return path
+
+    @property
+    def has_content(self):
+        """Whether a content file exists on disk for this publication."""
+        return os.path.exists(self.content_path)
+
 
 class Affiliation(db.Model):
 
diff --git a/scripts/add_content.py b/scripts/add_content.py
index 9b809ef..3b88b9c 100644
--- a/scripts/add_content.py
+++ b/scripts/add_content.py
@@ -11,11 +11,11 @@
 from PyPDF2 import PdfFileReader
 from bs4 import BeautifulSoup
 import pybliometrics.scopus as sc
-from app import PKG_DIR, db
+from app import db
 from app.models import Publication
 from app.constants import (
     CANT_ACCESS_CONTENT, PLAIN_TEXT_ACCESS_CONTENT, HTML_ACCESS_CONTENT,
-    PDF_ACCESS_CONTENT, UNKNOWN_ACCESS_CONTENT)
+    UNKNOWN_ACCESS_CONTENT)  # PDF_ACCESS_CONTENT,
 
 
 
@@ -71,15 +71,10 @@ def content_from_pii(pii):
             pass
     return text
 
 
-content_dir = os.path.join(PKG_DIR, 'publication-content')
-os.makedirs(content_dir, exist_ok=True)
 
 for pub in Publication.query.all():
-    content_path = os.path.join(content_dir, pub.scopus_id + (
-        '.txt' if pub.pii is not None else '.html'))
-
-    if not os.path.exists(content_path):
+    if not pub.has_content:
         if pub.pii:
             content = content_from_pii(pub.pii)
             if content is None:
@@ -96,6 +91,5 @@ def content_from_pii(pii):
             pub.access_status = UNKNOWN_ACCESS_CONTENT
 
         if content:
-            with open(content_path, 'w') as f:
-                f.write(str(content))
+            pub.content = content
         db.session.commit()