Skip to content

Commit

Permalink
Merge pull request #216 from code-master5/json-dump-transaction
Browse files Browse the repository at this point in the history
CB-298: Add transaction support for json dumps
  • Loading branch information
paramsingh committed Oct 25, 2018
2 parents b7a33e7 + 4e2c63f commit 3a9c3b3
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 117 deletions.
75 changes: 39 additions & 36 deletions critiquebrainz/data/dump_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,43 +144,46 @@ def json(location, rotate=False):
current_app.json_encoder = DumpJSONEncoder

print("Creating new archives...")
for license in db_license.list_licenses():
safe_name = slugify(license["id"])
with tarfile.open(os.path.join(location, "critiquebrainz-%s-%s-json.tar.bz2" %
(datetime.today().strftime('%Y%m%d'), safe_name)), "w:bz2") as tar:
temp_dir = tempfile.mkdtemp()
license_dir = os.path.join(temp_dir, safe_name)
create_path(license_dir)

# Finding entities that have reviews with current license
entities = db_review.distinct_entities()
for entity in entities:
entity = str(entity)
# Creating directory structure and dumping reviews
dir_part = os.path.join(entity[0:1], entity[0:2])
reviews = db_review.list_reviews(entity_id=entity, license_id=license["id"], limit=None)[0]
if reviews:
rg_dir = '%s/%s' % (license_dir, dir_part)
create_path(rg_dir)
f = open('%s/%s.json' % (rg_dir, entity), 'w+')
f.write(jsonify(reviews=[db_review.to_dict(r) for r in reviews]).data.decode("utf-8"))
f.close()

tar.add(license_dir, arcname='reviews')

# Copying legal text
tar.add(os.path.join(os.path.dirname(os.path.realpath(__file__)), "licenses", safe_name + ".txt"), arcname='COPYING')

print(" + %s/critiquebrainz-%s-%s-json.tar.bz2" % (location, datetime.today().strftime('%Y%m%d'), safe_name))
with db.engine.begin() as connection:
for license in db_license.get_licenses_list(connection):
safe_name = slugify(license["id"])
with tarfile.open(os.path.join(location, "critiquebrainz-%s-%s-json.tar.bz2" %
(datetime.today().strftime('%Y%m%d'), safe_name)), "w:bz2") as tar:
temp_dir = tempfile.mkdtemp()
license_dir = os.path.join(temp_dir, safe_name)
create_path(license_dir)

# Finding entities that have reviews with current license
entities = db_review.get_distinct_entities(connection)
for entity in entities:
entity = str(entity)
# Creating directory structure and dumping reviews
dir_part = os.path.join(entity[0:1], entity[0:2])
reviews = db_review.get_reviews_list(connection, entity_id=entity, license_id=license["id"], limit=None)[0]
if reviews:
rg_dir = '%s/%s' % (license_dir, dir_part)
create_path(rg_dir)
f = open('%s/%s.json' % (rg_dir, entity), 'w+')
f.write(jsonify(reviews=[db_review.to_dict(r, connection=connection) for r in reviews])
.data.decode("utf-8"))
f.close()

tar.add(license_dir, arcname='reviews')

# Copying legal text
tar.add(os.path.join(os.path.dirname(os.path.realpath(__file__)), "licenses", safe_name + ".txt"),
arcname='COPYING')

print(" + %s/critiquebrainz-%s-%s-json.tar.bz2" % (location, datetime.today().strftime('%Y%m%d'), safe_name))

shutil.rmtree(temp_dir) # Cleanup

if rotate:
print("Removing old sets of archives (except two latest)...")
remove_old_archives(location, "critiquebrainz-[0-9]+-[-\w]+-json.tar.bz2",
is_dir=False, sort_key=os.path.getmtime)

shutil.rmtree(temp_dir) # Cleanup

if rotate:
print("Removing old sets of archives (except two latest)...")
remove_old_archives(location, "critiquebrainz-[0-9]+-[-\w]+-json.tar.bz2",
is_dir=False, sort_key=os.path.getmtime)

print("Done!")
print("Done!")


@cli.command()
Expand Down
23 changes: 16 additions & 7 deletions critiquebrainz/db/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ def delete(*, id):
})


def get_licenses_list(connection):
    """Fetch every license using a connection supplied by the caller.

    This is the connection-aware counterpart of list_licenses(): the query
    is executed on the given connection instead of a freshly opened one, so
    it can take part in a transaction the caller has already started.

    Args:
        connection: Database connection on which the query is executed.
            It is neither opened nor closed here.

    Returns:
        List of dictionaries, one per row of the ``license`` table, each
        holding the selected ``id``, ``info_url`` and ``full_name`` columns.
    """
    statement = sqlalchemy.text("""
        SELECT id,
               info_url,
               full_name
          FROM license
    """)
    cursor = connection.execute(statement)
    # Convert each result row into a plain dictionary.
    return list(map(dict, cursor.fetchall()))


def list_licenses():
"""Get a list of licenses.
Expand All @@ -58,10 +73,4 @@ def list_licenses():
}
"""
with db.engine.connect() as connection:
results = connection.execute(sqlalchemy.text("""
SELECT id,
info_url,
full_name
FROM license
"""))
return [dict(row) for row in results.fetchall()]
return get_licenses_list(connection)
148 changes: 87 additions & 61 deletions critiquebrainz/db/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@
supported_languages.append(lang.iso639_1_code)


# TODO(roman): Rename this function. It doesn't convert a review to dictionary.
# TODO(code-master5): Rename this function. It doesn't convert a review to dictionary.
# Review that is passed to it is already a dictionary.
def to_dict(review, confidential=False):
review["user"] = User(db_users.get_by_id(review.pop("user_id")))
def to_dict(review, confidential=False, connection=None):
if connection is not None:
review["user"] = User(db_users.get_user_by_id(connection, review.pop("user_id")))
else:
review["user"] = User(db_users.get_by_id(review.pop("user_id")))
review["user"] = review["user"].to_dict(confidential=confidential)
review["id"] = str(review["id"])
review["entity_id"] = str(review["entity_id"])
Expand Down Expand Up @@ -316,32 +319,12 @@ def create(*, entity_id, entity_type, user_id, is_draft, text=None, rating=None,


# pylint: disable=too-many-branches
def list_reviews(*, inc_drafts=False, inc_hidden=False, entity_id=None,
entity_type=None, license_id=None, user_id=None,
language=None, exclude=None, sort=None, limit=20,
offset=None):
"""Get a list of reviews.
This function provides several filters that can be used to select a subset of reviews.
Args:
entity_id (uuid): ID of the entity that has been reviewed.
entity_type (str): Type of the entity that has been reviewed.
user_id (uuid): ID of the author.
sort (str): Order of the returned reviews. Can be either "popularity" (order by difference in +/- votes),
or "published_on" (order by publish time), or "random" (order randomly).
limit (int): Maximum number of reviews to return.
offset (int): Offset that can be used in conjunction with the limit.
language (str): Language code of reviews.
license_id (str): License ID that reviews are associated with.
inc_drafts (bool): True if reviews marked as drafts should be included, False if not.
inc_hidden (bool): True if reviews marked as hidden should be included, False if not.
exclude (list): List of reviews (their IDs) to exclude from results.
Returns:
Tuple with two values:
1. list of reviews as dictionaries,
2. total number of reviews that match the specified filters.
def get_reviews_list(connection, *, inc_drafts=False, inc_hidden=False, entity_id=None,
entity_type=None, license_id=None, user_id=None, language=None,
exclude=None, sort=None, limit=20, offset=None):
"""
helper function for list_reviews() that extends support for execution within a transaction by directly receiving the
connection object
"""
filters = []
filter_data = {}
Expand Down Expand Up @@ -388,9 +371,8 @@ def list_reviews(*, inc_drafts=False, inc_hidden=False, entity_id=None,
{filterstr}
""".format(filterstr=filterstr))

with db.engine.connect() as connection:
result = connection.execute(query, filter_data)
count = result.fetchone()[0]
result = connection.execute(query, filter_data)
count = result.fetchone()[0]
order_by_clause = str()

if sort == "popularity":
Expand Down Expand Up @@ -469,33 +451,66 @@ def list_reviews(*, inc_drafts=False, inc_hidden=False, entity_id=None,
filter_data["limit"] = limit
filter_data["offset"] = offset

with db.engine.connect() as connection:
results = connection.execute(query, filter_data)
rows = results.fetchall()
rows = [dict(row) for row in rows]
# Organise last revision info in reviews
if rows:
for row in rows:
row["rating"] = RATING_SCALE_1_5.get(row["rating"])
row["last_revision"] = {
"id": row.pop("latest_revision_id"),
"timestamp": row.pop("latest_revision_timestamp"),
"text": row["text"],
"rating": row["rating"],
"review_id": row["id"],
}
row["user"] = User({
"id": row["user_id"],
"display_name": row.pop("display_name"),
"show_gravatar": row.pop("show_gravatar"),
"is_blocked": row.pop("is_blocked"),
"musicbrainz_username": row.pop("musicbrainz_id"),
"email": row.pop("email"),
"created": row.pop("user_created"),
})
results = connection.execute(query, filter_data)
rows = results.fetchall()
rows = [dict(row) for row in rows]

# Organise last revision info in reviews
if rows:
for row in rows:
row["rating"] = RATING_SCALE_1_5.get(row["rating"])
row["last_revision"] = {
"id": row.pop("latest_revision_id"),
"timestamp": row.pop("latest_revision_timestamp"),
"text": row["text"],
"rating": row["rating"],
"review_id": row["id"],
}
row["user"] = User({
"id": row["user_id"],
"display_name": row.pop("display_name"),
"show_gravatar": row.pop("show_gravatar"),
"is_blocked": row.pop("is_blocked"),
"musicbrainz_username": row.pop("musicbrainz_id"),
"email": row.pop("email"),
"created": row.pop("user_created"),
})

return rows, count


def list_reviews(*, inc_drafts=False, inc_hidden=False, entity_id=None, entity_type=None,
                 license_id=None, user_id=None, language=None, exclude=None,
                 sort=None, limit=20, offset=None):
    """Get a list of reviews.

    Opens a new database connection and delegates to get_reviews_list(),
    forwarding every filter unchanged. The filters can be combined to select
    a subset of reviews.

    Args:
        entity_id (uuid): ID of the entity that has been reviewed.
        entity_type (str): Type of the entity that has been reviewed.
        user_id (uuid): ID of the author.
        sort (str): Order of the returned reviews. Can be either "popularity" (order by difference in +/- votes),
            or "published_on" (order by publish time), or "random" (order randomly).
        limit (int): Maximum number of reviews to return.
        offset (int): Offset that can be used in conjunction with the limit.
        language (str): Language code of reviews.
        license_id (str): License ID that reviews are associated with.
        inc_drafts (bool): True if reviews marked as drafts should be included, False if not.
        inc_hidden (bool): True if reviews marked as hidden should be included, False if not.
        exclude (list): List of reviews (their IDs) to exclude from results.

    Returns:
        Tuple with two values:
        1. list of reviews as dictionaries,
        2. total number of reviews that match the specified filters.
    """
    # Gather the filters once so they are handed to the helper verbatim.
    criteria = {
        "inc_drafts": inc_drafts,
        "inc_hidden": inc_hidden,
        "entity_id": entity_id,
        "entity_type": entity_type,
        "license_id": license_id,
        "user_id": user_id,
        "language": language,
        "exclude": exclude,
        "sort": sort,
        "limit": limit,
        "offset": offset,
    }
    with db.engine.connect() as connection:
        return get_reviews_list(connection, **criteria)


def get_popular(limit=None):
"""Get a list of popular reviews.
Expand Down Expand Up @@ -613,6 +628,20 @@ def delete(review_id):
db_avg_rating.update(review["entity_id"], review["entity_type"])


def get_distinct_entities(connection):
    """Collect the IDs of all reviewed entities using a caller-supplied connection.

    This is the connection-aware counterpart of distinct_entities(): the
    query is executed on the given connection, so it can run inside a
    transaction the caller has already started.

    Args:
        connection: Database connection on which the query is executed.
            It is neither opened nor closed here.

    Returns:
        Set of the distinct ``entity_id`` values found in the ``review`` table.
    """
    statement = sqlalchemy.text("""
        SELECT DISTINCT entity_id
          FROM review
    """)

    cursor = connection.execute(statement)
    entity_ids = set()
    for record in cursor.fetchall():
        # Only one column is selected, so the ID is the first item of the row.
        entity_ids.add(record[0])
    return entity_ids


def distinct_entities():
"""Get a set of ID(s) of entities reviewed.
Expand All @@ -623,12 +652,9 @@ def distinct_entities():
# function assumes that IDs are unique between entity types. But it would
# be better to remove that assumption before we support reviewing entities
# from other sources (like BookBrainz).

with db.engine.connect() as connection:
results = connection.execute(sqlalchemy.text("""
SELECT DISTINCT entity_id
FROM review
"""))
return {row[0] for row in results.fetchall()}
return get_distinct_entities(connection)


def reviewed_entities(*, entity_ids, entity_type):
Expand Down
36 changes: 23 additions & 13 deletions critiquebrainz/db/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,28 @@ def get_many_by_mb_username(usernames):
return users


def get_user_by_id(connection, user_id):
    """Look up a single user by ID using a caller-supplied connection.

    This is the connection-aware counterpart of get_by_id(): the query is
    executed on the given connection, so it can run inside a transaction the
    caller has already started.

    Args:
        connection: Database connection on which the query is executed.
            It is neither opened nor closed here.
        user_id: Value matched against the ``id`` column of the ``user`` table.

    Returns:
        Dictionary of the columns listed in USER_GET_COLUMNS, with the
        ``musicbrainz_id`` key renamed to ``musicbrainz_username``, or None
        if the query yields no row.
    """
    column_list = ','.join(USER_GET_COLUMNS)
    statement = sqlalchemy.text("""
        SELECT {columns}
          FROM "user"
         WHERE id = :user_id
    """.format(columns=column_list))

    record = connection.execute(statement, {"user_id": user_id}).fetchone()
    if not record:
        return None

    user = dict(record)
    # Expose the MusicBrainz identifier under the key the rest of the code expects.
    user['musicbrainz_username'] = user.pop('musicbrainz_id')
    return user


def get_by_id(user_id):
"""Get user from user_id (UUID).
Expand All @@ -104,19 +126,7 @@ def get_by_id(user_id):
}
"""
with db.engine.connect() as connection:
result = connection.execute(sqlalchemy.text("""
SELECT {columns}
FROM "user"
WHERE id = :user_id
""".format(columns=','.join(USER_GET_COLUMNS))), {
"user_id": user_id
})
row = result.fetchone()
if not row:
return None
row = dict(row)
row['musicbrainz_username'] = row.pop('musicbrainz_id')
return row
return get_user_by_id(connection, user_id)


def create(**user_data):
Expand Down

0 comments on commit 3a9c3b3

Please sign in to comment.