Skip to content

Commit

Permalink
take utku's changes
Browse files Browse the repository at this point in the history
  • Loading branch information
willronchetti committed Jan 30, 2024
2 parents 58e0b66 + 3e3efa7 commit c9f8007
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "encoded-core"
version = "0.6.0.1b0"
version = "0.6.0.2b3" # TODO: To become 0.7.0
description = "Core data models for Park Lab ENCODE based projects"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down
131 changes: 131 additions & 0 deletions src/encoded_core/file_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import datetime
import json
import os
import pytz
import requests
import structlog
from typing import Any, Dict, List

Expand Down Expand Up @@ -156,6 +159,12 @@ def download(context, request):
user_props = session_properties(context, request)
except Exception as e:
user_props = {'error': str(e)}

user_uuid = user_props.get('details', {}).get('uuid', None)
user_groups = user_props.get('details', {}).get('groups', None)
if user_groups:
user_groups.sort()

tracking_values = {'user_agent': request.user_agent, 'remote_ip': request.remote_addr,
'user_uuid': user_props.get('details', {}).get('uuid', 'anonymous'),
'request_path': request.path_info, 'request_headers': str(dict(request.headers))}
Expand All @@ -172,6 +181,11 @@ def download(context, request):
# search to find the "right" file and redirect to a download link for that one
properties = context.upgrade_properties()
file_format = get_item_or_none(request, properties.get('file_format'), 'file-formats')
lab_or_submission_center = None
if properties.get('lab') is not None:
lab_or_submission_center = get_item_or_none(request, properties.get('lab'), 'labs')
elif properties.get('submission_centers') is not None and len(properties.get('submission_centers')) > 0:
lab_or_submission_center = get_item_or_none(request, properties.get('submission_centers')[0], 'submission-centers')
_filename = None
if request.subpath:
_filename, = request.subpath
Expand All @@ -196,6 +210,23 @@ def download(context, request):
tracking_values['file_format'] = file_format.get('file_format')
tracking_values['filename'] = filename

# Calculate bytes downloaded from Range header
file_size_downloaded = properties.get('file_size', 0)
if request.range:
file_size_downloaded = 0
# Assume range unit is bytes
if hasattr(request.range, "ranges"):
for (range_start, range_end) in request.range.ranges:
file_size_downloaded += (
(range_end or properties.get('file_size', 0)) -
(range_start or 0)
)
else:
file_size_downloaded = (
(request.range.end or properties.get('file_size', 0)) -
(request.range.start or 0)
)

if not external:
external = context.build_external_creds(request.registry, context.uuid, properties)
if external.get('service') == 's3':
Expand Down Expand Up @@ -237,6 +268,16 @@ def download(context, request):
# except Exception as e:
# log.error('Cannot create TrackingItem on download of %s' % context.uuid, error=str(e))

# Analytics Stuff
ga_config = request.registry.settings.get('ga_config')
# TODO: replace below line with 4DN/SMaHT compatible type
file_experiment_type = None #get_file_experiment_type(request, context, properties)
file_at_id = context.jsonld_id(request)

if ga_config:
update_google_analytics(context, request, ga_config, filename, file_size_downloaded, file_at_id, lab_or_submission_center,
user_uuid, user_groups, file_experiment_type, properties.get('file_type'))

if asbool(request.params.get('soft')):
expires = int(parse_qs(urlparse(location).query)['Expires'][0])
return {
Expand Down Expand Up @@ -269,6 +310,96 @@ def download(context, request):
raise HTTPTemporaryRedirect(location=location)


# Upper bound (seconds) on the GA4 POST so that a slow or unreachable analytics
# endpoint cannot stall the user's download request indefinitely.
_GA_POST_TIMEOUT_SECONDS = 5


def _remove_none_fields(obj):
    """Return a copy of ``obj`` with ``None`` dict values and list items dropped, recursively.

    Tuples are returned as lists; any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: _remove_none_fields(v) for k, v in obj.items() if v is not None}
    if isinstance(obj, (list, tuple)):
        return [_remove_none_fields(item) for item in obj if item is not None]
    return obj


def update_google_analytics(context, request, ga_config, filename, file_size_downloaded,
                            file_at_id, lab_or_submission_center, user_uuid, user_groups, file_experiment_type, file_type='other'):
    """ Helper for @@download that updates GA in response to a download.

    Builds a single GA4 "purchase" event describing the download and POSTs it
    to the Google Analytics Measurement Protocol endpoint. The POST itself is
    best-effort: any exception raised while serializing or sending the payload
    is logged and swallowed so it does not break the download.

    :param context: item being downloaded; only ``jsonld_type()`` is used here
    :param request: current request; ``registry.settings``, ``cookies``, ``host``,
        ``range`` and ``url`` are read from it
    :param ga_config: dict with a "hostnameTrackerIDMapping" entry that maps a
        hostname (with an optional "default" key) to a list whose second
        element is used as the GA4 measurement id
    :param filename: name of the downloaded file; its extension is reported too
    :param file_size_downloaded: number of bytes reported as "file_size"
    :param file_at_id: identifier reported as the GA item id
    :param lab_or_submission_center: dict whose "display_title" is reported as
        lab / item brand, or None (reported as the string "None")
    :param user_uuid: uuid of the downloading user, or a falsy value to omit it
    :param user_groups: list of the user's groups, or a falsy value to omit it
    :param file_experiment_type: experiment type, or a falsy value (reported as "None")
    :param file_type: reported as "file_type" and as the item variant
    :raises Exception: if no 'ga4.secret' setting is present, or if no
        measurement id can be resolved from ``ga_config`` for this host
    """
    registry = request.registry
    ga4_secret = registry.settings.get('ga4.secret')
    if not ga4_secret:
        raise Exception("No valid GA4 api secret found")

    ga_cid = request.cookies.get("clientIdentifier")
    if not ga_cid:  # Fallback, potentially can stop working as GA is updated
        ga_cid = request.cookies.get("_ga")
        if ga_cid:
            # Keep everything after the first two dot-separated fields of the cookie.
            ga_cid = ".".join(ga_cid.split(".")[2:])

    hostname_mapping = ga_config["hostnameTrackerIDMapping"]
    ga_tid_mapping = hostname_mapping.get(request.host, hostname_mapping.get("default"))
    # Only the second element of the mapping list is usable as the GA4 measurement id.
    ga_tid = ga_tid_mapping[1] if isinstance(ga_tid_mapping, list) and len(ga_tid_mapping) > 1 else None

    if ga_tid is None:
        raise Exception("No valid tracker id found in ga_config.json > hostnameTrackerIDMapping")

    file_extension = os.path.splitext(filename)[1][1:]
    # Drop the last @type entry and reverse the remainder.
    item_types = list(reversed(context.jsonld_type()[:-1]))
    lab_or_submission_center_title = (
        lab_or_submission_center.get("display_title") if lab_or_submission_center is not None else "None"
    )

    ga_payload = {
        "client_id": ga_cid,
        "timestamp_micros": str(int(datetime.datetime.now().timestamp() * 1000000)),
        "non_personalized_ads": False,
        "events": [
            {
                "name": "purchase",
                "params": {
                    # "debug_mode": 1,
                    "name": filename,
                    "source": "Serverside File Download",
                    "action": "Range Query" if request.range else "File Download",
                    "file_name": filename,
                    "file_extension": file_extension,
                    "link_url": request.url,
                    "file_size": file_size_downloaded,
                    # Range queries are not counted as a download.
                    "downloads": 0 if request.range else 1,
                    "experiment_type": file_experiment_type or "None",
                    "lab": lab_or_submission_center_title,
                    # Product Category from @type, e.g. "File/FileProcessed"
                    "file_classification": "/".join(item_types),
                    "file_type": file_type,
                    "items": [
                        {
                            "item_id": file_at_id,
                            "item_name": filename,
                            "item_category": item_types[0] if len(item_types) >= 1 else "Unknown",
                            "item_category2": item_types[1] if len(item_types) >= 2 else "Unknown",
                            "item_brand": lab_or_submission_center_title,
                            "item_variant": file_type,
                            "quantity": 1
                        }
                    ]
                }
            }
        ]
    }

    if user_uuid:
        ga_payload['events'][0]['params']['user_uuid'] = user_uuid
        ga_payload['user_id'] = user_uuid

    if user_groups:
        groups_json = json.dumps(user_groups, separators=(',', ':'))  # Compact JSON; aligns w. what's passed from JS.
        ga_payload['events'][0]['params']['user_groups'] = groups_json

    # Best effort: analytics failures (including a timeout) are logged, never raised.
    try:
        requests.post(
            url="https://www.google-analytics.com/mp/collect?measurement_id={m_tid}&api_secret={api_secret}".format(m_tid=ga_tid, api_secret=ga4_secret),
            data=json.dumps(_remove_none_fields(ga_payload)),
            verify=True,
            # Without a timeout, requests can block forever on an unresponsive endpoint.
            timeout=_GA_POST_TIMEOUT_SECONDS)
    except Exception as e:
        log.error('Exception encountered posting to GA: %s' % e)


def validate_file_format_validity_for_file_type(context, request):
"""Check if the specified file format (e.g. fastq) is allowed for the file type (e.g. FileFastq).
"""
Expand Down

0 comments on commit c9f8007

Please sign in to comment.