Skip to content

Commit

Permalink
Merge 7283215 into a14ddd0
Browse files Browse the repository at this point in the history
  • Loading branch information
idomic committed Feb 19, 2022
2 parents a14ddd0 + 7283215 commit 01e4901
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 32 deletions.
10 changes: 9 additions & 1 deletion doc/community/user-stats.rst
@@ -1,5 +1,5 @@
User Statistics
==========
===============

As an open source project, we collect anonymous usage statistics to prioritize and find product gaps.
This is optional and may be turned off by changing the configuration file:
Expand All @@ -12,3 +12,11 @@ The data we collect is limited to:
- A generated UUID, randomized when the initial install takes place, no personal or any identifiable information.
- Environment variables: The OS architecture Ploomber is used in (Python version etc.)
- Information about the different product phases: installation, API calls and errors.

Version updates
---------------
If you're running an outdated version, Ploomber will notify you through the console every second day in a non-invasive way.
You can disable these checks, for instance, if you're running in production and have locked your dependency versions.
The check can be turned off by changing the configuration file
inside ``~/.ploomber/stats/config.yaml``:
set ``version_check_enabled`` to ``False``.
1 change: 0 additions & 1 deletion src/ploomber/cli/install.py
Expand Up @@ -443,7 +443,6 @@ def _try_conda_install_and_lock_dev(cmdr, pkg_manager, env_name, use_lock):

def _next_steps(cmdr, cmd_activate):
cmdr.success('Next steps')

message = f'$ {cmd_activate}\n' if cmd_activate else ''
cmdr.print((f'{message}$ ploomber build'))
cmdr.success()
Expand Down
139 changes: 109 additions & 30 deletions src/ploomber/telemetry/telemetry.py
Expand Up @@ -31,8 +31,10 @@

import datetime
import http.client as httplib
import json
import warnings

import click
import posthog
import yaml
import os
Expand Down Expand Up @@ -252,23 +254,15 @@ def check_uid():
Checks if local user id exists as a uid file, creates if not.
"""
uid_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
if not uid_path.exists(): # Create - doesn't exist
conf = read_conf_file(uid_path) # file already exist due to version check
if 'uid' not in conf.keys():
uid = str(uuid.uuid4())
try: # Create for future runs
with uid_path.open("w") as file:
yaml.dump({"uid": uid}, file)
res = write_conf_file(uid_path, {"uid": uid}, error=True)
if res:
return f"NO_UID {res}"
else:
return uid
except Exception as e:
warnings.warn(f"ERROR: Can't write UID file: {e}")
return f"NO_UID {e}"
else: # read and return uid
try:
with uid_path.open("r") as file:
uid_dict = yaml.safe_load(file)
return uid_dict['uid']
except Exception as e:
warnings.warn(f"Error: Can't read UID file: {e}")
return f"NO_UID {e}"
return conf.get('uid', "NO_UID")


def check_stats_enabled():
Expand All @@ -284,21 +278,11 @@ def check_stats_enabled():
# Check if local config exists
config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
if not config_path.exists():
try: # Create for future runs
with config_path.open("w") as file:
yaml.dump({"stats_enabled": True}, file)
return True
except Exception as e:
warnings.warn(f"ERROR: Can't write to config file: {e}")
return True
write_conf_file(config_path, {"stats_enabled": True})
return True
else: # read and return config
try:
with config_path.open("r") as file:
conf = yaml.safe_load(file)
return conf['stats_enabled']
except Exception as e:
warnings.warn(f"Error: Can't read config file {e}")
return True
conf = read_conf_file(config_path)
return conf.get('stats_enabled', True)


def check_first_time_usage():
Expand All @@ -308,7 +292,97 @@ def check_first_time_usage():
"""
config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
uid_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
return not uid_path.exists() and config_path.exists()
uid_conf = read_conf_file(uid_path)
return config_path.exists() and 'uid' not in uid_conf.keys()


def get_latest_version():
    """
    Fetches the latest published ploomber version from PyPI's JSON API.
    Falls back to the running version (__version__) if the request or the
    parsing of the response fails for any reason.
    """
    connection = httplib.HTTPSConnection('pypi.org', timeout=1)
    try:
        connection.request("GET", "/pypi/ploomber/json")
        payload = json.loads(connection.getresponse().read())
        return payload['info']['version']
    except Exception:
        # best-effort lookup: never let a network/parsing problem propagate
        return __version__
    finally:
        connection.close()


def read_conf_file(conf_path):
    """
    Reads a YAML configuration file and returns its content as a dict.

    conf_path is a path object exposing .open() (e.g. pathlib.Path).

    Always returns a dict: an empty one is returned (after emitting a
    warning) when the file can't be opened or parsed, and also when the
    file doesn't hold a mapping (for instance, an empty file).
    """
    try:
        with conf_path.open("r") as file:
            conf = yaml.safe_load(file)
    except Exception as e:
        warnings.warn(f"Error: Can't read config file {e}")
        return {}

    # safe_load yields None for an empty document (and lists/scalars for
    # other content); callers rely on dict methods such as .get/.keys
    return conf if isinstance(conf, dict) else {}


def write_conf_file(conf_path, to_write, error=None):
    """
    Dumps to_write as YAML into conf_path, overwriting previous content.

    Failures never propagate: a warning is emitted instead. If error is
    truthy, the caught exception is returned so the caller can inspect
    it; in every other case the function returns None.
    """
    try:
        with conf_path.open("w") as stream:
            yaml.dump(to_write, stream)
    except Exception as e:
        warnings.warn(f"ERROR: Can't write to config file: {e}")
        return e if error else None
    return None


def check_version():
    """
    Notifies the user (through the console) when the running ploomber
    version differs from the latest one reported by get_latest_version.

    The date of the last notification is stored under the
    'last_version_check' key in uid.yaml, and the message is displayed at
    most once every 2 days. Nothing is displayed if
    'version_check_enabled' is set to False in config.yaml.
    """
    today = datetime.datetime.now()
    config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
    conf = read_conf_file(config_path)
    # an empty or malformed config file may not produce a mapping
    if not isinstance(conf, dict):
        conf = {}

    version_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
    # only read when the file exists, reading a missing file emits a warning
    version = read_conf_file(version_path) if version_path.exists() else {}
    if not isinstance(version, dict):
        version = {}

    # Update version conf if not there
    version.setdefault('last_version_check', today)
    write_conf_file(version_path, version)

    # Check if the flag was disabled
    if not conf.get('version_check_enabled', True):
        return

    # Check if we already notified in the last 2 days. This goes before
    # the version lookup so we don't pay for an HTTPS request on every call
    last_message = version['last_version_check']
    try:
        diff = (today - last_message).days
    except TypeError:
        # the stored value isn't a naive datetime (e.g. the file was edited
        # by hand): restart the clock instead of crashing the caller
        version['last_version_check'] = today
        write_conf_file(version_path, version)
        return

    if diff < 2:
        return

    # If latest version, do nothing
    latest = get_latest_version()

    if __version__ == latest:
        return

    click.secho(
        f"There's a new Ploomber version available ({latest}), "
        f"you're running {__version__}. To upgrade: "
        "pip install ploomber --upgrade",
        fg='yellow')

    # Update latest check date
    version['last_version_check'] = today
    write_conf_file(version_path, version)


def _get_telemetry_info():
Expand All @@ -320,7 +394,11 @@ def _get_telemetry_info():
# Check if telemetry is enabled, if not skip, else check for uid
telemetry_enabled = check_stats_enabled()

# Check latest version
check_version()

if telemetry_enabled:

# Check first time install
is_install = check_first_time_usage()

Expand Down Expand Up @@ -420,6 +498,7 @@ def log_api(action, client_time=None, total_runtime=None, metadata=None):
def log_call(action, payload=False):
"""Runs a function and logs it
"""

def _log_call(func):
@wraps(func)
def wrapper(*args, **kwargs):
Expand Down
121 changes: 121 additions & 0 deletions tests/telemetry/test_telemetry.py
@@ -1,10 +1,13 @@
import datetime
import pathlib
import sys
from unittest.mock import Mock, call
from pathlib import Path

import pytest
import yaml

import ploomber
from ploomber.telemetry import telemetry
from ploomber.telemetry.validate_inputs import str_param, opt_str_param
from ploomber.cli import plot, install, build, interact, task, report, status
Expand Down Expand Up @@ -374,6 +377,15 @@ def test_is_online():
assert telemetry.is_online()


def test_is_online_timeout():
    """is_online must come back in less than 1.5 seconds"""
    started = datetime.datetime.now()
    telemetry.is_online()
    elapsed = datetime.datetime.now() - started
    assert elapsed < datetime.timedelta(milliseconds=1500)


def test_parse_dag_products(monkeypatch):
product = '/ml-basic/output/get.parquet'
dag = telemetry.clean_tasks_upstream_products(product)
Expand Down Expand Up @@ -462,6 +474,115 @@ def test_validate_entries(monkeypatch):
assert res == (event_id, uid, action, client_time, elapsed_time)


def test_conf_file_after_version_check(tmp_directory, monkeypatch):
    """check_version must keep an existing uid entry in uid.yaml"""
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')

    # add a uid entry next to the last_version_check one
    version_path = Path('stats') / 'uid.yaml'
    version_path.write_text(version_path.read_text() +
                            'uid: some_user_id\n')

    telemetry.check_version()

    # the file must hold exactly the two expected fields
    conf = yaml.safe_load(version_path.read_text())
    assert 'uid' in conf
    assert len(conf) == 2


def test_get_version_timeout():
    """get_latest_version must come back in less than 1.5 seconds"""
    started = datetime.datetime.now()
    telemetry.get_latest_version()
    elapsed = datetime.datetime.now() - started
    assert elapsed < datetime.timedelta(milliseconds=1500)


def test_get_latest_version(monkeypatch):
    """
    get_latest_version returns a dotted version string, and falls back to
    the running version when the request fails
    """
    latest = telemetry.get_latest_version()
    assert isinstance(latest, str)
    assert latest.count('.') >= 1

    # Make the request blow up, the installed version must be returned
    failing_httplib = Mock()
    failing_httplib.HTTPSConnection.return_value.request.side_effect = \
        Exception
    monkeypatch.setattr(telemetry, 'httplib', failing_httplib)
    assert telemetry.get_latest_version() == ploomber.__version__


def write_to_conf_file(tmp_directory, monkeypatch, last_check):
    """
    Test helper: creates stats/config.yaml (version check enabled) and
    stats/uid.yaml (holding last_check as the last version check date) in
    the current directory, and sets telemetry.DEFAULT_HOME_DIR to '.'
    """
    stats_dir = Path('stats')
    stats_dir.mkdir()
    monkeypatch.setattr(telemetry, 'DEFAULT_HOME_DIR', '.')
    (stats_dir / 'config.yaml').write_text("version_check_enabled: True\n")
    (stats_dir / 'uid.yaml').write_text(
        f"last_version_check: {last_check}\n")


def test_version_skips_when_updated(tmp_directory, capsys, monkeypatch):
    """No message when the running version matches the latest one"""
    # Patch both the running and the latest version to the same value
    monkeypatch.setattr(telemetry, '__version__', '0.14.8')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.8'))

    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')

    telemetry.check_version()

    # Same version encountered: nothing should be displayed
    out = capsys.readouterr().out
    assert "Ploomber version" not in out


def test_user_output_on_different_versions(tmp_directory, capsys, monkeypatch):
    """
    The upgrade message is displayed when the running version differs from
    the latest one and the last check is more than 2 days old
    """
    # Pin the running version as well, otherwise the outcome depends on
    # whichever ploomber version is installed
    monkeypatch.setattr(telemetry, '__version__', '0.13.0')
    mock_version = Mock()
    monkeypatch.setattr(telemetry, 'get_latest_version', mock_version)
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')
    mock_version.return_value = '0.14.0'

    # Versions differ and the last check is old: expect the upgrade message
    telemetry.check_version()
    captured = capsys.readouterr()
    assert "Ploomber version" in captured.out


def test_no_output_latest_version(tmp_directory, capsys, monkeypatch):
    """
    No message is displayed when the last check happened today, even if a
    different version is available
    """
    # Report a different latest version so only the date check can silence
    # the message; this also keeps the test off the network
    monkeypatch.setattr(telemetry, '__version__', '0.14.0')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.8'))

    # The file's date is today, so no message should be displayed
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check=datetime.datetime.now())
    telemetry.check_version()
    captured = capsys.readouterr()
    assert "Ploomber version" not in captured.out


def test_output_on_date_diff(tmp_directory, capsys, monkeypatch):
    """
    The upgrade message is displayed when the last check is old and the
    versions differ, and the stored check date is refreshed afterwards
    """
    # Pin both versions: without this the test needs network access and an
    # installed version that differs from the one published on PyPI
    monkeypatch.setattr(telemetry, '__version__', '0.14.0')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.8'))

    # Message should be displayed since the date and version are off
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')
    version_path = Path('stats') / 'uid.yaml'
    telemetry.check_version()
    captured = capsys.readouterr()
    assert "Ploomber version" in captured.out

    # Check the conf file was updated
    with version_path.open("r") as file:
        version = yaml.safe_load(file)
    diff = (datetime.datetime.now() - version['last_version_check']).days
    assert diff == 0


def test_python_major_version():
version = telemetry.python_version()
major = version.split(".")[0]
Expand Down

0 comments on commit 01e4901

Please sign in to comment.