Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version update - New branch #558

Merged
merged 16 commits into from
Feb 19, 2022
10 changes: 9 additions & 1 deletion doc/community/user-stats.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
User Statistics
==========
===============

As an open source project, we collect anonymous usage statistics to prioritize and find product gaps.
This is optional and may be turned off by changing the configuration file:
Expand All @@ -12,3 +12,11 @@ The data we collect is limited to:
- A generated UUID, randomized when the initial install takes place; it contains no personal or otherwise identifiable information.
- Environment variables: The OS architecture Ploomber is used in (Python version etc.)
- Information about the different product phases: installation, API calls and errors.

Version updates
---------------
If you're running an outdated version, Ploomber will notify you through the console every second day in a non-invasive way.
You can stop these checks, for instance, if you're running in production and you've locked versions.
The check can be turned off by editing the configuration file
at ``~/.ploomber/stats/config.yaml``:
change ``version_check_enabled`` to ``False``.
3 changes: 1 addition & 2 deletions src/ploomber/cli/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,7 @@ def _next_steps(cmdr, cmd_activate, start_time):
total_runtime=str(end_time - start_time))

cmdr.success('Next steps')
cmdr.print((f'$ {cmd_activate}\n'
'$ ploomber build'))
cmdr.print((f'$ {cmd_activate}\n' '$ ploomber build'))
cmdr.success()


Expand Down
140 changes: 108 additions & 32 deletions src/ploomber/telemetry/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@

import datetime
import http.client as httplib
import json
import warnings

import click
import posthog
import yaml
import os
Expand Down Expand Up @@ -252,23 +254,15 @@ def check_uid():
Checks if local user id exists as a uid file, creates if not.
"""
uid_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
if not uid_path.exists(): # Create - doesn't exist
conf = read_conf_file(uid_path) # file already exist due to version check
if 'uid' not in conf.keys():
uid = str(uuid.uuid4())
try: # Create for future runs
with uid_path.open("w") as file:
yaml.dump({"uid": uid}, file)
res = write_conf_file(uid_path, {"uid": uid}, error=True)
if res:
return f"NO_UID {res}"
else:
return uid
except Exception as e:
warnings.warn(f"ERROR: Can't write UID file: {e}")
return f"NO_UID {e}"
else: # read and return uid
try:
with uid_path.open("r") as file:
uid_dict = yaml.safe_load(file)
return uid_dict['uid']
except Exception as e:
warnings.warn(f"Error: Can't read UID file: {e}")
return f"NO_UID {e}"
return conf.get('uid', "NO_UID")


def check_stats_enabled():
Expand All @@ -284,21 +278,11 @@ def check_stats_enabled():
# Check if local config exists
config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
if not config_path.exists():
try: # Create for future runs
with config_path.open("w") as file:
yaml.dump({"stats_enabled": True}, file)
return True
except Exception as e:
warnings.warn(f"ERROR: Can't write to config file: {e}")
return True
write_conf_file(config_path, {"stats_enabled": True})
return True
else: # read and return config
try:
with config_path.open("r") as file:
conf = yaml.safe_load(file)
return conf['stats_enabled']
except Exception as e:
warnings.warn(f"Error: Can't read config file {e}")
return True
conf = read_conf_file(config_path)
return conf.get('stats_enabled', True)


def check_first_time_usage():
Expand All @@ -308,7 +292,97 @@ def check_first_time_usage():
"""
config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
uid_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
return not uid_path.exists() and config_path.exists()
uid_conf = read_conf_file(uid_path)
return config_path.exists() and 'uid' not in uid_conf.keys()


def get_latest_version():
    """
    Return the latest ploomber version published on PyPI.

    Queries the PyPI JSON API with a 1 second timeout. This is a best-effort
    lookup: on any failure (request error, non-200 response, unexpected
    payload) the currently installed ``__version__`` is returned instead.
    """
    conn = httplib.HTTPSConnection('pypi.org', timeout=1)
    try:
        conn.request("GET", "/pypi/ploomber/json")
        response = conn.getresponse()
        # An error page is not a version payload, don't try to parse it
        if response.status != 200:
            return __version__
        data = json.loads(response.read())
        return data['info']['version']
    except Exception:
        # Deliberately broad: a failed lookup must never break the caller
        return __version__
    finally:
        conn.close()


def read_conf_file(conf_path):
    """
    Load a YAML configuration file and return its content as a dict.

    ``conf_path`` must provide an ``open`` method (callers pass a
    ``pathlib.Path``). If the file can't be opened or parsed, a warning is
    emitted and an empty dict is returned. An empty dict is also returned
    when the file is empty or its top-level value is not a mapping, so
    callers can safely use ``.keys()`` / ``.get()`` on the result.
    """
    try:
        with conf_path.open("r") as file:
            conf = yaml.safe_load(file)
    except Exception as e:
        warnings.warn(f"Error: Can't read config file {e}")
        return {}

    # safe_load returns None for an empty file (and may return a scalar or
    # a list); normalize anything that isn't a mapping to an empty dict
    return conf if isinstance(conf, dict) else {}


def write_conf_file(conf_path, to_write, error=None):
    """
    Dump ``to_write`` as YAML into ``conf_path``.

    A failure never propagates: a warning is emitted instead. When ``error``
    is truthy the caught exception is returned so the caller can inspect it;
    in every other case the function returns None.
    """
    failure = None
    try:
        # Create for future runs
        with conf_path.open("w") as handle:
            yaml.dump(to_write, handle)
    except Exception as exc:
        warnings.warn(f"ERROR: Can't write to config file: {exc}")
        failure = exc
    return failure if error else None


def check_version():
    """
    Notify the user when a newer ploomber version is available.

    The date of the last notification is stored under ``last_version_check``
    in the uid file, which is created here if it doesn't exist yet.
    Nothing is printed when:

    - ``version_check_enabled`` is set to False in the config file
    - a notification was already shown less than 2 days ago
    - the installed version matches the latest available one
    """
    today = datetime.datetime.now()
    config_path = Path(check_dir_exist(CONF_DIR), 'config.yaml')
    conf = read_conf_file(config_path)

    version_path = Path(check_dir_exist(CONF_DIR), 'uid.yaml')
    # Make sure the uid file exists and carries a last-check timestamp
    version = read_conf_file(version_path) if version_path.exists() else {}
    if not isinstance(version, dict):
        # Empty or malformed file: start over instead of crashing
        version = {}
    version.setdefault('last_version_check', today)
    write_conf_file(version_path, version)

    # Check if the flag was disabled (a missing flag means enabled)
    if isinstance(conf, dict) and not conf.get('version_check_enabled', True):
        return

    # Check if we already notified in the last 2 days. This runs before the
    # PyPI lookup so that most invocations don't perform a network request
    try:
        recently_notified = (today - version['last_version_check']).days < 2
    except TypeError:
        # The stored value is not a comparable datetime (e.g. the file was
        # edited by hand); treat it as stale rather than failing
        recently_notified = False

    if recently_notified:
        return

    # If latest version, do nothing
    latest = get_latest_version()

    if __version__ == latest:
        return

    click.secho(
        f"There's a new Ploomber version available ({latest}), "
        f"you're running {__version__}. To upgrade: "
        "pip install ploomber --upgrade",
        fg='yellow')

    # Update latest check date
    version['last_version_check'] = today
    write_conf_file(version_path, version)


def _get_telemetry_info():
Expand All @@ -320,7 +394,11 @@ def _get_telemetry_info():
# Check if telemetry is enabled, if not skip, else check for uid
telemetry_enabled = check_stats_enabled()

# Check latest version
check_version()

if telemetry_enabled:

# Check first time install
is_install = check_first_time_usage()

Expand Down Expand Up @@ -423,9 +501,7 @@ def log_api(action,
def log_exception(action):
"""Runs a function and logs exceptions, if any
"""

def _log_exceptions(func):

@wraps(func)
def wrapper(*args, **kwargs):
try:
Expand Down
125 changes: 122 additions & 3 deletions tests/telemetry/test_telemetry.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import datetime
import pathlib
import click
import sys
from unittest.mock import Mock
from pathlib import Path

import click
import pytest
import yaml

import ploomber
from ploomber.telemetry import telemetry
from ploomber.telemetry.validate_inputs import str_param, opt_str_param
from ploomber.cli import plot, install, build, interact, task, report, status
Expand Down Expand Up @@ -144,7 +147,6 @@ def test_pip_env(monkeypatch, inside_pip_env):
# Ref: https://stackoverflow.com/questions/43878953/how-does-one-detect-if-
# one-is-running-within-a-docker-container-within-python
def test_docker_env(monkeypatch):

def mock(input_path):
return 'dockerenv' in str(input_path)

Expand Down Expand Up @@ -375,6 +377,15 @@ def test_is_online():
assert telemetry.is_online()


def test_is_online_timeout():
    """The connectivity check must complete in under 1.5 seconds."""
    limit = datetime.timedelta(milliseconds=1500)
    started = datetime.datetime.now()
    telemetry.is_online()
    elapsed = datetime.datetime.now() - started
    assert elapsed < limit


def test_parse_dag_products(monkeypatch):
product = '/ml-basic/output/get.parquet'
dag = telemetry.clean_tasks_upstream_products(product)
Expand All @@ -392,7 +403,6 @@ def test_parse_dag_products(monkeypatch):


def test_parse_dag(monkeypatch, tmp_directory):

def fn1(product):
pass

Expand Down Expand Up @@ -464,6 +474,115 @@ def test_validate_entries(monkeypatch):
assert res == (event_id, uid, action, client_time, elapsed_time)


def test_conf_file_after_version_check(tmp_directory, monkeypatch):
    """check_version must keep an existing uid entry in the uid file."""
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')
    uid_file = Path('stats', 'uid.yaml')
    # Add a uid entry next to the last-check timestamp
    uid_file.write_text(uid_file.read_text() + 'uid: some_user_id\n')

    telemetry.check_version()

    # Test that conf file has all required fields
    stored = yaml.safe_load(uid_file.read_text())
    assert 'uid' in stored
    assert len(stored) == 2


def test_get_version_timeout():
    """The latest-version lookup must complete in under 1.5 seconds."""
    limit = datetime.timedelta(milliseconds=1500)
    started = datetime.datetime.now()
    telemetry.get_latest_version()
    elapsed = datetime.datetime.now() - started
    assert elapsed < limit


def test_get_latest_version(monkeypatch):
    """The lookup returns a dotted version string, or the installed
    version when the request fails.
    """
    fetched = telemetry.get_latest_version()
    assert isinstance(fetched, str)
    assert fetched.count('.') >= 1

    # Make the request fail, check it produces the installed version
    failing_httplib = Mock()
    failing_httplib.HTTPSConnection().request.side_effect = Exception
    monkeypatch.setattr(telemetry, 'httplib', failing_httplib)
    assert telemetry.get_latest_version() == ploomber.__version__


def write_to_conf_file(tmp_directory, monkeypatch, last_check):
    """
    Test helper: create ``stats/config.yaml`` (version check enabled) and
    ``stats/uid.yaml`` (holding ``last_check``) under the current directory,
    and set telemetry's ``DEFAULT_HOME_DIR`` to ``'.'``.
    """
    stats_dir = Path('stats')
    stats_dir.mkdir()
    monkeypatch.setattr(telemetry, 'DEFAULT_HOME_DIR', '.')
    (stats_dir / 'config.yaml').write_text("version_check_enabled: True\n")
    (stats_dir / 'uid.yaml').write_text(f"last_version_check: {last_check}\n")


def test_version_skips_when_updated(tmp_directory, capsys, monkeypatch):
# Path conf file
monkeypatch.setattr(telemetry, '__version__', '0.14.8')
idomic marked this conversation as resolved.
Show resolved Hide resolved
mock_version = Mock()
mock_version.return_value = '0.14.8'
monkeypatch.setattr(telemetry, 'get_latest_version', mock_version)

write_to_conf_file(
tmp_directory=tmp_directory,
monkeypatch=monkeypatch,
last_check='2022-01-20 10:51:41.082376') # version='0.14.8',

# Test no warning when same version encountered
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove commented code

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait, what is this testing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to test no echo was printed when it's the same version, for some reason the patch I tried on the ploomber.version didn't work, not sure why.

telemetry.check_version()
captured = capsys.readouterr()
assert "Ploomber version" not in captured.out


def test_user_output_on_different_versions(tmp_directory, capsys, monkeypatch):
    """An old last-check date plus a mocked latest version of 0.14.0 must
    print the upgrade notice.
    """
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.0'))

    # Check now that the date is different there is an upgrade warning
    telemetry.check_version()

    assert "Ploomber version" in capsys.readouterr().out


def test_no_output_latest_version(tmp_directory, capsys, monkeypatch):
    """No notice is printed when the user was notified today, even if a
    newer version exists.
    """
    # Pin both versions so the test doesn't depend on the network or on the
    # installed release, and so the 2-day gate is what suppresses the notice
    monkeypatch.setattr(telemetry, '__version__', '0.14.0')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.8'))

    # The file's date is today now, no notice should be printed
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check=datetime.datetime.now())
    telemetry.check_version()
    captured = capsys.readouterr()
    assert "Ploomber version" not in captured.out


def test_output_on_date_diff(tmp_directory, capsys, monkeypatch):
    """A stale last-check date and a newer available version must print the
    notice and refresh the stored date.
    """
    # Pin both versions: without this the outcome depends on the real PyPI
    # answer and fails whenever the installed release is the latest one
    monkeypatch.setattr(telemetry, '__version__', '0.14.0')
    monkeypatch.setattr(telemetry, 'get_latest_version',
                        Mock(return_value='0.14.8'))

    # Warning should be shown since the date and version are off
    write_to_conf_file(tmp_directory=tmp_directory,
                       monkeypatch=monkeypatch,
                       last_check='2022-01-20 10:51:41.082376')
    version_path = Path('stats') / 'uid.yaml'
    telemetry.check_version()
    captured = capsys.readouterr()
    assert "Ploomber version" in captured.out

    # Check the conf file was updated
    with version_path.open("r") as file:
        version = yaml.safe_load(file)
    diff = (datetime.datetime.now() - version['last_version_check']).days
    assert diff == 0


def test_python_major_version():
version = telemetry.python_version()
major = version.split(".")[0]
Expand Down