diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c446dd1..4c8be79 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.0 +current_version = 0.2.4 [bumpversion:file:setup.py] diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..fb45bc5 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[run] +branch = True +omit = whatstk/tests/* + +[report] +fail_under=80 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7869fa7..d5e8f01 100755 --- a/.gitignore +++ b/.gitignore @@ -255,9 +255,8 @@ examples #tox stuff tox.ini -.coveragerc +#.coveragerc requirements-flake.txt -requirements-test.txt setup.cfg testreport.html testreport.xml @@ -269,4 +268,5 @@ py37 *.ipynb .pypirc -learn/ \ No newline at end of file +learn/ +assets/style.css \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 2cf0feb..cc9c670 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,23 @@ dist: xenial language: python python: -- 3.7.2 -- 3.8 -- 3.5 + - 3.7.2 + - 3.8 +before_install: + - pip install -r requirements-test.txt install: -- pip install . + - ls -l + - pip uninstall whatstk + - pip install -e . 
#pip install -r requirements.txt script: -- pytest + - pip freeze | grep whatstk + - pytest --cov-report term --cov=whatstk tests/ +after_success: + - codecov # submit coverage deploy: provider: pypi - user: lucasrodes - password: - secure: fqmlRL4Cbg8p+OolNSZW84I6iiIXKvrEcH5w2WSaQ1D+qO8b0+xMgLttypQ3vf+hFFc0K6HXHg9HNONFx8yP0/GNAKEl6gwk2ld3+JTM/rKAceVsoZlq6PwnoQqiityDGZHIg8pXVbKdccGQwWh1jvsMNQG3ACFpNZKHYjjESMSDtib9DsvqwdfwbUrjaM7Ucdvmw+ABeD4yDKgG1bU9pMnE+zdQe+9EsmFYY+K5FEdY6vre76BUadTl1MUn9mnUGi7bJHY+QXFxu23X5mvo7fPyII06Ss8lFcsJI/EjZlemr+aYTOXwKgUhfC+Rf3DGcEIRFNuPrq4WMUI7ox+ehE64L3Eved4jIlI1U+hWHitWT6FX+1xdg67lVPQqhbJN4vhhjJEz1+n55BkADp+zQ2UB/IfZbNgBNFtNf+NQYi87d2zNss+FP1Pg1qI0LsqwfroiLNfh2HBCMl6G5tjlU0OBbbiVsLxHxxk9i0uuDcp6FbCPwWZ/bUlj2EP+HPpuCYlzUQjySe+1T0WoGDx9ltXCQzHxiRPlU4i4q7EguMTLBG4vmA5HtYyOqEk0s5z5nsggYAu2ZTx5r3jmsQHElls5de96z4xDhNLMiz5IDngO/9niR65NhWEJXY8kEWubh+mQHvVDGl2rFC38ZwoGbs2+5Uq+Gzgp1b+byOoEc4E= + user: $USER_PYPI + password: $PWD_PYPI on: - tags: true \ No newline at end of file + tags: true diff --git a/MANIFEST.in b/MANIFEST.in index e432b3d..b3098f4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,8 @@ include *.md include LICENSE include requirements.txt +include requirements-test.txt +include .coveragerc recursive-include altair *.py *.json *.ipynb *.html global-exclude *.py[co] __pycache__ \ No newline at end of file diff --git a/README.md b/README.md index ae3765f..bbe3430 100755 --- a/README.md +++ b/README.md @@ -1,11 +1,15 @@ # [whatstk](http://lucasrodes.github.io/whatstk) -![Package version](https://img.shields.io/badge/whatstk-v0.2.0-teal.svg?style=for-the-badge&color=25D366&logo=whatsapp) +![Package version](https://img.shields.io/badge/whatstk-v0.2.4-teal.svg?style=for-the-badge&color=25D366&logo=whatsapp) [![Build Status](https://travis-ci.com/lucasrodes/whatstk.svg?branch=develop)](https://travis-ci.com/lucasrodes/whatstk) -[![Python 
3.6](https://img.shields.io/badge/python-3.5|3.7|3.8-blue.svg)](https://www.python.org/downloads/release/python-3/) +[![codecov](https://codecov.io/gh/lucasrodes/whatstk/branch/master/graph/badge.svg)](https://codecov.io/gh/lucasrodes/whatstk) +[![Python 3.7|3.8](https://img.shields.io/badge/python-3.7|3.8-blue.svg)](https://www.python.org/downloads/release/python-3/) [![Documentation](https://img.shields.io/badge/documentation-royalblue.svg)](docs/index.md) -[![GitHub license](https://img.shields.io/github/license/lucasrodes/whatstk.svg)](https://github.com/lucasrodes/whatstk/blob/master/LICENSE) +[![GitHub +license](https://img.shields.io/github/license/lucasrodes/whatstk.svg)](https://github.com/lucasrodes/whatstk/blob/master/LICENSE) + + > [Get the Desktop App](https://lcsrg.me/whatstk-gui) @@ -28,7 +32,7 @@ Make sure to first obtain the chat to be analyzed. Export it as a `txt` file usi Check more on how-to use it in the [docs](docs/index.md) -#### Obtain a dataframe from your chat log file +### Obtain a dataframe from your chat log file Load your chat using the object `WhatsAppChat`. Example below we use chat [example.txt](chats/example.txt) @@ -68,7 +72,7 @@ header format. In our example, it would be: `hformat = '%d.%m.%y, %H:%M - %name: _Note 2: If your chat uses 12h clock, it may not work as expected. If it is your case, please report it in the issues section._ -#### Plot the cumulative messages sent by day +### Plot the cumulative messages sent by day Once you have your `WhatsAppChat` object, you can easily get the number of interventions per user per, say, `day` using the method `interventions()` with `date_mode` argument set to `'day'`. With this, some minor processing, `plotly` and `vis` method from `whatstk.plot` you can get really insightful plots. @@ -86,7 +90,7 @@ plot(vis(counts_cumsum, 'cumulative number of messages sent per day')) ![](assets/example1.png) -### What's the header? +## What's the header? 
The chat file syntax can differ between devices, OS and language settings, which makes it hard some times to correctly parse the data and make WhatsTK work correctly. The header appears for each message sent in the chat. It contains a timestamp and the name of the user that sent the message. @@ -149,6 +153,21 @@ header unit. +## Known issues +- 12h clock header format is not supported. + ## Contribute + We are very open to have collaborators. You can freely fork and issue a pull request with your updates! -For other issues/bugs/suggestions, please report it as an issue or [text me](mailto:lucasrg@kth.se). \ No newline at end of file +For other issues/bugs/suggestions, please report it as an issue or [text me](mailto:lucasrg@kth.se). + +### Pull Requests +Make sure to test your code before issuing a pull request: + +``` +py.test --cov-report term --cov=whatstk tests/ +``` + +_Note: Use `--html=testreport.html --cov-report html` to generate HTML reports._ + +In any case, pull requests will trigger the Travis CI pipeline, which will run the tests as well. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 9cb0221..915f4de 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,6 @@ * [parse\_chat](#.whatstk.utils.parser.parse_chat) * [remove\_alerts\_from\_df](#.whatstk.utils.parser.remove_alerts_from_df) * [whatstk.utils.exceptions](#.whatstk.utils.exceptions) - * [InterventionModeError](#.whatstk.utils.exceptions.InterventionModeError) * [whatstk.objects](#.whatstk.objects) * [WhatsAppChat](#.whatstk.objects.WhatsAppChat) * [\_\_init\_\_](#.whatstk.objects.WhatsAppChat.__init__) @@ -120,7 +119,7 @@ The unit of time can be chosen by means of argument `date_mode`. **Raises**: -- `whatstk.exceptions.InterventionModeError` - if invalid mode is chosen. +- `ValueError` - if invalid mode is chosen. ## whatstk.core @@ -254,15 +253,6 @@ Tries to get rid of alert/notification messages Library exceptions. 
- -### InterventionModeError - -```python -class InterventionModeError(Exception) -``` - -Raised when a non-implemented mode is selected. - ## whatstk.objects @@ -338,6 +328,7 @@ Get length of DataFrame #### shape ```python + | @property | def WhatsAppChat.shape() ``` diff --git a/make-docs.sh b/make-docs.sh index 4198a64..a6aff6a 100644 --- a/make-docs.sh +++ b/make-docs.sh @@ -1,5 +1,7 @@ -# Make sure to have pydoc-markdown:develop installed +# Make sure to have pydoc-markdown (develop branch) installed # pip install git+https://github.com/NiklasRosenstein/pydoc-markdown.git@develop +# +# Then simply execute `sh make-docs.sh` # Script to make documentation mkdir -p docs diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..0a362a8 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,4 @@ +pytest>=5.4.1 +pytest-cov>=2.8.1 +coverage>=4.5.1 +codecov>=2.0.22 diff --git a/requirements.txt b/requirements.txt index 35d6621..f88b994 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pandas -numpy -plotly \ No newline at end of file +pandas==1.0.3 +numpy==1.18.2 +plotly==4.5.0 \ No newline at end of file diff --git a/setup.py b/setup.py index b21b91a..7ead3e3 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name='whatstk', - version="0.2.0", + version="0.2.4", description="Parser and analytics tools for WhatsApp group chats", long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/chats/example.txt b/tests/chats/example_1.txt similarity index 100% rename from tests/chats/example.txt rename to tests/chats/example_1.txt diff --git a/tests/chats/example_2.txt b/tests/chats/example_2.txt new file mode 100644 index 0000000..d11a89c --- /dev/null +++ b/tests/chats/example_2.txt @@ -0,0 +1,21 @@ +[2016/04/15 15:04] You created group “Sample Group” +[2016/08/06 13:18] Messages you send to this group are now secured with end-to-end encryption. Tap for more info. 
+[2016/08/06 13:23] Ash Ketchum: Hey guys! +[2016/08/06 13:25] Brock: Hey Ash, good to have a common group! +[2016/08/06 13:30] Misty: Hey guys! Long time haven't heard anything from you +[2016/08/06 13:45] Ash Ketchum: Indeed. I think having a whatsapp group nowadays is a good idea +[2016/08/06 14:30] Misty: Definetly +[2016/08/06 17:25] Brock: I totally agree +[2016/08/07 11:45] Prof. Oak: Kids, shall I design a smart poke-ball? +[2016/08/07 18:45] Ash Ketchum: I don't mind Prof. I quitted capturing pokemon. +[2016/08/07 19:30] Misty: Was a great time, but had enough also. +[2016/08/07 23:25] Brock: Guys, I am still in the first gym. No one is playing pokemon, they went crazy with pokemon Go. +[2016/08/10 09:45] Jessie & James: Hey, thanks for adding us. Wanna meet soon? Just for the old times. +[2016/08/10 11:25] Raichu: I am in! +[2016/08/10 13:23] Ash Ketchum: FFS, no way, Pikachu did you evolve? +[2016/08/10 15:23] Raichu: Yes... Weird to have a different body! +[2016/08/11 19:30] Misty: Gotta see that. +[2016/09/11 20:25] Meowth: Hey people, I was on holidays in Sinnoh. Crazy region. +[2016/10/31 11:45] Prof. Oak: Smart-pokeball is created. 
+[2016/10/31 12:23] Wobbuffet: Wo-bbu-ffet + diff --git a/tests/test_analysis.py b/tests/test_analysis.py new file mode 100644 index 0000000..dbb3da9 --- /dev/null +++ b/tests/test_analysis.py @@ -0,0 +1,175 @@ +from whatstk.analysis import interventions +from whatstk.objects import WhatsAppChat +import pandas as pd +import pytest + + +def test_interventions_date(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='date', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Assert chat df and counts df have same date window + assert(chat.df.index.max().date() == counts.index.max().date()) + assert(chat.df.index.min().date() == counts.index.min().date()) + + +def test_interventions_date_msg_length(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='date', msg_length=True) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Assert chat df and counts df have same date window + assert(chat.df.index.max().date() == counts.index.max().date()) + assert(chat.df.index.min().date() == counts.index.min().date()) + + +def test_interventions_hour(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='hour', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.hour.max()) + assert(counts.index.min() == chat.df.index.hour.min()) + + + +def test_interventions_hour_msg_length(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = 
interventions(chat, date_mode='hour', msg_length=True) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.hour.max()) + assert(counts.index.min() == chat.df.index.hour.min()) + + +def test_interventions_month(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='month', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.month.max()) + assert(counts.index.min() == chat.df.index.month.min()) + + + +def test_interventions_month_msg_length(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='month', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.month.max()) + assert(counts.index.min() == chat.df.index.month.min()) + + +def test_interventions_weekday(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='weekday', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.weekday.max()) + assert(counts.index.min() == chat.df.index.weekday.min()) + + + +def test_interventions_weekday_msg_length(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='weekday', msg_length=True) + + 
assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range hours + assert(counts.index.max() == chat.df.index.weekday.max()) + assert(counts.index.min() == chat.df.index.weekday.min()) + + + +def test_interventions_hourweekday(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='hourweekday', msg_length=False) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range days + assert(counts.index.levels[0].max() == chat.df.index.weekday.max()) + assert(counts.index.levels[0].min() == chat.df.index.weekday.min()) + + # Check range hours + assert(counts.index.levels[1].max() == chat.df.index.hour.max()) + assert(counts.index.levels[1].min() == chat.df.index.hour.min()) + + +def test_interventions_hourweekday_msg_length(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + counts = interventions(chat, date_mode='hourweekday', msg_length=True) + + assert(isinstance(counts, pd.DataFrame)) + # Asswert chat df and counts df have same users + assert(set(chat.users) == set(counts.columns)) + + # Check range days + assert(counts.index.levels[0].max() == chat.df.index.weekday.max()) + assert(counts.index.levels[0].min() == chat.df.index.weekday.min()) + + # Check range hours + assert(counts.index.levels[1].max() == chat.df.index.hour.max()) + assert(counts.index.levels[1].min() == chat.df.index.hour.min()) + +def test_interventions_error(): + + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + with pytest.raises(ValueError): + counts = interventions(chat, date_mode='error', msg_length=False) + with pytest.raises(ValueError): + counts = interventions(chat, date_mode='error', msg_length=True) diff --git a/tests/test_core.py 
b/tests/test_core.py new file mode 100644 index 0000000..5ea6e7d --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,5 @@ +from whatstk.core import WhatsAppChat +from whatstk.core import interventions + +def test(): + assert(True) \ No newline at end of file diff --git a/tests/test_objects.py b/tests/test_objects.py index 441bb03..942af9e 100644 --- a/tests/test_objects.py +++ b/tests/test_objects.py @@ -1,17 +1,42 @@ from whatstk.objects import WhatsAppChat import pandas as pd +import pytest def test_object_auto(): - filename = './chats/example.txt' + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + assert(isinstance(chat.df, pd.DataFrame)) + + filename = 'tests/chats/example_2.txt' chat = WhatsAppChat.from_txt(filename) assert(isinstance(chat.df, pd.DataFrame)) def test_object_hformat(): - filename = './chats/example.txt' + filename = 'tests/chats/example_1.txt' hformat = '%d.%m.%y, %H:%M - %name:' chat = WhatsAppChat.from_txt(filename) assert(isinstance(chat.df, pd.DataFrame)) + filename = 'tests/chats/example_2.txt' + hformat = '[%y/%m/%d %H:%M] %name:' + chat = WhatsAppChat.from_txt(filename) + assert(isinstance(chat.df, pd.DataFrame)) + + +def test_object_error(): + filename = 'tests/chats/example_1.txt' + with pytest.raises(ValueError): + chat = WhatsAppChat.from_txt(filename, auto_header=False) + +def test_object_len_shape(): + filename = 'tests/chats/example_1.txt' + hformat = '%d.%m.%y, %H:%M - %name:' + chat = WhatsAppChat.from_txt(filename) + l = len(chat) + assert(isinstance(l, int)) + s = chat.shape + assert(isinstance(s, tuple)) + assert(len(s)==2) diff --git a/tests/test_plot.py b/tests/test_plot.py new file mode 100644 index 0000000..1a2c98f --- /dev/null +++ b/tests/test_plot.py @@ -0,0 +1,13 @@ +from whatstk.plot import vis +from whatstk.objects import WhatsAppChat +from whatstk.analysis import interventions + + +def test_vis(): + filename = 'tests/chats/example_1.txt' + chat = WhatsAppChat.from_txt(filename) + 
counts = interventions(chat=chat, date_mode='date', msg_length=False) + counts_cumsum = counts.cumsum() + fig = vis(counts_cumsum, 'cumulative number of messages sent per day') + assert isinstance(fig, dict) + assert ('data' in fig and 'layout' in fig) diff --git a/whatstk/__init__.py b/whatstk/__init__.py index 8d8b3b1..f5fc41d 100755 --- a/whatstk/__init__.py +++ b/whatstk/__init__.py @@ -4,7 +4,7 @@ name = "whatstk" -__version__ = "0.2.0" +__version__ = "0.2.4" __all__ = [ 'WhatsAppChat', diff --git a/whatstk/analysis/base.py b/whatstk/analysis/base.py index fbfd3b3..6f7303b 100644 --- a/whatstk/analysis/base.py +++ b/whatstk/analysis/base.py @@ -32,7 +32,7 @@ def interventions(chat, date_mode='date', msg_length=False): pandas.DataFrame: DataFrame with shape NxU, where N: number of time-slots and U: number of users. Raises: - whatstk.exceptions.InterventionModeError: if invalid mode is chosen. + ValueError: if invalid mode is chosen. """ if date_mode == 'date': @@ -47,10 +47,13 @@ def interventions(chat, date_mode='date', msg_length=False): elif date_mode == 'month': n_interventions = _interventions(chat, [chat.df.index.month], msg_length) else: - raise InterventionModeError("Mode {} is not implemented. Valid modes are 'date', 'hour', 'weekday', " + raise ValueError("Mode {} is not implemented. 
Valid modes are 'date', 'hour', 'weekday', " "'hourweekday' and 'month".format(date_mode)) - n_interventions.index.name = date_mode + if date_mode == 'hourweekday': + n_interventions.index = n_interventions.index.set_names(['weekday', 'hour']) + else: + n_interventions.index.name = date_mode n_interventions.columns = n_interventions.columns.get_level_values('username') return n_interventions diff --git a/whatstk/objects.py b/whatstk/objects.py index dad60dd..bf45c89 100644 --- a/whatstk/objects.py +++ b/whatstk/objects.py @@ -1,6 +1,5 @@ from whatstk.utils.parser import generate_regex, parse_chat, remove_alerts_from_df from whatstk.utils.auto_header import extract_header_from_text -from whatstk.utils.exceptions import InterventionModeError @@ -100,6 +99,7 @@ def __len__(self): """ return len(self.df) + @property def shape(self): """Get shape of DataFrame-formatted chat. diff --git a/whatstk/utils/auto_header.py b/whatstk/utils/auto_header.py index d7adbd3..9e1e2fa 100644 --- a/whatstk/utils/auto_header.py +++ b/whatstk/utils/auto_header.py @@ -205,9 +205,9 @@ def _extract_header_format_from_components(elements_list, template_list): template = template_list[0] hour_code = "%H" # day - day_pos = ((dates_df.max()>27) & (dates_df.max()<32)).argmax() + day_pos = ((dates_df.max()>27) & (dates_df.max()<32)).idxmax() # year - year_pos = dates_df.std().argmin() + year_pos = dates_df.std().idxmin() # month and hour positions = (dates_df.max() < 13) # & (dates_df.max() > 11) positions = positions.index[positions].tolist() diff --git a/whatstk/utils/exceptions.py b/whatstk/utils/exceptions.py index 9e0f417..25cf1eb 100644 --- a/whatstk/utils/exceptions.py +++ b/whatstk/utils/exceptions.py @@ -1,6 +1 @@ """Library exceptions.""" - -class InterventionModeError(Exception): - """Raised when a non-implemented mode is selected.""" - - pass \ No newline at end of file