diff --git a/Pipfile b/Pipfile index 8275fff..ea149f9 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,8 @@ verify_ssl = true autopep8 = "*" yapf = "*" sphinx = "*" +pytest = "*" +twine = "*" [packages] certifi = "*" @@ -46,6 +48,8 @@ Werkzeug = "*" flask-cors = "*" twint = {editable = true,git = "https://github.com/twintproject/twint.git"} nltk = "*" +setuptools = "*" +yapf = "*" [requires] -python_version = "3.7" +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock index 424395b..48b5dee 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "9fb0e10b3fd2ef225ea66b85e0eee9bc2d0bf47dc24322ebc28d066f003f92ef" + "sha256": "b2689adf5ee4eeef3aa7a0642af1af465111e5286ea5721d236e971d476984fc" }, "pipfile-spec": 6, "requires": { - "python_version": "3.7" + "python_version": "3.6" }, "sources": [ { @@ -279,6 +279,13 @@ "index": "pypi", "version": "==2.8" }, + "idna-ssl": { + "hashes": [ + "sha256:a933e3bb13da54383f9e8f35dc4f9cb9eb9b3b78c6b36f311254d6d0d92c6c7c" + ], + "markers": "python_version < '3.7'", + "version": "==1.1.0" + }, "imageio": { "hashes": [ "sha256:1a2bbbb7cd38161340fa3b14d806dfbf914abf3ee6fd4592af2afb87d049f209", @@ -720,6 +727,24 @@ "git": "https://github.com/twintproject/twint.git", "ref": "c5c6f1d60554cd0ee64ba223850b070553a17e74" }, + "typing": { + "hashes": [ + "sha256:38566c558a0a94d6531012c8e917b1b8518a41e418f7f15f00e129cc80162ad3", + "sha256:53765ec4f83a2b720214727e319607879fec4acde22c4fbb54fa2604e79e44ce", + "sha256:84698954b4e6719e912ef9a42a2431407fe3755590831699debda6fba92aac55" + ], + "markers": "python_version < '3.7'", + "version": "==3.7.4" + }, + "typing-extensions": { + "hashes": [ + "sha256:2ed632b30bb54fc3941c382decfd0ee4148f5c591651c9272473fea2c6397d95", + "sha256:b1edbbf0652660e32ae780ac9433f4231e7339c7f9a8057d0f042fcbcea49b87", + "sha256:d8179012ec2c620d3791ca6fe2bf7979d979acdbef1fca0bc56b37411db682ed" + ], + "markers": "python_version < '3.7'", + "version": "==3.7.4" + }, "urllib3": 
{ "hashes": [ "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", @@ -736,6 +761,14 @@ "index": "pypi", "version": "==0.15.4" }, + "yapf": { + "hashes": [ + "sha256:34f6f80c446dcb2c44bd644c4037a2024b6645e293a4c9c4521983dd0bb247a1", + "sha256:613deba14233623ff3432d9d5032631b5f600be97b39f66932cbe67648bfa8ea" + ], + "index": "pypi", + "version": "==0.27.0" + }, "yarl": { "hashes": [ "sha256:024ecdc12bc02b321bc66b41327f930d1c2c543fa9a561b39861da9388ba7aa9", @@ -761,6 +794,20 @@ ], "version": "==0.7.12" }, + "atomicwrites": { + "hashes": [ + "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", + "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" + ], + "version": "==1.3.0" + }, + "attrs": { + "hashes": [ + "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", + "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" + ], + "version": "==19.1.0" + }, "autopep8": { "hashes": [ "sha256:4d8eec30cc81bc5617dbf1218201d770dc35629363547f17577c61683ccfb3ee" @@ -775,6 +822,13 @@ ], "version": "==2.7.0" }, + "bleach": { + "hashes": [ + "sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16", + "sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa" + ], + "version": "==3.1.0" + }, "certifi": { "hashes": [ "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", @@ -814,6 +868,13 @@ ], "version": "==1.1.0" }, + "importlib-metadata": { + "hashes": [ + "sha256:6dfd58dfe281e8d240937776065dd3624ad5469c835248219bd16cf2e12dbeb7", + "sha256:cb6ee23b46173539939964df59d3d72c3e0c1b5d54b84f1d8a7e912fe43612db" + ], + "version": "==0.18" + }, "jinja2": { "hashes": [ "sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", @@ -856,6 +917,13 @@ "index": "pypi", "version": "==1.1.1" }, + "more-itertools": { + "hashes": [ + "sha256:3ad685ff8512bf6dc5a8b82ebf73543999b657eded8c11803d9ba6b648986f4d", + 
"sha256:8bb43d1f51ecef60d81854af61a3a880555a14643691cc4b64a6ee269c78f09a" + ], + "version": "==7.1.0" + }, "packaging": { "hashes": [ "sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af", @@ -863,6 +931,27 @@ ], "version": "==19.0" }, + "pkginfo": { + "hashes": [ + "sha256:7424f2c8511c186cd5424bbf31045b77435b37a8d604990b79d4e70d741148bb", + "sha256:a6d9e40ca61ad3ebd0b72fbadd4fba16e4c0e4df0428c041e01e06eb6ee71f32" + ], + "version": "==1.5.0.1" + }, + "pluggy": { + "hashes": [ + "sha256:0825a152ac059776623854c1543d65a4ad408eb3d33ee114dff91e57ec6ae6fc", + "sha256:b9817417e95936bf75d85d3f8767f7df6cdde751fc40aed3bb3074cbcb77757c" + ], + "version": "==0.12.0" + }, + "py": { + "hashes": [ + "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", + "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + ], + "version": "==1.8.0" + }, "pycodestyle": { "hashes": [ "sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56", @@ -886,6 +975,14 @@ "index": "pypi", "version": "==2.4.0" }, + "pytest": { + "hashes": [ + "sha256:2878de8ae1c79a62c012da6186b88ff0562ea96ce29c4208d2a9b11d9f607df1", + "sha256:95b700cf21ed5b7e91bce7a6b5a573b2e3ef7b3643d00f681d8f9c4672f9fbdf" + ], + "index": "pypi", + "version": "==5.0.0" + }, "pytz": { "hashes": [ "sha256:303879e36b721603cc54604edcac9d20401bdbe31e1e4fdee5b9f98d5d31dfda", @@ -894,6 +991,13 @@ "index": "pypi", "version": "==2019.1" }, + "readme-renderer": { + "hashes": [ + "sha256:bb16f55b259f27f75f640acf5e00cf897845a8b3e4731b5c1a436e4b8529202f", + "sha256:c8532b79afc0375a85f10433eca157d6b50f7d6990f337fa498c96cd4bfc203d" + ], + "version": "==24.0" + }, "requests": { "hashes": [ "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", @@ -902,6 +1006,13 @@ "index": "pypi", "version": "==2.22.0" }, + "requests-toolbelt": { + "hashes": [ + "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f", + 
"sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0" + ], + "version": "==0.9.1" + }, "six": { "hashes": [ "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", @@ -966,6 +1077,21 @@ ], "version": "==1.1.3" }, + "tqdm": { + "hashes": [ + "sha256:14a285392c32b6f8222ecfbcd217838f88e11630affe9006cd0e94c7eff3cb61", + "sha256:25d4c0ea02a305a688e7e9c2cdc8f862f989ef2a4701ab28ee963295f5b109ab" + ], + "version": "==4.32.2" + }, + "twine": { + "hashes": [ + "sha256:0fb0bfa3df4f62076cab5def36b1a71a2e4acb4d1fa5c97475b048117b1a6446", + "sha256:d6c29c933ecfc74e9b1d9fa13aa1f87c5d5770e119f5a4ce032092f0ff5b14dc" + ], + "index": "pypi", + "version": "==1.13.0" + }, "urllib3": { "hashes": [ "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", @@ -974,6 +1100,20 @@ "index": "pypi", "version": "==1.25.3" }, + "wcwidth": { + "hashes": [ + "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "version": "==0.1.7" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" + }, "yapf": { "hashes": [ "sha256:34f6f80c446dcb2c44bd644c4037a2024b6645e293a4c9c4521983dd0bb247a1", @@ -981,6 +1121,13 @@ ], "index": "pypi", "version": "==0.27.0" + }, + "zipp": { + "hashes": [ + "sha256:8c1019c6aad13642199fbe458275ad6a84907634cc9f0989877ccc4a2840139d", + "sha256:ca943a7e809cc12257001ccfb99e3563da9af99d52f261725e96dfe0f9275bc3" + ], + "version": "==0.5.1" } } } diff --git a/README.md b/README.md index dfc0a6b..5f8b5a4 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,47 @@ A fake tweet screenshot looks very convincing, misleading the general public. 
Fo |:-------------------------:|:-------------------------:| |![alt text](https://i.imgur.com/gG1RYiR.png "Tweet 1") | ![alt text](https://i.imgur.com/eTKpOFY.png "Tweet 2")| -Verify Tweet attempts to resolve the problem by letting users upload such tweet screenshots and verify if the user actually tweeted or not. A combination of Image processing, Natural language processing as well as Twitter Search API makes this possible. Due to Twitter API rate limits, currently only tweets from last 7 days can be verified. +Verify Tweet attempts to resolve this problem by letting users upload such tweet screenshots and verifying whether the user actually posted the tweet. A combination of image processing, natural language processing and the Twitter Search API makes this possible. + +## Installation + +### Prerequisites + +- Install [Tesseract-OCR](https://github.com/tesseract-ocr/tesseract/wiki#installation) and add it to your PATH. +- Install [ImageMagick](https://imagemagick.org/script/download.php) and add it to your PATH. +- Python >= 3.6 + +Installing via pip: + +```sh +pip install -U git+https://github.com/kamidipreetham/verifytweet.git@origin/master#egg=verifytweet +``` + +Or via pipenv: + +```sh +pipenv install -e git+https://github.com/kamidipreetham/verifytweet.git@origin/master#egg=verifytweet +``` + +## Usage + +Quickstart + +```sh +verifytweet -f path/to/tweet_screenshot.png +``` + +Help + +```sh +verifytweet --help +``` + +## License + +Verify Tweet is released under the GNU Affero General Public License v3.0.
## Future features - [ ] Support for Image links -- [ ] Processing Status -- [ ] Support for Tweets with replies \ No newline at end of file +- [ ] Support for Tweets with replies diff --git a/requirements.txt b/requirements.txt index bfe1780..c873492 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,92 @@ +aiodns==2.0.0 +aiohttp==3.5.4 +aiohttp-socks==0.2.2 +alabaster==0.7.12 +async-timeout==3.0.1 +atomicwrites==1.3.0 +attrs==19.1.0 autopep8==1.4.4 -certifi==2019.3.9 +Babel==2.7.0 +beautifulsoup4==4.7.1 +bleach==3.1.0 +cchardet==2.1.4 +certifi==2019.6.16 +cffi==1.12.3 chardet==3.0.4 Click==7.0 cycler==0.10.0 decorator==4.4.0 dnspython==1.16.0 +docutils==0.14 +elasticsearch==7.0.2 eventlet==0.25.0 +fake-useragent==0.1.11 Flask==1.0.3 +Flask-Cors==3.0.8 +geographiclib==1.49 +geopy==1.20.0 greenlet==0.4.15 gunicorn==19.9.0 idna==2.8 +idna-ssl==1.1.0 imageio==2.5.0 +imagesize==1.1.0 +importlib-metadata==0.18 itsdangerous==1.1.0 Jinja2==2.10.1 joblib==0.13.2 kiwisolver==1.1.0 MarkupSafe==1.1.1 monotonic==1.5 +more-itertools==7.1.0 +multidict==4.5.2 networkx==2.3 +nltk==3.4.3 numpy==1.16.4 +packaging==19.0 +pandas==0.24.2 Pillow==6.0.0 +pkginfo==1.5.0.1 +pluggy==0.12.0 +py==1.8.0 +pycares==3.0.0 pycodestyle==2.5.0 +pycparser==2.19 +Pygments==2.4.2 pyparsing==2.4.0 -pytesseract==0.2.6 +PySocks==1.7.0 +pytesseract==0.2.7 +pytest==5.0.0 python-dateutil==2.8.0 pytz==2019.1 PyWavelets==1.0.3 -regex +readme-renderer==24.0 +regex==2019.6.8 requests==2.22.0 +requests-toolbelt==0.9.1 +schedule==0.6.0 scikit-learn==0.21.2 scipy==1.3.0 six==1.12.0 +snowballstemmer==1.9.0 +soupsieve==1.9.2 +Sphinx==2.1.2 +sphinxcontrib-applehelp==1.0.1 +sphinxcontrib-devhelp==1.0.1 +sphinxcontrib-htmlhelp==1.0.2 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.2 +sphinxcontrib-serializinghtml==1.1.3 +tqdm==4.32.2 +twine==1.13.0 +-e git+https://github.com/twintproject/twint.git@c5c6f1d60554cd0ee64ba223850b070553a17e74#egg=twint +typing==3.7.4 +typing-extensions==3.7.4 
urllib3==1.25.3 +-e git+git@github.com:kamidipreetham/verifytweet.git@251b221ad452a80d54ef165b90b450a4710ac04d#egg=verifytweet +wcwidth==0.1.7 +webencodings==0.5.1 Werkzeug==0.15.4 +yapf==0.27.0 +yarl==1.3.0 +zipp==0.5.1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f15bd04 --- /dev/null +++ b/setup.py @@ -0,0 +1,73 @@ +# Verify Tweet verifies tweets of a public user +# from tweet screenshots: real or generated from +# tweet generators. +# Copyright (C) 2019 Preetham Kamidi + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +import io +import re + +from setuptools import find_packages +from setuptools import setup + +with io.open("README.md", "rt", encoding="utf8") as f: + readme = f.read() + +with io.open("verifytweet/__init__.py", "rt", encoding="utf8") as f: + version = re.search(r'__version__ = "(.*?)"', f.read()).group(1) + +setup( + name="verifytweet", + version=version, + url="https://preethamkamidi.com/projects/verify", + project_urls={ + "Documentation": "https://github.com/kamidipreetham/verifytweet", + "Code": "https://github.com/kamidipreetham/verifytweet", + "Issue tracker": + "https://github.com/kamidipreetham/verifytweet/issues", + }, + license="AGPLv3", + author="Preetham Kamidi", + author_email="contact@preethamkamidi.com", + description="A tool to verify Tweet screenshots", + long_description=readme, + long_description_content_type='text/markdown', + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Information Technology", + "License :: OSI Approved :: GNU Affero General Public License v3", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ], + packages=find_packages(), + include_package_data=True, + python_requires=">=3.6", # PEP 440: the ".*" suffix is only valid with == and != + install_requires=[ + "click>=5.1", "Pillow==6.0.0", "pytesseract>=0.2.6", + "requests==2.22.0", "scikit-learn==0.21.2","Werkzeug>=0.15", + "nltk>=3.4.3", "python-dateutil==2.8.0", + "twint @ git+https://github.com/twintproject/twint.git" + ], + dependency_links=[ + "" + ], + entry_points={ + "console_scripts": ["verifytweet = verifytweet.cli:run_as_command"] + }, +) \ No newline at end of file diff --git a/verifytweet/__init__.py b/verifytweet/__init__.py index e69de29..441ed22 100644 --- a/verifytweet/__init__.py +++ b/verifytweet/__init__.py @@ -0,0 +1,19 @@ +# Verify Tweet verifies tweets of a public user +# from tweet screenshots: 
real or generated from +# tweet generators. +# Copyright (C) 2019 Preetham Kamidi + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +__version__ = "0.4.0" \ No newline at end of file diff --git a/verifytweet/app.py b/verifytweet/app.py index 7584d44..3693425 100644 --- a/verifytweet/app.py +++ b/verifytweet/app.py @@ -79,8 +79,13 @@ def verify_tweet(): 'status': ResultStatus.MODULE_FAILURE.value, 'result': None }) - tweet_validity, controller_status = rest_controller.exec() + result, controller_status = rest_controller.exec() + if controller_status != ResultStatus.ALL_OKAY: + return jsonify({ + 'status': controller_status.value, + 'result': result + }) return jsonify({ 'status': controller_status.value, - 'result': tweet_validity + 'result': result.tweet }) diff --git a/verifytweet/cli.py b/verifytweet/cli.py new file mode 100644 index 0000000..9537499 --- /dev/null +++ b/verifytweet/cli.py @@ -0,0 +1,64 @@ +# Verify Tweet verifies tweets of a public user +# from tweet screenshots: real or generated from +# tweet generators. +# Copyright (C) 2019 Preetham Kamidi + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os + +import click + +os.environ["VERIFYTWEET_RUN_FROM_CLI"] = "true" + +from .services import controller +from .config.settings import app_config +from .util.logging import logger +from .util.result import ResultStatus + + +@click.command() +@click.option("--filepath", + "-f", + required=True, + help="The filepath for the tweet screenshot image", + type=click.Path(exists=True, + dir_okay=False, + resolve_path=True, + readable=True)) +def run_as_command(filepath): + """Verifies tweet from given image. + + Verifies tweet from image given via file path + using a combination of image processing, text processing + as well as a search service. + + Args: + filepath: The filepath for the tweet screenshot image. 
+ """ + try: + verify_controller = controller.NonAPIApproach(filepath) + tweet_obj, controller_status = verify_controller.exec() + except Exception as e: # CLI boundary: log it and report a module failure + logger.exception(e) + tweet_obj, controller_status = None, ResultStatus.MODULE_FAILURE + if controller_status == ResultStatus.MODULE_FAILURE: + print("Something went wrong, Please try again!") + elif controller_status == ResultStatus.NO_RESULT: + print("Fake Tweet!") + else: + print("Verified Tweet!\n") + print( + f"**** Username: {tweet_obj.username} ****\n**** Tweet: {tweet_obj.tweet} ****\n**** Likes: {tweet_obj.likes_count} ****\n**** Retweets: {tweet_obj.retweets_count} ****\n**** Link: {tweet_obj.link} ****" + ) diff --git a/verifytweet/config/__init__.py b/verifytweet/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/verifytweet/config/settings.py b/verifytweet/config/settings.py index 06beb97..f4087ba 100644 --- a/verifytweet/config/settings.py +++ b/verifytweet/config/settings.py @@ -18,6 +18,7 @@ import os import multiprocessing +import tempfile def no_of_workers(): @@ -35,30 +36,45 @@ class Config(object): """Configuration for the app """ + IMAGEMAGICK_PATH = os.getenv('IMAGEMAGICK_PATH') if os.getenv( + 'IMAGEMAGICK_PATH') else "convert" + UPSCALE_RESOLUTION = "300x300" + FILE_DIRECTORY = tempfile.mkdtemp() + TWEET_MAX_STORE = 150 + + +class TwitterAPIConfig(Config): + """Twitter API Configuration + """ + + TWITTER_ACCESSTOKEN = os.getenv('TWITTER_ACCESSTOKEN') + TWITTER_HOSTNAME = "https://api.twitter.com" + TWITTER_APIVER = "1.1" + TWITTER_CONTEXT = "statuses/user_timeline.json" + TWEET_DATE_KEY = "created_at" + TWEET_USERNAME_KEY = "screen_name" + TWEET_COUNT = 150 + TWEET_COUNT_KEY = "count" + TWEET_MAX_OLD = 7 + TWEET_TEXT_KEY = "text" + SIMILARITY_THRESHOLD = 0.6 + + +class WebConfig(Config): + """Configuration for running as web service + """ + + APP_HOST = os.getenv('APP_HOST') APP_PORT = os.getenv('APP_PORT') - TIMEOUT = 
os.getenv('TIMEOUT') - MAX_CONTENT_LENGTH_KEY = os.getenv('MAX_CONTENT_LENGTH_KEY') - MAX_CONTENT_LENGTH = 
int(os.getenv('MAX_CONTENT_LENGTH')) - TESSERACT_PATH = os.getenv('TESSERACT_PATH') - IMAGEMAGICK_PATH = os.getenv('IMAGEMAGICK_PATH') - UPSCALE_RESOLUTION = os.getenv('UPSCALE_RESOLUTION') + TIMEOUT = 2000 + MAX_CONTENT_LENGTH_KEY = "MAX_CONTENT_LENGTH" + MAX_CONTENT_LENGTH = 2097152 WORKER_COUNT = no_of_workers() - WORKER_CLASS = os.getenv('WORKER_CLASS') - FILE_DIRECTORY = os.getenv('FILE_DIRECTORY') - ALLOWED_EXTENSIONS = set(os.getenv('ALLOWED_EXTENSIONS').split(',')) - SIMILARITY_THRESHOLD = float(os.getenv('SIMILARITY_THRESHOLD')) - TWITTER_HOSTNAME = os.getenv('TWITTER_HOSTNAME') - TWITTER_APIVER = os.getenv('TWITTER_APIVER') - TWITTER_CONTEXT = os.getenv('TWITTER_CONTEXT') - TWITTER_ACCESSTOKEN = os.getenv('TWITTER_ACCESSTOKEN') - TWEET_COUNT = int(os.getenv('TWEET_COUNT')) - TWEET_DATE_KEY = os.getenv('TWEET_DATE_KEY') - TWEET_USERNAME_KEY = os.getenv('TWEET_USERNAME_KEY') - TWEET_COUNT_KEY = os.getenv('TWEET_COUNT_KEY') - TWEET_MAX_OLD = int(os.getenv('TWEET_MAX_OLD')) - TWEET_TEXT_KEY = os.getenv('TWEET_TEXT_KEY') - TWEET_MAX_STORE = int(os.getenv('TWEET_MAX_STORE')) + WORKER_CLASS = "eventlet" + ALLOWED_EXTENSIONS = set(["png", "jpg", "jpeg"]) + +run_method = "cli" if "VERIFYTWEET_RUN_FROM_CLI" in os.environ else "web" +configurations = {"web": WebConfig, "cli": Config} -app_config = Config() +app_config = configurations[run_method] diff --git a/verifytweet/services/__init__.py b/verifytweet/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/verifytweet/services/controller.py b/verifytweet/services/controller.py index 177c012..9c9830c 100644 --- a/verifytweet/services/controller.py +++ b/verifytweet/services/controller.py @@ -110,7 +110,7 @@ def exec(self): if search_status != ResultStatus.ALL_OKAY: return (None, search_status) - return (search_results[0].tweet, ResultStatus.ALL_OKAY) + return (search_results[0], ResultStatus.ALL_OKAY) def preprocess(file_path): @@ -133,4 +133,4 @@ def preprocess(file_path): if parser_status 
!= ResultStatus.ALL_OKAY: return (None, parser_status) logger.info('Entities: ' + str(entities)) - return (entities, parser_status) \ No newline at end of file + return (entities, parser_status) diff --git a/verifytweet/services/image.py b/verifytweet/services/image.py index 3ff7dca..c69dc8c 100644 --- a/verifytweet/services/image.py +++ b/verifytweet/services/image.py @@ -45,6 +45,8 @@ def __init__(self, file_obj: FileStorage): if not isinstance(file_obj, FileStorage): raise TypeError( 'file obj must be type werkzeug.datastructures.FileStorage') + if not file_obj: + raise ValueError('file obj cannot be empty') self.file = file_obj def save_to_disk(self): @@ -99,7 +101,7 @@ def rescale(file_path): logger.info('Rescaling Image to 300 dpi') new_file_path = file_path.rsplit('.', 1)[0] + '.jpg' cmd = [ - app_config.IMAGEMAGICK_PATH, file_path, '-bordercolor', 'White', + 'convert', file_path, '-bordercolor', 'White', '-resample', app_config.UPSCALE_RESOLUTION, '-border', '10x10', '-alpha', 'off', new_file_path ] diff --git a/verifytweet/services/search.py b/verifytweet/services/search.py index 667a1e2..f2550da 100644 --- a/verifytweet/services/search.py +++ b/verifytweet/services/search.py @@ -149,5 +149,5 @@ def search(self): results = twint.output.tweets_object if not results: return (results, ResultStatus.NO_RESULT) - logger.info(f'Search results: {results}') + logger.info(f'Search results: {results}\n') return (results, ResultStatus.ALL_OKAY) diff --git a/verifytweet/services/text.py b/verifytweet/services/text.py index 2967862..31cfd42 100644 --- a/verifytweet/services/text.py +++ b/verifytweet/services/text.py @@ -30,8 +30,8 @@ from verifytweet.config.settings import app_config from verifytweet.util.result import ResultStatus -nltk.download('stopwords') -nltk.download('punkt') +nltk.download('stopwords', quiet=True) +nltk.download('punkt', quiet=True) count_vectorizer = CountVectorizer() stopwords = set(nltk.corpus.stopwords.words('english')) @@ -103,8 +103,7 @@ 
def clean_text(self): logger.exception(e) return (None, ResultStatus.MODULE_FAILURE) filtered_sentence = [w for w in word_tokens if not w in stopwords] - picked_words = random.sample(set(filtered_sentence), - min([len(filtered_sentence), 3])) + picked_words = filtered_sentence[0:min([len(filtered_sentence), 4])] tweet_snippet = " ".join(picked_words) if not tweet_snippet: return (tweet_snippet, ResultStatus.NO_RESULT) diff --git a/verifytweet/util/__init__.py b/verifytweet/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/verifytweet/util/logging.py b/verifytweet/util/logging.py index e3c77ff..4cb51ca 100644 --- a/verifytweet/util/logging.py +++ b/verifytweet/util/logging.py @@ -20,10 +20,10 @@ import sys logger = logging.getLogger() -logger.setLevel(logging.DEBUG) +logger.setLevel(logging.INFO) handler = logging.StreamHandler(sys.stdout) -handler.setLevel(logging.DEBUG) +handler.setLevel(logging.INFO) formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s') handler.setFormatter(formatter) diff --git a/wsgi.py b/wsgi.py index 169a487..627cdf2 100644 --- a/wsgi.py +++ b/wsgi.py @@ -24,8 +24,8 @@ import gunicorn.app.base from gunicorn.six import iteritems -from verifytweet.app import router from verifytweet.config.settings import app_config +from verifytweet.app import router class VerifyTweetApp(gunicorn.app.base.BaseApplication):