diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..d26401f --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,10 @@ +# Read the Docs configuration file + +version: 2 +sphinx: + configuration: docs/conf.py +formats: all +python: + version: 3.7 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 229abd2..f69081e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,8 +11,11 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # import os +import io +import re import sys -sys.path.insert(0, os.path.abspath('../..')) + +sys.path.insert(0, os.path.abspath('..')) sys.setrecursionlimit(1500) # -- Project information ----------------------------------------------------- @@ -21,8 +24,11 @@ copyright = '2019, Preetham Kamidi' author = 'Preetham Kamidi' +with io.open(os.path.join('../', 'verifytweet/__init__.py'), 'rt', encoding='utf8') as f: + version = re.search(r'__version__ = "(.*?)"', f.read()).group(1) + # The full version, including alpha/beta/rc tags -release = 'v0.3' +release = version # -- General configuration --------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 10225b5..e5ef4f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,9 +10,10 @@ Welcome to verifytweet's documentation! :maxdepth: 2 :caption: Contents: -router + +controller =================== -.. automodule:: verifytweet.app +.. automodule:: verifytweet.services.controller :members: image service @@ -30,7 +31,7 @@ text service .. automodule:: verifytweet.services.text :members: -shared module: date checker +utility module: date checker ============================= .. 
automodule:: verifytweet.util.date_checker :members: diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..8ba2030 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,25 @@ +alabaster==0.7.12 +Babel==2.7.0 +certifi==2019.6.16 +chardet==3.0.4 +docutils==0.14 +idna==2.8 +imagesize==1.1.0 +Jinja2==2.10.1 +MarkupSafe==1.1.1 +packaging==19.0 +Pygments==2.4.2 +pyparsing==2.4.0 +pytz==2019.1 +requests==2.22.0 +six==1.12.0 +snowballstemmer==1.9.0 +Sphinx==2.1.2 +sphinx_bootstrap_theme +sphinxcontrib-applehelp==1.0.1 +sphinxcontrib-devhelp==1.0.1 +sphinxcontrib-htmlhelp==1.0.2 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.2 +sphinxcontrib-serializinghtml==1.1.3 +urllib3==1.25.3 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c873492..05a741d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -83,7 +83,6 @@ twine==1.13.0 typing==3.7.4 typing-extensions==3.7.4 urllib3==1.25.3 --e git+git@github.com:kamidipreetham/verifytweet.git@251b221ad452a80d54ef165b90b450a4710ac04d#egg=verifytweet wcwidth==0.1.7 webencodings==0.5.1 Werkzeug==0.15.4 diff --git a/setup.py b/setup.py index f15bd04..758caec 100644 --- a/setup.py +++ b/setup.py @@ -60,13 +60,10 @@ python_requires=">=3.6.*", install_requires=[ "click>=5.1", "Pillow==6.0.0", "pytesseract==0.2.6", - "requests==2.22.0", "scikit-learn==0.21.2","Werkzeug>=0.15", - "nltk>=3.4.3", "python-dateutil==2.8.0", + "requests==2.22.0", "scikit-learn==0.21.2", "nltk>=3.4.3", + "python-dateutil==2.8.0", "twint @ git+https://github.com/twintproject/twint.git" ], - dependency_links=[ - "" - ], entry_points={ "console_scripts": ["verifytweet = verifytweet.cli:run_as_command"] }, diff --git a/verifytweet/__init__.py b/verifytweet/__init__.py index 441ed22..7601049 100644 --- a/verifytweet/__init__.py +++ b/verifytweet/__init__.py @@ -16,4 +16,4 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. 
If not, see <https://www.gnu.org/licenses/>. -__version__ = "0.4.0" \ No newline at end of file +__version__ = "0.4.1" \ No newline at end of file diff --git a/verifytweet/app.py b/verifytweet/app.py index 3693425..40046d9 100644 --- a/verifytweet/app.py +++ b/verifytweet/app.py @@ -23,6 +23,7 @@ import verifytweet.services.controller as controller import verifytweet.services.image as image_service +import verifytweet.util.uploader as image_uploader from verifytweet.util.logging import logger from verifytweet.config.settings import app_config @@ -70,8 +71,7 @@ def verify_tweet(): if not data_type or not request_image: return "Missing form fields", 400 try: - uploader = image_service.Uploader(request_image) - file_path = uploader.save_to_disk() + file_path = image_uploader.save_to_disk(request_image) rest_controller = controller.NonAPIApproach(file_path) except Exception as e: logger.exception(e) diff --git a/verifytweet/cli.py b/verifytweet/cli.py index 9537499..541adef 100644 --- a/verifytweet/cli.py +++ b/verifytweet/cli.py @@ -58,7 +58,7 @@ def run_as_command(filepath): elif controller_status == ResultStatus.NO_RESULT: print(f"Fake Tweet!") else: - print(f"Verified Tweet!\n") + print(f"\nVerified Tweet!") print( f"**** Username: {tweet_obj.username} ****\n**** Tweet: {tweet_obj.tweet} ****\n**** Likes: {tweet_obj.likes_count} ****\n**** Retweets: {tweet_obj.retweets_count} ****\n**** Link: {tweet_obj.link} ****" ) diff --git a/verifytweet/config/settings.py b/verifytweet/config/settings.py index f4087ba..edb8dbc 100644 --- a/verifytweet/config/settings.py +++ b/verifytweet/config/settings.py @@ -19,6 +19,7 @@ import os import multiprocessing import tempfile +import logging def no_of_workers(): @@ -41,6 +42,8 @@ class Config(object): UPSCALE_RESOLUTION = "300x300" FILE_DIRECTORY = tempfile.mkdtemp() TWEET_MAX_STORE = 150 + RUN_METHOD = "cli" + LOG_LEVEL = logging.INFO class TwitterAPIConfig(Config): @@ -75,6 +78,7 @@ class WebConfig(Config): run_method = "cli" if "VERIFYTWEET_RUN_FROM_CLI" in 
os.environ else "web" +Config.RUN_METHOD = run_method configurations = {"web": WebConfig, "cli": Config} app_config = configurations[run_method] diff --git a/verifytweet/services/controller.py b/verifytweet/services/controller.py index 9c9830c..6bf96eb 100644 --- a/verifytweet/services/controller.py +++ b/verifytweet/services/controller.py @@ -122,7 +122,7 @@ def preprocess(file_path): extracted_text, extractor_status = text_extractor.get_text() if extractor_status != ResultStatus.ALL_OKAY: return (None, extractor_status) - logger.info('Processed text: ' + extracted_text) + logger.debug('Processed text: ' + extracted_text) try: entity_parser = text_service.DataParser(extracted_text) @@ -132,5 +132,5 @@ def preprocess(file_path): entities, parser_status = entity_parser.get_entities() if parser_status != ResultStatus.ALL_OKAY: return (None, parser_status) - logger.info('Entities: ' + str(entities)) + logger.debug('Entities: ' + str(entities)) return (entities, parser_status) diff --git a/verifytweet/services/image.py b/verifytweet/services/image.py index c69dc8c..a98311c 100644 --- a/verifytweet/services/image.py +++ b/verifytweet/services/image.py @@ -16,57 +16,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -import os -import uuid import subprocess -import pytesseract import PIL - -from werkzeug.utils import secure_filename -from werkzeug.datastructures import FileStorage +import pytesseract from verifytweet.util.logging import logger from verifytweet.config.settings import app_config from verifytweet.util.result import ResultStatus -class Uploader(object): - """Saves image file received from POST request. - - Saves image file to a temporary location received by - Flask after checking valid file types. - - Attributes: - file_obj: Image file, type: werkzeug.datastructures.FileStorage. 
- """ - - def __init__(self, file_obj: FileStorage): - if not isinstance(file_obj, FileStorage): - raise TypeError( - 'file obj must be type werkzeug.datastructures.FileStorage') - if not file_obj: - raise ValueError('file obj cannot be empty') - self.file = file_obj - - def save_to_disk(self): - filename = secure_filename(self.file.filename) - if self.file and self.allowed_file(filename): - saved_file_name = str(uuid.uuid4()) + '.' + \ - filename.rsplit('.', 1)[1].lower() - saved_file_path = os.path.join(app_config.FILE_DIRECTORY, - saved_file_name) - logger.info('Saving file to path: ' + saved_file_path) - self.file.save(saved_file_path) - return saved_file_path - return None - - @staticmethod - def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in app_config.ALLOWED_EXTENSIONS - - class Extractor(object): """Extracts text from image @@ -76,7 +35,7 @@ class Extractor(object): file_path: A string indicating file path where the image is stored. 
""" - def __init__(self, file_path:str): + def __init__(self, file_path: str): if not isinstance(file_path, str): raise TypeError('File path must be type string') if not file_path: @@ -86,24 +45,25 @@ def __init__(self, file_path:str): def get_text(self): logger.info('Processing Image...') new_file_path = self.rescale(self.file_path) - img = PIL.Image.open(new_file_path).convert('L') - logger.info('Extracting text from rescaled image..') + logger.info('Extracting text from rescaled image...') try: + img = PIL.Image.open(new_file_path) text = pytesseract.image_to_string(image=img) if not text: return (None, ResultStatus.NO_RESULT) return (text, ResultStatus.ALL_OKAY) except Exception as e: + logger.exception(e) return (None, ResultStatus.MODULE_FAILURE) @staticmethod def rescale(file_path): - logger.info('Rescaling Image to 300 dpi') - new_file_path = file_path.rsplit('.', 1)[0] + '.jpg' + logger.info('Rescaling Image to 300 dpi...') + new_file_path = file_path.rsplit('.', 1)[0] + '.png' cmd = [ - 'convert', file_path, '-bordercolor', 'White', - '-resample', app_config.UPSCALE_RESOLUTION, '-border', '10x10', - '-alpha', 'off', new_file_path + 'convert', file_path, '-resample', app_config.UPSCALE_RESOLUTION, + '-alpha', 'off', '-colorspace', 'Gray', '-threshold', '75%', + new_file_path ] subprocess.run(cmd) return new_file_path diff --git a/verifytweet/services/search.py b/verifytweet/services/search.py index f2550da..1a2caf3 100644 --- a/verifytweet/services/search.py +++ b/verifytweet/services/search.py @@ -89,8 +89,8 @@ def aggregate_tweets(self): if date_checker.format_for_date( tweet_date) == date_checker.format_for_date( self.date) and date_checker.valid_date(tweet_date): - logger.info('Tweet found...: ' + - str(entry[app_config.TWEET_TEXT_KEY])) + logger.debug('Tweet found...: ' + + str(entry[app_config.TWEET_TEXT_KEY])) same_day_tweets.append(entry[app_config.TWEET_TEXT_KEY]) if not same_day_tweets: return (same_day_tweets, ResultStatus.NO_RESULT) @@ -108,7 
+108,7 @@ def _call_twitter_api(querystring): '/', app_config.TWITTER_CONTEXT) r = requests.get(search_url, headers=headers, params=querystring) response = r.json() - logger.info('Status Code for Twitter API: ' + str(r.status_code)) + logger.debug('Status Code for Twitter API: ' + str(r.status_code)) if r.status_code != 200: raise RuntimeError('Twitter API returned status:' + str(r.status_code)) @@ -149,5 +149,5 @@ def search(self): results = twint.output.tweets_object if not results: return (results, ResultStatus.NO_RESULT) - logger.info(f'Search results: {results}\n') + logger.debug(f'Search results: {results}\n') return (results, ResultStatus.ALL_OKAY) diff --git a/verifytweet/services/text.py b/verifytweet/services/text.py index 31cfd42..094d0a4 100644 --- a/verifytweet/services/text.py +++ b/verifytweet/services/text.py @@ -107,7 +107,7 @@ def clean_text(self): tweet_snippet = " ".join(picked_words) if not tweet_snippet: return (tweet_snippet, ResultStatus.NO_RESULT) - logger.info(f'Tweet Snippet: {tweet_snippet}') + logger.debug(f'Tweet Snippet: {tweet_snippet}') return (tweet_snippet, ResultStatus.ALL_OKAY) @@ -159,5 +159,5 @@ def get_similarity(self): except Exception as e: logger.exception(e) return (None, ResultStatus.MODULE_FAILURE) - logger.info('Similartiy Matrix: ' + str(similarity_matrix)) + logger.debug('Similartiy Matrix: ' + str(similarity_matrix)) return (similarity_matrix, ResultStatus.ALL_OKAY) diff --git a/verifytweet/util/logging.py b/verifytweet/util/logging.py index 4cb51ca..8fa08b8 100644 --- a/verifytweet/util/logging.py +++ b/verifytweet/util/logging.py @@ -19,12 +19,17 @@ import logging import sys +from verifytweet.config.settings import app_config + logger = logging.getLogger() logger.setLevel(logging.INFO) handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.INFO) -formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s') +web_formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- 
%(message)s') +cli_formatter = logging.Formatter(u'%(message)s') +formatter = cli_formatter if app_config.RUN_METHOD == "cli" else web_formatter + handler.setFormatter(formatter) logger.addHandler(handler) diff --git a/verifytweet/util/uploader.py b/verifytweet/util/uploader.py new file mode 100644 index 0000000..37a17b8 --- /dev/null +++ b/verifytweet/util/uploader.py @@ -0,0 +1,53 @@ +# Verify Tweet verifies tweets of a public user +# from tweet screenshots: real or generated from +# tweet generators. +# Copyright (C) 2019 Preetham Kamidi + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +import uuid + +from werkzeug.utils import secure_filename +from werkzeug.datastructures import FileStorage + +from verifytweet.util.logging import logger +from verifytweet.config.settings import app_config +from verifytweet.util.result import ResultStatus + + +def save_to_disk(file_obj): + """Saves an uploaded file via POST request to disk + """ + if not isinstance(file_obj, FileStorage): + raise TypeError( + 'file obj must be type werkzeug.datastructures.FileStorage') + if not file_obj: + raise ValueError('file obj cannot be empty') + filename = secure_filename(file_obj.filename) + if file_obj and allowed_file(filename): + saved_file_name = str(uuid.uuid4()) + '.' 
+ \ + filename.rsplit('.', 1)[1].lower() + saved_file_path = os.path.join(app_config.FILE_DIRECTORY, + saved_file_name) + logger.debug('Saving file to path: ' + saved_file_path) + file_obj.save(saved_file_path) + return saved_file_path + return None + +def allowed_file(filename): + """Checks if uploaded file has valid extension + """ + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in app_config.ALLOWED_EXTENSIONS \ No newline at end of file