diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000..d26401f
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,10 @@
+# Read the Docs configuration file
+
+version: 2
+sphinx:
+ configuration: docs/conf.py
+formats: all
+python:
+ version: 3.7
+ install:
+ - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 229abd2..f69081e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -11,8 +11,11 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
+import io
+import re
import sys
-sys.path.insert(0, os.path.abspath('../..'))
+
+sys.path.insert(0, os.path.abspath('..'))
sys.setrecursionlimit(1500)
# -- Project information -----------------------------------------------------
@@ -21,8 +24,11 @@
copyright = '2019, Preetham Kamidi'
author = 'Preetham Kamidi'
+with io.open(os.path.join('../', 'verifytweet/__init__.py'), 'rt', encoding='utf8') as f:
+ version = re.search(r'__version__ = "(.*?)"', f.read()).group(1)
+
# The full version, including alpha/beta/rc tags
-release = 'v0.3'
+release = version
# -- General configuration ---------------------------------------------------
diff --git a/docs/index.rst b/docs/index.rst
index 10225b5..e5ef4f0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,9 +10,10 @@ Welcome to verifytweet's documentation!
:maxdepth: 2
:caption: Contents:
-router
+
+controller
===================
-.. automodule:: verifytweet.app
+.. automodule:: verifytweet.services.controller
:members:
image service
@@ -30,7 +31,7 @@ text service
.. automodule:: verifytweet.services.text
:members:
-shared module: date checker
+utility module: date checker
=============================
.. automodule:: verifytweet.util.date_checker
:members:
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..8ba2030
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,25 @@
+alabaster==0.7.12
+Babel==2.7.0
+certifi==2019.6.16
+chardet==3.0.4
+docutils==0.14
+idna==2.8
+imagesize==1.1.0
+Jinja2==2.10.1
+MarkupSafe==1.1.1
+packaging==19.0
+Pygments==2.4.2
+pyparsing==2.4.0
+pytz==2019.1
+requests==2.22.0
+six==1.12.0
+snowballstemmer==1.9.0
+Sphinx==2.1.2
+sphinx_bootstrap_theme
+sphinxcontrib-applehelp==1.0.1
+sphinxcontrib-devhelp==1.0.1
+sphinxcontrib-htmlhelp==1.0.2
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.2
+sphinxcontrib-serializinghtml==1.1.3
+urllib3==1.25.3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c873492..05a741d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -83,7 +83,6 @@ twine==1.13.0
typing==3.7.4
typing-extensions==3.7.4
urllib3==1.25.3
--e git+git@github.com:kamidipreetham/verifytweet.git@251b221ad452a80d54ef165b90b450a4710ac04d#egg=verifytweet
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.15.4
diff --git a/setup.py b/setup.py
index f15bd04..758caec 100644
--- a/setup.py
+++ b/setup.py
@@ -60,13 +60,10 @@
python_requires=">=3.6.*",
install_requires=[
"click>=5.1", "Pillow==6.0.0", "pytesseract==0.2.6",
- "requests==2.22.0", "scikit-learn==0.21.2","Werkzeug>=0.15",
- "nltk>=3.4.3", "python-dateutil==2.8.0",
+ "requests==2.22.0", "scikit-learn==0.21.2", "nltk>=3.4.3",
+ "python-dateutil==2.8.0",
"twint @ git+https://github.com/twintproject/twint.git"
],
- dependency_links=[
- ""
- ],
entry_points={
"console_scripts": ["verifytweet = verifytweet.cli:run_as_command"]
},
diff --git a/verifytweet/__init__.py b/verifytweet/__init__.py
index 441ed22..7601049 100644
--- a/verifytweet/__init__.py
+++ b/verifytweet/__init__.py
@@ -16,4 +16,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
-__version__ = "0.4.0"
\ No newline at end of file
+__version__ = "0.4.1"
\ No newline at end of file
diff --git a/verifytweet/app.py b/verifytweet/app.py
index 3693425..40046d9 100644
--- a/verifytweet/app.py
+++ b/verifytweet/app.py
@@ -23,6 +23,7 @@
import verifytweet.services.controller as controller
import verifytweet.services.image as image_service
+import verifytweet.util.uploader as image_uploader
from verifytweet.util.logging import logger
from verifytweet.config.settings import app_config
@@ -70,8 +71,7 @@ def verify_tweet():
if not data_type or not request_image:
return "Missing form fields", 400
try:
- uploader = image_service.Uploader(request_image)
- file_path = uploader.save_to_disk()
+ file_path = image_uploader.save_to_disk(request_image)
rest_controller = controller.NonAPIApproach(file_path)
except Exception as e:
logger.exception(e)
diff --git a/verifytweet/cli.py b/verifytweet/cli.py
index 9537499..541adef 100644
--- a/verifytweet/cli.py
+++ b/verifytweet/cli.py
@@ -58,7 +58,7 @@ def run_as_command(filepath):
elif controller_status == ResultStatus.NO_RESULT:
print(f"Fake Tweet!")
else:
- print(f"Verified Tweet!\n")
+ print(f"\nVerified Tweet!")
print(
f"**** Username: {tweet_obj.username} ****\n**** Tweet: {tweet_obj.tweet} ****\n**** Likes: {tweet_obj.likes_count} ****\n**** Retweets: {tweet_obj.retweets_count} ****\n**** Link: {tweet_obj.link} ****"
)
diff --git a/verifytweet/config/settings.py b/verifytweet/config/settings.py
index f4087ba..edb8dbc 100644
--- a/verifytweet/config/settings.py
+++ b/verifytweet/config/settings.py
@@ -19,6 +19,7 @@
import os
import multiprocessing
import tempfile
+import logging
def no_of_workers():
@@ -41,6 +42,8 @@ class Config(object):
UPSCALE_RESOLUTION = "300x300"
FILE_DIRECTORY = tempfile.mkdtemp()
TWEET_MAX_STORE = 150
+ RUN_METHOD = "cli"
+ LOG_LEVEL = logging.INFO
class TwitterAPIConfig(Config):
@@ -75,6 +78,7 @@ class WebConfig(Config):
run_method = "cli" if "VERIFYTWEET_RUN_FROM_CLI" in os.environ else "web"
+Config.RUN_METHOD = run_method
configurations = {"web": WebConfig, "cli": Config}
app_config = configurations[run_method]
diff --git a/verifytweet/services/controller.py b/verifytweet/services/controller.py
index 9c9830c..6bf96eb 100644
--- a/verifytweet/services/controller.py
+++ b/verifytweet/services/controller.py
@@ -122,7 +122,7 @@ def preprocess(file_path):
extracted_text, extractor_status = text_extractor.get_text()
if extractor_status != ResultStatus.ALL_OKAY:
return (None, extractor_status)
- logger.info('Processed text: ' + extracted_text)
+ logger.debug('Processed text: ' + extracted_text)
try:
entity_parser = text_service.DataParser(extracted_text)
@@ -132,5 +132,5 @@ def preprocess(file_path):
entities, parser_status = entity_parser.get_entities()
if parser_status != ResultStatus.ALL_OKAY:
return (None, parser_status)
- logger.info('Entities: ' + str(entities))
+ logger.debug('Entities: ' + str(entities))
return (entities, parser_status)
diff --git a/verifytweet/services/image.py b/verifytweet/services/image.py
index c69dc8c..a98311c 100644
--- a/verifytweet/services/image.py
+++ b/verifytweet/services/image.py
@@ -16,57 +16,16 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
-import os
-import uuid
import subprocess
-import pytesseract
import PIL
-
-from werkzeug.utils import secure_filename
-from werkzeug.datastructures import FileStorage
+import pytesseract
from verifytweet.util.logging import logger
from verifytweet.config.settings import app_config
from verifytweet.util.result import ResultStatus
-class Uploader(object):
- """Saves image file received from POST request.
-
- Saves image file to a temporary location received by
- Flask after checking valid file types.
-
- Attributes:
- file_obj: Image file, type: werkzeug.datastructures.FileStorage.
- """
-
- def __init__(self, file_obj: FileStorage):
- if not isinstance(file_obj, FileStorage):
- raise TypeError(
- 'file obj must be type werkzeug.datastructures.FileStorage')
- if not file_obj:
- raise ValueError('file obj cannot be empty')
- self.file = file_obj
-
- def save_to_disk(self):
- filename = secure_filename(self.file.filename)
- if self.file and self.allowed_file(filename):
- saved_file_name = str(uuid.uuid4()) + '.' + \
- filename.rsplit('.', 1)[1].lower()
- saved_file_path = os.path.join(app_config.FILE_DIRECTORY,
- saved_file_name)
- logger.info('Saving file to path: ' + saved_file_path)
- self.file.save(saved_file_path)
- return saved_file_path
- return None
-
- @staticmethod
- def allowed_file(filename):
- return '.' in filename and \
- filename.rsplit('.', 1)[1].lower() in app_config.ALLOWED_EXTENSIONS
-
-
class Extractor(object):
"""Extracts text from image
@@ -76,7 +35,7 @@ class Extractor(object):
file_path: A string indicating file path where the image is stored.
"""
- def __init__(self, file_path:str):
+ def __init__(self, file_path: str):
if not isinstance(file_path, str):
raise TypeError('File path must be type string')
if not file_path:
@@ -86,24 +45,25 @@ def __init__(self, file_path:str):
def get_text(self):
logger.info('Processing Image...')
new_file_path = self.rescale(self.file_path)
- img = PIL.Image.open(new_file_path).convert('L')
- logger.info('Extracting text from rescaled image..')
+ logger.info('Extracting text from rescaled image...')
try:
+ img = PIL.Image.open(new_file_path)
text = pytesseract.image_to_string(image=img)
if not text:
return (None, ResultStatus.NO_RESULT)
return (text, ResultStatus.ALL_OKAY)
except Exception as e:
+ logger.exception(e)
return (None, ResultStatus.MODULE_FAILURE)
@staticmethod
def rescale(file_path):
- logger.info('Rescaling Image to 300 dpi')
- new_file_path = file_path.rsplit('.', 1)[0] + '.jpg'
+ logger.info('Rescaling Image to 300 dpi...')
+ new_file_path = file_path.rsplit('.', 1)[0] + '.png'
cmd = [
- 'convert', file_path, '-bordercolor', 'White',
- '-resample', app_config.UPSCALE_RESOLUTION, '-border', '10x10',
- '-alpha', 'off', new_file_path
+ 'convert', file_path, '-resample', app_config.UPSCALE_RESOLUTION,
+ '-alpha', 'off', '-colorspace', 'Gray', '-threshold', '75%',
+ new_file_path
]
subprocess.run(cmd)
return new_file_path
diff --git a/verifytweet/services/search.py b/verifytweet/services/search.py
index f2550da..1a2caf3 100644
--- a/verifytweet/services/search.py
+++ b/verifytweet/services/search.py
@@ -89,8 +89,8 @@ def aggregate_tweets(self):
if date_checker.format_for_date(
tweet_date) == date_checker.format_for_date(
self.date) and date_checker.valid_date(tweet_date):
- logger.info('Tweet found...: ' +
- str(entry[app_config.TWEET_TEXT_KEY]))
+ logger.debug('Tweet found...: ' +
+ str(entry[app_config.TWEET_TEXT_KEY]))
same_day_tweets.append(entry[app_config.TWEET_TEXT_KEY])
if not same_day_tweets:
return (same_day_tweets, ResultStatus.NO_RESULT)
@@ -108,7 +108,7 @@ def _call_twitter_api(querystring):
'/', app_config.TWITTER_CONTEXT)
r = requests.get(search_url, headers=headers, params=querystring)
response = r.json()
- logger.info('Status Code for Twitter API: ' + str(r.status_code))
+ logger.debug('Status Code for Twitter API: ' + str(r.status_code))
if r.status_code != 200:
raise RuntimeError('Twitter API returned status:' +
str(r.status_code))
@@ -149,5 +149,5 @@ def search(self):
results = twint.output.tweets_object
if not results:
return (results, ResultStatus.NO_RESULT)
- logger.info(f'Search results: {results}\n')
+ logger.debug(f'Search results: {results}\n')
return (results, ResultStatus.ALL_OKAY)
diff --git a/verifytweet/services/text.py b/verifytweet/services/text.py
index 31cfd42..094d0a4 100644
--- a/verifytweet/services/text.py
+++ b/verifytweet/services/text.py
@@ -107,7 +107,7 @@ def clean_text(self):
tweet_snippet = " ".join(picked_words)
if not tweet_snippet:
return (tweet_snippet, ResultStatus.NO_RESULT)
- logger.info(f'Tweet Snippet: {tweet_snippet}')
+ logger.debug(f'Tweet Snippet: {tweet_snippet}')
return (tweet_snippet, ResultStatus.ALL_OKAY)
@@ -159,5 +159,5 @@ def get_similarity(self):
except Exception as e:
logger.exception(e)
return (None, ResultStatus.MODULE_FAILURE)
- logger.info('Similartiy Matrix: ' + str(similarity_matrix))
+ logger.debug('Similartiy Matrix: ' + str(similarity_matrix))
return (similarity_matrix, ResultStatus.ALL_OKAY)
diff --git a/verifytweet/util/logging.py b/verifytweet/util/logging.py
index 4cb51ca..8fa08b8 100644
--- a/verifytweet/util/logging.py
+++ b/verifytweet/util/logging.py
@@ -19,12 +19,17 @@
import logging
import sys
+from verifytweet.config.settings import app_config
+
logger = logging.getLogger()
-logger.setLevel(logging.INFO)
+logger.setLevel(app_config.LOG_LEVEL)  # level comes from Config.LOG_LEVEL
handler = logging.StreamHandler(sys.stdout)
-handler.setLevel(logging.INFO)
+handler.setLevel(app_config.LOG_LEVEL)  # keep handler in step with logger
-formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s')
+web_formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s')
+cli_formatter = logging.Formatter(u'%(message)s')
+formatter = cli_formatter if app_config.RUN_METHOD == "cli" else web_formatter
+
handler.setFormatter(formatter)
logger.addHandler(handler)
diff --git a/verifytweet/util/uploader.py b/verifytweet/util/uploader.py
new file mode 100644
index 0000000..37a17b8
--- /dev/null
+++ b/verifytweet/util/uploader.py
@@ -0,0 +1,53 @@
+# Verify Tweet verifies tweets of a public user
+# from tweet screenshots: real or generated from
+# tweet generators.
+# Copyright (C) 2019 Preetham Kamidi
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import os
+import uuid
+
+from werkzeug.utils import secure_filename
+from werkzeug.datastructures import FileStorage
+
+from verifytweet.util.logging import logger
+from verifytweet.config.settings import app_config
+from verifytweet.util.result import ResultStatus
+
+
+def save_to_disk(file_obj):
+    """Save a file uploaded via POST request to disk under a UUID name.
+
+    Returns the saved path, or None if the extension is not allowed.
+    """
+    if not isinstance(file_obj, FileStorage):
+        raise TypeError(
+            'file obj must be type werkzeug.datastructures.FileStorage')
+    if not file_obj:
+        raise ValueError('file obj cannot be empty')
+    filename = secure_filename(file_obj.filename)
+    if not allowed_file(filename):
+        return None
+    random_name = str(uuid.uuid4()) + '.' + filename.rsplit('.', 1)[1].lower()
+    target = os.path.join(app_config.FILE_DIRECTORY, random_name)
+    logger.debug('Saving file to path: ' + target)
+    file_obj.save(target)
+    return target
+
+def allowed_file(filename):
+    """Checks if uploaded file has a valid extension."""
+    if '.' not in filename:
+        return False
+    return filename.rsplit('.', 1)[1].lower() in app_config.ALLOWED_EXTENSIONS
\ No newline at end of file