Skip to content

Commit

Permalink
Merge pull request #8 from kamidipreetham/develop
Browse files Browse the repository at this point in the history
v0.4.1 Release
  • Loading branch information
Preetham Kamidi committed Jun 30, 2019
2 parents 68e2050 + f71cc6c commit 9f2817d
Show file tree
Hide file tree
Showing 16 changed files with 134 additions and 74 deletions.
10 changes: 10 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Read the Docs configuration file

version: 2
sphinx:
configuration: docs/conf.py
formats: all
python:
version: 3.7
install:
- requirements: docs/requirements.txt
10 changes: 8 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import io
import re
import sys
sys.path.insert(0, os.path.abspath('../..'))

sys.path.insert(0, os.path.abspath('..'))
sys.setrecursionlimit(1500)

# -- Project information -----------------------------------------------------
Expand All @@ -21,8 +24,11 @@
copyright = '2019, Preetham Kamidi'
author = 'Preetham Kamidi'

with io.open(os.path.join('../', 'verifytweet/__init__.py'), 'rt', encoding='utf8') as f:
version = re.search(r'__version__ = "(.*?)"', f.read()).group(1)

# The full version, including alpha/beta/rc tags
release = 'v0.3'
release = version


# -- General configuration ---------------------------------------------------
Expand Down
7 changes: 4 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ Welcome to verifytweet's documentation!
:maxdepth: 2
:caption: Contents:

router

controller
===================
.. automodule:: verifytweet.app
.. automodule:: verifytweet.services.controller
:members:

image service
Expand All @@ -30,7 +31,7 @@ text service
.. automodule:: verifytweet.services.text
:members:

shared module: date checker
utility module: date checker
=============================
.. automodule:: verifytweet.util.date_checker
:members:
Expand Down
25 changes: 25 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
alabaster==0.7.12
Babel==2.7.0
certifi==2019.6.16
chardet==3.0.4
docutils==0.14
idna==2.8
imagesize==1.1.0
Jinja2==2.10.1
MarkupSafe==1.1.1
packaging==19.0
Pygments==2.4.2
pyparsing==2.4.0
pytz==2019.1
requests==2.22.0
six==1.12.0
snowballstemmer==1.9.0
Sphinx==2.1.2
sphinx_bootstrap_theme
sphinxcontrib-applehelp==1.0.1
sphinxcontrib-devhelp==1.0.1
sphinxcontrib-htmlhelp==1.0.2
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.2
sphinxcontrib-serializinghtml==1.1.3
urllib3==1.25.3
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ twine==1.13.0
typing==3.7.4
typing-extensions==3.7.4
urllib3==1.25.3
-e git+git@github.com:kamidipreetham/verifytweet.git@251b221ad452a80d54ef165b90b450a4710ac04d#egg=verifytweet
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.15.4
Expand Down
7 changes: 2 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,10 @@
python_requires=">=3.6.*",
install_requires=[
"click>=5.1", "Pillow==6.0.0", "pytesseract==0.2.6",
"requests==2.22.0", "scikit-learn==0.21.2","Werkzeug>=0.15",
"nltk>=3.4.3", "python-dateutil==2.8.0",
"requests==2.22.0", "scikit-learn==0.21.2", "nltk>=3.4.3",
"python-dateutil==2.8.0",
"twint @ git+https://github.com/twintproject/twint.git"
],
dependency_links=[
""
],
entry_points={
"console_scripts": ["verifytweet = verifytweet.cli:run_as_command"]
},
Expand Down
2 changes: 1 addition & 1 deletion verifytweet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__version__ = "0.4.0"
__version__ = "0.4.1"
4 changes: 2 additions & 2 deletions verifytweet/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import verifytweet.services.controller as controller
import verifytweet.services.image as image_service
import verifytweet.util.uploader as image_uploader

from verifytweet.util.logging import logger
from verifytweet.config.settings import app_config
Expand Down Expand Up @@ -70,8 +71,7 @@ def verify_tweet():
if not data_type or not request_image:
return "Missing form fields", 400
try:
uploader = image_service.Uploader(request_image)
file_path = uploader.save_to_disk()
file_path = image_uploader.save_to_disk(request_image)
rest_controller = controller.NonAPIApproach(file_path)
except Exception as e:
logger.exception(e)
Expand Down
2 changes: 1 addition & 1 deletion verifytweet/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def run_as_command(filepath):
elif controller_status == ResultStatus.NO_RESULT:
print(f"Fake Tweet!")
else:
print(f"Verified Tweet!\n")
print(f"\nVerified Tweet!")
print(
f"**** Username: {tweet_obj.username} ****\n**** Tweet: {tweet_obj.tweet} ****\n**** Likes: {tweet_obj.likes_count} ****\n**** Retweets: {tweet_obj.retweets_count} ****\n**** Link: {tweet_obj.link} ****"
)
4 changes: 4 additions & 0 deletions verifytweet/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import multiprocessing
import tempfile
import logging


def no_of_workers():
Expand All @@ -41,6 +42,8 @@ class Config(object):
UPSCALE_RESOLUTION = "300x300"
FILE_DIRECTORY = tempfile.mkdtemp()
TWEET_MAX_STORE = 150
RUN_METHOD = "cli"
LOG_LEVEL = logging.INFO


class TwitterAPIConfig(Config):
Expand Down Expand Up @@ -75,6 +78,7 @@ class WebConfig(Config):


run_method = "cli" if "VERIFYTWEET_RUN_FROM_CLI" in os.environ else "web"
Config.RUN_METHOD = run_method
configurations = {"web": WebConfig, "cli": Config}

app_config = configurations[run_method]
4 changes: 2 additions & 2 deletions verifytweet/services/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def preprocess(file_path):
extracted_text, extractor_status = text_extractor.get_text()
if extractor_status != ResultStatus.ALL_OKAY:
return (None, extractor_status)
logger.info('Processed text: ' + extracted_text)
logger.debug('Processed text: ' + extracted_text)

try:
entity_parser = text_service.DataParser(extracted_text)
Expand All @@ -132,5 +132,5 @@ def preprocess(file_path):
entities, parser_status = entity_parser.get_entities()
if parser_status != ResultStatus.ALL_OKAY:
return (None, parser_status)
logger.info('Entities: ' + str(entities))
logger.debug('Entities: ' + str(entities))
return (entities, parser_status)
60 changes: 10 additions & 50 deletions verifytweet/services/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,57 +16,16 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import uuid
import subprocess

import pytesseract
import PIL

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage
import pytesseract

from verifytweet.util.logging import logger
from verifytweet.config.settings import app_config
from verifytweet.util.result import ResultStatus


class Uploader(object):
    """Persist an uploaded image to the configured file directory.

    The upload is written under ``app_config.FILE_DIRECTORY`` with a freshly
    generated UUID as its base name and the lower-cased extension of the
    sanitised original filename. Only extensions listed in
    ``app_config.ALLOWED_EXTENSIONS`` are accepted.

    Attributes:
        file: The uploaded file, type: werkzeug.datastructures.FileStorage.
    """

    def __init__(self, file_obj: FileStorage):
        """Validate and store the upload.

        Raises:
            TypeError: if ``file_obj`` is not a ``FileStorage`` instance.
            ValueError: if ``file_obj`` evaluates as falsy.
        """
        if not isinstance(file_obj, FileStorage):
            raise TypeError(
                'file obj must be type werkzeug.datastructures.FileStorage')
        if not file_obj:
            raise ValueError('file obj cannot be empty')
        self.file = file_obj

    def save_to_disk(self):
        """Write the upload to disk.

        Returns:
            The path the file was saved to, or ``None`` when the upload is
            falsy or its extension is not allowed.
        """
        sanitised_name = secure_filename(self.file.filename)
        # Guard clause: reject anything that is empty or has a disallowed type.
        if not (self.file and self.allowed_file(sanitised_name)):
            return None

        # The original name is discarded; only its extension is kept.
        unique_stem = str(uuid.uuid4())
        extension = sanitised_name.rsplit('.', 1)[1].lower()
        destination = os.path.join(app_config.FILE_DIRECTORY,
                                   unique_stem + '.' + extension)
        logger.info('Saving file to path: ' + destination)
        self.file.save(destination)
        return destination

    @staticmethod
    def allowed_file(filename):
        """Return True when ``filename`` has an extension in ALLOWED_EXTENSIONS."""
        # rpartition yields an empty separator when there is no dot at all.
        _, dot, extension = filename.rpartition('.')
        if not dot:
            return False
        return extension.lower() in app_config.ALLOWED_EXTENSIONS


class Extractor(object):
"""Extracts text from image
Expand All @@ -76,7 +35,7 @@ class Extractor(object):
file_path: A string indicating file path where the image is stored.
"""

def __init__(self, file_path:str):
def __init__(self, file_path: str):
if not isinstance(file_path, str):
raise TypeError('File path must be type string')
if not file_path:
Expand All @@ -86,24 +45,25 @@ def __init__(self, file_path:str):
def get_text(self):
logger.info('Processing Image...')
new_file_path = self.rescale(self.file_path)
img = PIL.Image.open(new_file_path).convert('L')
logger.info('Extracting text from rescaled image..')
logger.info('Extracting text from rescaled image...')
try:
img = PIL.Image.open(new_file_path)
text = pytesseract.image_to_string(image=img)
if not text:
return (None, ResultStatus.NO_RESULT)
return (text, ResultStatus.ALL_OKAY)
except Exception as e:
logger.exception(e)
return (None, ResultStatus.MODULE_FAILURE)

@staticmethod
def rescale(file_path):
logger.info('Rescaling Image to 300 dpi')
new_file_path = file_path.rsplit('.', 1)[0] + '.jpg'
logger.info('Rescaling Image to 300 dpi...')
new_file_path = file_path.rsplit('.', 1)[0] + '.png'
cmd = [
'convert', file_path, '-bordercolor', 'White',
'-resample', app_config.UPSCALE_RESOLUTION, '-border', '10x10',
'-alpha', 'off', new_file_path
'convert', file_path, '-resample', app_config.UPSCALE_RESOLUTION,
'-alpha', 'off', '-colorspace', 'Gray', '-threshold', '75%',
new_file_path
]
subprocess.run(cmd)
return new_file_path
8 changes: 4 additions & 4 deletions verifytweet/services/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ def aggregate_tweets(self):
if date_checker.format_for_date(
tweet_date) == date_checker.format_for_date(
self.date) and date_checker.valid_date(tweet_date):
logger.info('Tweet found...: ' +
str(entry[app_config.TWEET_TEXT_KEY]))
logger.debug('Tweet found...: ' +
str(entry[app_config.TWEET_TEXT_KEY]))
same_day_tweets.append(entry[app_config.TWEET_TEXT_KEY])
if not same_day_tweets:
return (same_day_tweets, ResultStatus.NO_RESULT)
Expand All @@ -108,7 +108,7 @@ def _call_twitter_api(querystring):
'/', app_config.TWITTER_CONTEXT)
r = requests.get(search_url, headers=headers, params=querystring)
response = r.json()
logger.info('Status Code for Twitter API: ' + str(r.status_code))
logger.debug('Status Code for Twitter API: ' + str(r.status_code))
if r.status_code != 200:
raise RuntimeError('Twitter API returned status:' +
str(r.status_code))
Expand Down Expand Up @@ -149,5 +149,5 @@ def search(self):
results = twint.output.tweets_object
if not results:
return (results, ResultStatus.NO_RESULT)
logger.info(f'Search results: {results}\n')
logger.debug(f'Search results: {results}\n')
return (results, ResultStatus.ALL_OKAY)
4 changes: 2 additions & 2 deletions verifytweet/services/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def clean_text(self):
tweet_snippet = " ".join(picked_words)
if not tweet_snippet:
return (tweet_snippet, ResultStatus.NO_RESULT)
logger.info(f'Tweet Snippet: {tweet_snippet}')
logger.debug(f'Tweet Snippet: {tweet_snippet}')
return (tweet_snippet, ResultStatus.ALL_OKAY)


Expand Down Expand Up @@ -159,5 +159,5 @@ def get_similarity(self):
except Exception as e:
logger.exception(e)
return (None, ResultStatus.MODULE_FAILURE)
logger.info('Similartiy Matrix: ' + str(similarity_matrix))
logger.debug('Similartiy Matrix: ' + str(similarity_matrix))
return (similarity_matrix, ResultStatus.ALL_OKAY)
7 changes: 6 additions & 1 deletion verifytweet/util/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@
import logging
import sys

from verifytweet.config.settings import app_config

logger = logging.getLogger()
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)

formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s')
web_formatter = logging.Formatter(u'%(asctime)s -- %(levelname)s -- %(message)s')
cli_formatter = logging.Formatter(u'%(message)s')
formatter = cli_formatter if app_config.RUN_METHOD == "cli" else web_formatter

handler.setFormatter(formatter)
logger.addHandler(handler)
Loading

0 comments on commit 9f2817d

Please sign in to comment.