Commit

Merge pull request #23 from kamidipreetham/develop
Release v0.5.2
Preetham Kamidi committed Feb 18, 2020
2 parents 443cf00 + 4f399ea commit 6521fcb
Showing 14 changed files with 99 additions and 1,328 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -109,4 +109,5 @@ venv.bak/

#data folders
dataset
+test-dataset
.DS_Store
1 change: 1 addition & 0 deletions .gitignore
@@ -109,4 +109,5 @@ venv.bak/

#data folders
dataset
+test-dataset
.DS_Store
59 changes: 0 additions & 59 deletions Pipfile

This file was deleted.

1,245 changes: 0 additions & 1,245 deletions Pipfile.lock

This file was deleted.

12 changes: 10 additions & 2 deletions requirements-dev.txt
@@ -3,37 +3,45 @@ alabaster==0.7.12
atomicwrites==1.3.0
attrs==19.1.0
autopep8==1.4.4
+awscli==1.16.196
babel==2.7.0
bandit==1.6.2
bleach==3.1.0
+botocore==1.12.186
certifi==2019.6.16
chardet==3.0.4
+colorama==0.3.9
coverage==4.5.3
+docutils==0.14
gitdb2==2.0.5
gitpython==2.1.11
-hypothesis==4.26.4
+hypothesis==4.27.0
idna==2.8
imagesize==1.1.0
importlib-metadata==0.18
jinja2==2.10.1
+jmespath==0.9.4
markupsafe==1.1.1
more-itertools==7.1.0
packaging==19.0
pbr==5.4.0
pkginfo==1.5.0.1
pluggy==0.12.0
py==1.8.0
+pyasn1==0.4.5
pycodestyle==2.5.0
pygments==2.4.2
pyparsing==2.4.0
pytest-cov==2.7.1
pytest==5.0.1
python-dateutil==2.8.0
pytz==2019.1
-pyyaml==5.1.1
+pyyaml==5.1 ; python_version != '2.6'
readme-renderer==24.0
requests-toolbelt==0.9.1
requests==2.22.0
+rsa==3.4.2
+s3transfer==0.2.1
six==1.12.0
smmap2==2.0.5
snowballstemmer==1.9.0
8 changes: 4 additions & 4 deletions requirements.txt
@@ -1,5 +1,4 @@
-i https://pypi.org/simple
--e git+https://github.com/twintproject/twint.git@ad27650fbc0bf8c3f2c78449088a5ede7239f53a#egg=twint
aiodns==2.0.0
aiohttp-socks==0.2.2
aiohttp==3.5.4
@@ -18,7 +17,7 @@ elasticsearch==7.0.2
eventlet==0.25.0
fake-useragent==0.1.11
flask-cors==3.0.8
-flask==1.1.0
+flask==1.1.1
geographiclib==1.49
geopy==1.20.0
greenlet==0.4.15
@@ -34,10 +33,10 @@ markupsafe==1.1.1
monotonic==1.5
multidict==4.5.2
networkx==2.3
-nltk==3.4.4
+nltk>=3.4.5
numpy==1.16.4
pandas==0.24.2
-pillow==6.1.0
+pillow>=6.2.0
pycares==3.0.0
pycodestyle==2.5.0
pycparser==2.19
@@ -54,6 +53,7 @@ scikit-learn==0.21.2
scipy==1.3.0
six==1.12.0
soupsieve==1.9.2
+twint==2.1.13
typing-extensions==3.7.4 ; python_version < '3.7'
typing==3.7.4 ; python_version < '3.7'
urllib3==1.25.3
28 changes: 28 additions & 0 deletions tests/conftest.py
@@ -36,3 +36,31 @@ def tweet_data():
for row in csvreader:
tweet_list.append(row[10])
return tweet_list[1:]


@pytest.fixture
def test_data():
import subprocess
from verifytweet import util

local_dir_path = os.path.abspath('./tests/static/test-dataset')
cmd_process = subprocess.run([
'aws', 's3', 'sync', 's3://verifytweet-dataset',
os.path.abspath(local_dir_path)
])
cmd_process.check_returncode()
dataset = list()
for (dirpath, dirnames, filenames) in os.walk(local_dir_path):
if filenames:
subset = dict()
subset_type = dirpath.split('/')[-1]
subset_truth = True if dirpath.split('/')[-2] == "real" else False
file_paths = list()
for filename in filenames:
if util.uploader.allowed_file(filename):
file_paths.append(os.path.join(os.path.abspath(dirpath), filename))
subset['expected_value'] = subset_truth
subset['type'] = subset_type
subset['files'] = file_paths
dataset.append(subset)
return dataset
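
For orientation: the fixture above assumes the synced bucket is laid out as <real|fake>/<subset type>/<image files>, so the leaf directory gives the subset type and its parent decides the expected truth value. The layout and values below are illustrative only, not taken from the repository.

# Hypothetical layout under ./tests/static/test-dataset after the aws s3 sync:
#   real/twitter-web/shot-001.png
#   fake/tweet-generator/shot-042.jpg
# The walk over fake/tweet-generator/ would then produce one entry such as:
subset = {
    'expected_value': False,    # parent directory is "fake", not "real"
    'type': 'tweet-generator',  # leaf directory name
    'files': ['/abs/path/tests/static/test-dataset/fake/tweet-generator/shot-042.jpg'],
}
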
4 changes: 1 addition & 3 deletions tests/test_search_service.py
@@ -59,12 +59,10 @@ def test_search_valid_input():
"""Test search for valid input
"""
test_user_id = 'elonmusk'
-    test_datetime = datetime.datetime.strptime('2019-07-06', '%Y-%m-%d')
test_tweet_snippet = 'Sharknado'
test_tweet = 'Sharknado is real'
module_result, module_status = twint_search.search(test_user_id,
-                                                        test_tweet_snippet,
-                                                        test_datetime)
+                                                        test_tweet_snippet)
assert module_status == result.ResultStatus.ALL_OKAY
assert len(module_result) > 0
assert isinstance(module_result[0].tweet, str)
8 changes: 0 additions & 8 deletions tests/test_validator.py
@@ -42,14 +42,6 @@ def test_validator_invalid_type_input():
validator.verify_validity([[]])


-def test_validator_invalid_input():
-    """Test verify validity for invalid input
-    """
-    test_numpy_array = numpy.array([[None, None], [None, None]])
-    with pytest.raises(ValueError):
-        validator.verify_validity(test_numpy_array)


def test_validator_valid_similarity_matrix():
"""Test verfiy validity for valid similarity matrix
"""
50 changes: 50 additions & 0 deletions tests/test_verification_accuracy.py
@@ -0,0 +1,50 @@
# Verify Tweet verifies tweets of a public user
# from tweet screenshots: real or generated from
# tweet generators.
# Copyright (C) 2019 Preetham Kamidi

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import pytest

from sklearn.metrics import accuracy_score

from verifytweet import controller
from verifytweet import result

def test_overall_accuracy(test_data):
overall_expected_output = list()
overall_actual_output = list()
subset_accuracy = list()
non_api_controller = controller.NonAPIApproach()
for subset in test_data:
subset_expected_output = [subset['expected_value']] * len(
subset['files'])
overall_expected_output.extend(subset_expected_output)
actual_output = list()
accuracy_dict = dict()
for file_path in subset['files']:
module_result, module_status = non_api_controller.exec(
file_path)
validity = True if module_status == result.ResultStatus.ALL_OKAY else False
actual_output.append(validity)
overall_actual_output.append(validity)
accuracy_dict['type'] = subset['type']
accuracy_dict['expected'] = subset['expected_value']
accuracy_dict['accuracy'] = accuracy_score(subset_expected_output, actual_output)
subset_accuracy.append(accuracy_dict)
accuracy = accuracy_score(overall_expected_output, overall_actual_output)
print(f'Subset Accuracy: {subset_accuracy}')
print(f'Overall Accuracy: {accuracy}')
assert accuracy > 0.7
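
As a quick illustration of how accuracy_score is used here (plain boolean labels, unweighted), a minimal example with made-up verdicts:

from sklearn.metrics import accuracy_score

expected = [True, True, False, False]    # ground truth for one subset
actual = [True, False, False, False]     # verifier verdicts for the same files
print(accuracy_score(expected, actual))  # 0.75: the fraction of matching entries
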
5 changes: 2 additions & 3 deletions verifytweet/config/settings.py
@@ -39,11 +39,11 @@ class Config(object):

IMAGEMAGICK_PATH = os.getenv('IMAGEMAGICK_PATH') if os.getenv(
'IMAGEMAGICK_PATH') else "convert"
-    UPSCALE_RESOLUTION = "300x300"
FILE_DIRECTORY = tempfile.mkdtemp()
TWEET_MAX_STORE = 150
RUN_METHOD = "cli"
-    LOG_LEVEL = logging.DEBUG if os.getenv('DEBUG') else logging.INFO
+    LOG_LEVEL = logging.DEBUG if os.getenv('VERIFYTWEET_DEBUG') else logging.INFO
ALLOWED_EXTENSIONS = set(["png", "jpg", "jpeg"])
SIMILARITY_THRESHOLD = 0.6


@@ -74,7 +74,6 @@ class WebConfig(Config):
MAX_CONTENT_LENGTH = 2097152
WORKER_COUNT = no_of_workers()
WORKER_CLASS = "eventlet"
-    ALLOWED_EXTENSIONS = set(["png", "jpg", "jpeg"])


run_method = "web" if "VERIFYTWEET_RUN_FOR_WEB" in os.environ else "cli"
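
A minimal sketch of the renamed debug switch; it assumes the variable only needs to be non-empty, since the result of os.getenv is truth-tested:

import logging
import os

os.environ['VERIFYTWEET_DEBUG'] = '1'  # illustrative; any non-empty value enables DEBUG
log_level = logging.DEBUG if os.getenv('VERIFYTWEET_DEBUG') else logging.INFO
print(log_level == logging.DEBUG)  # True
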
2 changes: 1 addition & 1 deletion verifytweet/services/image.py
@@ -74,7 +74,7 @@ def rescale(file_path):
new_file_path = os.path.join(app_config.FILE_DIRECTORY,
str(uuid.uuid1()) + '.png')
cmd = [
-        'convert', file_path, '-resample', app_config.UPSCALE_RESOLUTION,
+        'convert', file_path, '-resample', '300x300',
'-alpha', 'off', '-colorspace', 'Gray', '-threshold', '75%',
'-density', '300x300', '-units', 'PixelsPerCentimeter', '-blur',
'1x65000', '-level', '50x100%', new_file_path
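
A rough sketch of how the assembled cmd list might be executed; the actual invocation in image.py is outside this hunk, so the paths and the subprocess call below are assumptions rather than the project's code:

import subprocess

cmd = ['convert', 'input.png', '-resample', '300x300',
       '-alpha', 'off', '-colorspace', 'Gray', '-threshold', '75%',
       '-density', '300x300', '-units', 'PixelsPerCentimeter', '-blur',
       '1x65000', '-level', '50x100%', 'output.png']
subprocess.run(cmd, check=True)  # raises CalledProcessError if ImageMagick fails
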
2 changes: 1 addition & 1 deletion verifytweet/services/search.py
@@ -156,7 +156,7 @@ def search(self, user_id: str, tweet_snippet: str,
twint_config.Username = user_id
if date:
twint_config.Since = date_checker.format_for_date(date)
-            twint_config.Until = date_checker.format_for_date(date + datetime.timedelta(days=1))
+            twint_config.Until = date_checker.format_for_date(date + datetime.timedelta(days=2))
else:
twint_config.Search = tweet_snippet
twint_config.Limit = app_config.TWEET_MAX_STORE
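
To make the widened window concrete, a small sketch of the date arithmetic; format_for_date is assumed to render dates the way twint expects, and only the offset below is the point:

import datetime

date = datetime.datetime(2019, 7, 6)       # illustrative tweet date
since = date                               # Since: 2019-07-06
until = date + datetime.timedelta(days=2)  # Until: 2019-07-08 (previously 2019-07-07)
print(since.date(), until.date())
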
2 changes: 0 additions & 2 deletions verifytweet/util/validator.py
@@ -35,8 +35,6 @@ def verify_validity(similarity_matrix: ndarray):
"""
if not isinstance(similarity_matrix, ndarray):
raise TypeError('Similarity matrix must type numpy.ndarray')
-    if not similarity_matrix.all():
-        raise ValueError('Similarity matrix must be a valid numpy array')
row = similarity_matrix[0]
for column_index in range(1, row.shape[0]):
if row[column_index] > app_config.SIMILARITY_THRESHOLD:
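
With the ValueError guard gone, verify_validity relies only on the threshold scan shown above. A small illustrative run, assuming row 0 holds the similarity of the extracted tweet text to each candidate tweet and SIMILARITY_THRESHOLD is 0.6:

import numpy

similarity_matrix = numpy.array([[1.0, 0.41, 0.82]])
row = similarity_matrix[0]
match = any(row[i] > 0.6 for i in range(1, row.shape[0]))
print(match)  # True: the second candidate (0.82) clears the threshold
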
