Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved spam prevention #2954

Merged
merged 22 commits into from Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/install.in
Expand Up @@ -31,6 +31,7 @@ html2text==2019.9.26
Jinja2==3.1.2
jsonschema==3.2.0
kombu[redis]==4.4.0
langdetect==1.0.9
lxml==4.4.2
mistune==0.8.4
mongoengine==0.27.0
Expand Down
12 changes: 5 additions & 7 deletions requirements/install.pip
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.7
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile --output-file=requirements/install.pip requirements/install.in
Expand Down Expand Up @@ -145,9 +145,7 @@ idna==2.10
# requests
# urlextract
importlib-metadata==6.0.0
# via
# flask
# jsonschema
# via flask
isodate==0.6.0
# via rdflib
itsdangerous==2.1.2
Expand All @@ -172,6 +170,8 @@ kombu[redis]==4.4.0
# via
# -r requirements/install.in
# celery
langdetect==1.0.9
# via -r requirements/install.in
lxml==4.4.2
# via -r requirements/install.in
markupsafe==2.1.2
Expand Down Expand Up @@ -219,7 +219,6 @@ python-dateutil==2.8.1
pytz==2022.7
# via
# -r requirements/install.in
# babel
# celery
# flask-babel
# flask-restx
Expand All @@ -246,6 +245,7 @@ six==1.16.0
# flask-cors
# isodate
# jsonschema
# langdetect
# python-dateutil
# wtforms-json
speaklater==1.3
Expand All @@ -256,8 +256,6 @@ text-unidecode==1.3
# via faker
tlds==2021080800
# via -r requirements/install.in
typing-extensions==4.7.1
# via importlib-metadata
ujson==1.35
# via -r requirements/install.in
unicodecsv==0.14.1
Expand Down
2 changes: 1 addition & 1 deletion udata/app.py
Expand Up @@ -207,7 +207,7 @@ def standalone(app):
def register_extensions(app):
from udata import (
models, routing, tasks, mail, i18n, auth, search, sitemap,
sentry
sentry, notifications
)
tasks.init_app(app)
i18n.init_app(app)
Expand Down
28 changes: 25 additions & 3 deletions udata/core/discussions/api.py
Expand Up @@ -6,6 +6,8 @@

from udata.auth import admin_permission
from udata.api import api, API, fields
from udata.core.spam.api import SpamAPIMixin
from udata.core.spam.fields import spam_fields
from udata.utils import id_or_404
from udata.core.user.api_fields import user_ref_fields

Expand All @@ -24,6 +26,7 @@
'posted_by': fields.Nested(user_ref_fields,
description='The message author'),
'posted_on': fields.ISODateTime(description='The message posting date'),
'spam': fields.Nested(spam_fields),
})

discussion_fields = api.model('Discussion', {
Expand All @@ -43,6 +46,7 @@
'url': fields.UrlFor('api.discussion',
description='The discussion API URI'),
'extras': fields.Raw(description='Extra attributes as key-value pairs'),
'spam': fields.Nested(spam_fields),
})

start_discussion_fields = api.model('DiscussionStart', {
Expand Down Expand Up @@ -86,6 +90,12 @@
help='The page size to fetch')


@ns.route('/<id>/spam/', endpoint='discussion_spam')
@ns.doc(delete={'id': 'unspam'})
class DiscussionSpamAPI(SpamAPIMixin):
    """
    Spam endpoint for a whole discussion.

    Only declares the model class; the lookup by id and the ``delete``
    handler come from ``SpamAPIMixin``.
    """
    model = Discussion


@ns.route('/<id>/', endpoint='discussion')
class DiscussionAPI(API):
'''
Expand Down Expand Up @@ -123,9 +133,9 @@ def post(self, id):
discussion.closed = datetime.utcnow()
discussion.save()
if close:
on_discussion_closed.send(discussion, message=message_idx)
discussion.signal_close(message=message_idx)
else:
on_new_discussion_comment.send(discussion, message=message_idx)
discussion.signal_comment(message=message_idx)
return discussion

@api.secure(admin_permission)
Expand All @@ -139,6 +149,17 @@ def delete(self, id):
return '', 204


@ns.route('/<id>/comments/<int:cidx>/spam', endpoint='discussion_comment_spam')
# NOTE(review): this shares the 'unspam' operation id with the
# discussion-level spam endpoint — confirm the duplicate id is intended.
@ns.doc(delete={'id': 'unspam'})
class DiscussionCommentSpamAPI(SpamAPIMixin):
    """
    Spam endpoint for a single comment of a discussion.

    Named after ``DiscussionCommentAPI`` so that this class does not rebind
    (and shadow) the discussion-level ``DiscussionSpamAPI`` defined earlier
    in this module. The route and endpoint are unchanged.
    """
    def get_model(self, id, cidx):
        """
        Return the discussion and its comment at index ``cidx``.

        The result is a ``(discussion, comment)`` pair: the discussion is the
        stored document and the comment is the embedded message being
        targeted.

        Aborts with 404 when the discussion has no comment at ``cidx`` and
        with 400 when ``cidx`` is 0 (the first comment of the discussion).
        """
        discussion = Discussion.objects.get_or_404(id=id_or_404(id))
        if len(discussion.discussion) <= cidx:
            api.abort(404, 'Comment does not exist')
        elif cidx == 0:
            api.abort(400, 'You cannot unspam the first comment of a discussion')
        return discussion, discussion.discussion[cidx]

@ns.route('/<id>/comments/<int:cidx>', endpoint='discussion_comment')
class DiscussionCommentAPI(API):
'''
Expand Down Expand Up @@ -196,6 +217,7 @@ def post(self):
discussion = Discussion(user=current_user.id, discussion=[message])
form.populate_obj(discussion)
discussion.save()
on_new_discussion.send(discussion)

discussion.signal_new()

return discussion, 201
35 changes: 33 additions & 2 deletions udata/core/discussions/models.py
Expand Up @@ -2,20 +2,26 @@
from datetime import datetime

from udata.models import db
from udata.core.spam.models import SpamMixin, spam_protected
from .signals import (on_new_discussion, on_discussion_closed, on_new_discussion_comment)


log = logging.getLogger(__name__)


COMMENT_SIZE_LIMIT = 50000


class Message(db.EmbeddedDocument):
class Message(SpamMixin, db.EmbeddedDocument):
    """
    Embedded document for one message: its content, posting date and author.
    """
    content = db.StringField(required=True)
    posted_on = db.DateTimeField(required=True, default=datetime.utcnow)
    posted_by = db.ReferenceField('User')

    def texts_to_check_for_spam(self):
        """Return the texts to check for spam: only the message content."""
        texts = [self.content]
        return texts


class Discussion(db.Document):
class Discussion(SpamMixin, db.Document):
user = db.ReferenceField('User')
subject = db.GenericReferenceField()
title = db.StringField(required=True)
Expand All @@ -41,8 +47,33 @@ def person_involved(self, person):
"""
return any(message.posted_by == person for message in self.discussion)

def texts_to_check_for_spam(self):
    """
    Return the texts to check for spam: the title and the first message content.

    An empty string stands in for the content when there is no message.
    """
    # Discussion should always have a first message but it's not the case in some tests…
    if len(self.discussion) > 0:
        opening_content = self.discussion[0].content
    else:
        opening_content = ''
    return [self.title, opening_content]

def embeds_to_check_for_spam(self):
    """Return the embedded messages to check for spam: all but the first one."""
    # The first message content is already returned by texts_to_check_for_spam.
    following_messages = self.discussion[1:]
    return following_messages

@property
def external_url(self):
    """
    Return the external URL of the subject, anchored on this discussion.

    The anchor has the form ``discussion-<id>``.
    """
    build_url = self.subject.url_for
    anchor = 'discussion-{id}'.format(id=self.id)
    return build_url(_anchor=anchor, _external=True)

def spam_report_title(self):
    """Return the title used for this discussion in spam reports: its own title."""
    report_title = self.title
    return report_title

def spam_report_link(self):
    """Return the link used for this discussion in spam reports: its external URL."""
    report_link = self.external_url
    return report_link

@spam_protected()
def signal_new(self):
    """
    Send the ``on_new_discussion`` signal with this discussion as sender.

    NOTE(review): wrapped by ``spam_protected``; presumably the signal is
    withheld while the discussion is considered spam — confirm in
    ``udata.core.spam.models``.
    """
    sender = self
    on_new_discussion.send(sender)

@spam_protected(lambda discussion, message: discussion.discussion[message])
def signal_close(self, message):
    """
    Send the ``on_discussion_closed`` signal for the message at index ``message``.

    The callable given to ``spam_protected`` returns the embedded message at
    that index.
    NOTE(review): presumably that message is the object checked for spam
    before the signal is sent — confirm in ``udata.core.spam.models``.
    """
    sender = self
    on_discussion_closed.send(sender, message=message)

@spam_protected(lambda discussion, message: discussion.discussion[message])
def signal_comment(self, message):
    """
    Send the ``on_new_discussion_comment`` signal for the message at index ``message``.

    The callable given to ``spam_protected`` returns the embedded message at
    that index.
    NOTE(review): presumably that message is the object checked for spam
    before the signal is sent — confirm in ``udata.core.spam.models``.
    """
    sender = self
    on_new_discussion_comment.send(sender, message=message)
Empty file added udata/core/spam/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions udata/core/spam/api.py
@@ -0,0 +1,58 @@
from mongoengine import Q

from udata.api import api, API
from udata.auth import admin_permission
from udata.core.discussions.models import Discussion
from udata.core.spam.fields import potential_spam_fields
from udata.core.spam.models import POTENTIAL_SPAM
from udata.utils import id_or_404


class SpamAPIMixin(API):
    """
    Shared API resource used to clear the spam flag of an object.

    Subclasses either set ``model`` to a document class or override
    ``get_model`` to resolve the targeted objects themselves.
    """
    model = None

    def get_model(self, id):
        """
        Resolve the objects targeted by the request.

        Returns a ``(base_model, spamable_model)`` pair. The base model is
        the document stored inside Mongo and the spamable model is the object
        carrying the spam status; they can differ when the latter is an
        embedded document (for example a comment inside a discussion). This
        default implementation returns the same document twice.
        """
        document = self.model.objects.get_or_404(id=id_or_404(id))
        return document, document

    @api.secure(admin_permission)
    def delete(self, **kwargs):
        """
        Mark a potential spam as no spam
        """
        base_model, spamable = self.get_model(**kwargs)

        # Objects that are not flagged are left untouched; the response is
        # the same empty 200 either way.
        if spamable.is_spam():
            spamable.mark_as_no_spam(base_model)

        return {}, 200


ns = api.namespace('spam', 'Spam related operations')


@ns.route('/', endpoint='spam')
class SpamAPI(API):
    """
    Listing of the discussions flagged as potential spam.
    """
    @api.doc('get_potential_spams')
    @api.secure(admin_permission)
    @api.marshal_with(potential_spam_fields)
    def get(self):
        """Get all potential spam objects"""
        # A discussion is listed when the discussion itself or any of its
        # embedded messages has the potential spam status.
        discussion_flagged = Q(spam__status=POTENTIAL_SPAM)
        message_flagged = Q(discussion__spam__status=POTENTIAL_SPAM)

        reports = []
        for discussion in Discussion.objects(discussion_flagged | message_flagged):
            reports.append({
                'title': discussion.spam_report_title(),
                'link': discussion.spam_report_link(),
            })
        return reports
12 changes: 12 additions & 0 deletions udata/core/spam/fields.py
@@ -0,0 +1,12 @@
from udata.api import api, fields
from .models import SPAM_STATUS_CHOICES

# Read-only spam status, restricted to the values of SPAM_STATUS_CHOICES.
spam_fields = api.model('Spam', {
    'status': fields.String(
        description='Status',
        enum=SPAM_STATUS_CHOICES,
        readonly=True,
    ),
})

# Read-only title and link describing one potential spam entry.
potential_spam_fields = api.model(
    'PotentialSpam',
    {
        'title': fields.String(readonly=True),
        'link': fields.String(readonly=True),
    },
)