Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Helped fix major limitations; added 4 tests for it #49

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions better_profanity/better_profanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from collections.abc import Iterable

from .constants import ALLOWED_CHARACTERS
from .constants import ALLOWED_CHARACTERS, ALLOWED_CONTANING_PROFANITY
from .utils import (
any_next_words_form_swear_word,
get_complete_path_of_file,
Expand All @@ -13,7 +13,7 @@


class Profanity:
def __init__(self, words=None):
def __init__(self, words=None, whitelist=None):
"""
Args:
words (Iterable/str): Collection of words or file path for a list of
Expand Down Expand Up @@ -43,6 +43,11 @@ def __init__(self, words=None):
}
self.MAX_NUMBER_COMBINATIONS = 1
self.ALLOWED_CHARACTERS = ALLOWED_CHARACTERS

self.whitelist = whitelist or set([])
self.whitelist = set(self.whitelist)
self.whitelist.update(ALLOWED_CONTANING_PROFANITY)

self._default_wordlist_filename = get_complete_path_of_file(
"profanity_wordlist.txt"
)
Expand Down Expand Up @@ -98,7 +103,9 @@ def _populate_words_to_wordset(self, words, *, whitelist_words=None):
)

# Validation
whitelist_words = whitelist_words or []
whitelist_words = whitelist_words or set([])
self.whitelist.update(whitelist_words)

for index, word in enumerate(whitelist_words):
if not isinstance(word, str):
raise ValueError(
Expand Down Expand Up @@ -176,11 +183,13 @@ def _hide_swear_words(self, text, censor_char):
cur_word = ""
continue


# Iterate the next words combined with the current one
# to check if it forms a swear word
next_words_indices = self._update_next_words_indices(
text, next_words_indices, index
)

contains_swear_word, end_index = any_next_words_form_swear_word(
cur_word, next_words_indices, self.CENSOR_WORDSET
)
Expand All @@ -194,16 +203,50 @@ def _hide_swear_words(self, text, censor_char):
if cur_word.lower() in self.CENSOR_WORDSET:
cur_word = get_replacement_for_swear_word(censor_char)



censored_text += cur_word + char
cur_word = ""




# Final check
if cur_word != "" and skip_index < len(text) - 1:
if cur_word.lower() in self.CENSOR_WORDSET:
cur_word = get_replacement_for_swear_word(censor_char)


# Check if removeing letters from behind makes a swear word
cur_word = self._check_for_profanity_within(cur_word, censor_char, [])
censored_text += cur_word

return censored_text

def _check_for_profanity_within(self, cur_word, censor_char, next_words_indices):
    """Censor profanity embedded inside `cur_word`.

    Two passes are made over the word:
      1. suffix pass -- strip leading characters one at a time and censor
         the tail if the remainder is a swear word (e.g. "Afoobar" -> "A****");
      2. prefix pass -- test prefixes from longest to shortest and censor
         the head if it matches (e.g. "foobarAAA" -> "****AAA").

    A word whose lowercase form is in ``self.whitelist`` is returned
    untouched; so is a word that is itself in the censor wordset
    (whole-word censoring is assumed to be handled by the caller).

    Args:
        cur_word (str): word to inspect.
        censor_char (str): character used to build the replacement.
        next_words_indices (list): unused here; kept for signature
            compatibility with the caller.

    Returns:
        str: the word with any embedded profanity replaced.
    """
    if cur_word in self.CENSOR_WORDSET:
        return cur_word

    if cur_word.lower() not in self.whitelist:
        # Suffix pass: drop characters from the front until the remainder
        # matches a swear word, then censor that remainder.
        for start in range(len(cur_word)):
            if cur_word[start:].lower() in self.CENSOR_WORDSET:
                cur_word = cur_word[:start] + get_replacement_for_swear_word(
                    censor_char
                )
                break

        # Prefix pass, longest prefix first.  The previous implementation
        # began one character PAST the end of the word (`cur_word + 'a'`),
        # testing a string that never appears in the input -- an off-by-one.
        # Only real prefixes (length len(cur_word) down to 2) are tested now.
        for end in range(len(cur_word), 1, -1):
            if cur_word[:end].lower() in self.CENSOR_WORDSET:
                cur_word = (
                    get_replacement_for_swear_word(censor_char) + cur_word[end:]
                )
                break

    return cur_word

def _get_start_index_of_next_word(self, text, start_idx):
"""Return the index of the first character of the next word in the given text."""
start_idx_of_next_word = len(text)
Expand Down
6 changes: 6 additions & 0 deletions better_profanity/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@
# Pre-load the unicode characters considered part of words: merge the
# letters listed in the bundled alphabetic_unicode.json into the
# ALLOWED_CHARACTERS set (presumably `load` is json.load -- TODO confirm
# against the file's import block, which is outside this view).
with open(get_complete_path_of_file("alphabetic_unicode.json"), "r") as json_file:
    ALLOWED_CHARACTERS.update(load(json_file))



# Words that merely CONTAIN a profane substring but are themselves clean
# (e.g. "night"); they are exempted from the substring-censoring pass.
# NOTE(review): the "contaning" spelling is preserved because the constant
# and the data file are referenced by this name elsewhere in the package.
# Iterate the file handle directly instead of materializing readlines().
with open(get_complete_path_of_file("contaning_allowed.txt"), "r") as txt_file:
    ALLOWED_CONTANING_PROFANITY = [line.strip() for line in txt_file]

4 changes: 4 additions & 0 deletions better_profanity/contaning_allowed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
night
nightmare
laborum
hello
21 changes: 21 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,27 @@ def test_censorship_with_ending_swear_word(self):
bad_text = "That wh0re gave m3 a very good H@nD j0b."
censored_text = "That **** gave m3 a very good ****."
self.assertEqual(profanity.censor(bad_text), censored_text)

def test_obstructing_letter_1(self):
    # A single leading letter glued onto a swear word must not hide it.
    self.assertEqual(profanity.censor("Afoobar"), "A****")

def test_obstructing_letter_multible(self):
    # Many leading letters glued onto a swear word must not hide it.
    # (Method name spelling kept: unittest discovers tests by name.)
    self.assertEqual(
        profanity.censor("AAAAAAAAAfoobar"), "AAAAAAAAA****"
    )

def test_end_letter_obstructing(self):
    # Trailing junk letters after a swear word must not prevent censoring.
    self.assertEqual(
        profanity.censor("foobarAAAAAAAAA"), "****AAAAAAAAA"
    )

def test_clean_word_that_contains(self):
    # "night" embeds a profane substring but is whitelisted, so it must
    # come back unchanged.
    word = "night"
    self.assertEqual(profanity.censor(word), word)


def test_censorship_empty_text(self):
    # Censoring the empty string is a no-op.
    self.assertEqual(profanity.censor(""), "")
Expand Down Expand Up @@ -205,6 +225,7 @@ def setUp(self):
def test_whitelist_words(self):
bad_text = "I have boobs"
censored_text = "I have ****"

self.assertEqual(profanity.censor(bad_text), censored_text)

# Whitelist the word `boobs`
Expand Down