In [65]:
import os
import string

import nltk
import openai
import spacy
from autocorrect import Speller
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

In [66]:
# Fetch the NLTK data packages this notebook relies on (a no-op when they are
# already up to date locally).
for resource in ("punkt", "stopwords"):
    nltk.download(resource)
# Kept as the cell's last expression so its return value is still displayed.
nltk.download("averaged_perceptron_tagger")

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/sternsemasuka/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sternsemasuka/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/sternsemasuka/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [67]:
class IntentService:
    """Normalise user questions and screen them for harmful content.

    ``preprocess_question`` turns free text into a compact, lower-cased token
    string; ``detect_malicious_intent`` asks the OpenAI moderation endpoint
    whether a piece of text is flagged.
    """

    # Words that negate the content word that follows them. The contraction
    # forms are spelled without apostrophes because punctuation is stripped
    # before tokenisation ("don't" -> "dont").
    NEGATION_CUES = frozenset({
        "not", "no", "nor", "never", "neither", "cannot",
        "aint", "arent", "cant", "couldnt", "didnt", "doesnt", "dont",
        "hadnt", "hasnt", "havent", "isnt", "mustnt", "neednt",
        "shouldnt", "wasnt", "werent", "wont", "wouldnt",
    })

    # Translation table that deletes every ASCII punctuation character.
    _PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    def __init__(self):
        """Load the NLP resources and configure the OpenAI client.

        Builds the spell checker, the English NLTK stop-word set, a Porter
        stemmer and the spaCy ``en_core_web_sm`` pipeline, then sets
        ``openai.api_key`` from the ``OPENAI_API_KEY`` environment variable
        (``None`` when the variable is unset).
        """
        self.spell_checker = Speller()
        self.stop_words = set(stopwords.words('english'))
        self.stemmer = PorterStemmer()
        self.nlp = spacy.load('en_core_web_sm')
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def _lemmatize(self, word):
        """Return the spaCy lemma of ``word``, or ``word`` itself if spaCy yields no token."""
        doc = self.nlp(word)
        return doc[0].lemma_ if len(doc) else word

    def preprocess_question(self, question):
        """Normalise a question into a space-separated string of content tokens.

        Steps: lower-case, strip punctuation, tokenise, drop stop words, stem,
        lemmatise, spell-correct, and mark negation by prefixing the first
        kept word after a negation cue with ``not_``.

        Args:
            question: Raw question text. ``None`` or an empty string is
                accepted.

        Returns:
            The normalised token string; ``""`` for empty or ``None`` input.
        """
        if not question:
            return ""

        text = question.lower().translate(self._PUNCTUATION_TABLE)

        normalised = []
        negate_next = False
        for word in word_tokenize(text):
            # Negation cues must be checked before stop-word removal: several
            # of them ("not", "no", "nor") are themselves stop words and would
            # otherwise vanish without leaving a trace. Tracking is lexical
            # rather than parse-based because the text no longer forms a
            # sentence once stop words are stripped and words are stemmed.
            if word in self.NEGATION_CUES:
                negate_next = True
                continue
            if word in self.stop_words:
                # The pending negation deliberately carries across stop words
                # ("not a good idea" -> "not_good idea").
                continue

            word = self._lemmatize(self.stemmer.stem(word))
            # NOTE(review): spelling correction still runs on the stemmed form,
            # as before. Correcting before stemming would probably be more
            # accurate but changes the output vocabulary - confirm with
            # downstream consumers before reordering.
            word = self.spell_checker(word)

            if negate_next:
                word = "not_" + word
                negate_next = False
            normalised.append(word)

        # A cue with no content word after it (e.g. a trailing "not") is dropped.
        return ' '.join(normalised).strip()

    def detect_malicious_intent(self, question):
        """Ask the OpenAI moderation endpoint whether ``question`` is flagged.

        Args:
            question: Text to send to ``openai.moderations.create``.

        Returns:
            One of three fixed messages: the text was flagged, nothing was
            detected, or the check could not be completed. Errors are printed
            and reported through the return value rather than raised.
        """
        try:
            response = openai.moderations.create(
                input=question,
                model="text-moderation-latest",
            )
            # Only the first result is inspected; a single input string is sent.
            if response.results[0].flagged:
                return "This question has been flagged for malicious content and cannot be processed."
            return "No malicious intent detected."
        except Exception as e:
            # Best-effort by design: a moderation failure must not crash the caller.
            print(f"Error in moderation: {e}")
            return "Unable to determine intent due to an error."



In [64]:
# Example usage
preprocessor = IntentService()
sample_question = "What's the impact of AI in Healthcare?"
processed_question = preprocessor.preprocess_question(sample_question)
print(processed_question)
# Moderate the user's original wording, not the normalised string: the
# preprocessing strips punctuation and stop words and stems the rest, so the
# normalised text no longer reads like what the user actually wrote.
final_output = preprocessor.detect_malicious_intent(sample_question)
print(final_output)

what impact ai healthcare
No malicious intent detected.
