In [18]:
import nltk
import enchant   #for spelling correction and checking
from nltk.metrics import edit_distance  # to find the case where spelling correction is needed
# Shared en_US spell-checking dictionary; correct_spellings_all calls its
# check() and suggest() methods.
d = enchant.Dict("en_US")

from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import wordnet as wn,stopwords
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
from collections import defaultdict

import pickle
import re

In [3]:



#Let's define some functions for replacing common sentence use cases
# Ordered (regex, replacement) rules that expand common English contractions
# and slang. Order matters: the specific forms ("won't", "can't", "ain't") must
# come before the generic "(\w+)n't" rule, otherwise "won't" would be rewritten
# to "wo not".
# Replacement templates are raw strings so that "\g<1>" reaches the regex
# engine as an explicit group reference instead of depending on Python passing
# an unrecognised string escape through unchanged (which warns on modern
# Python versions).
replacement_patterns = [
    (r'won\'t', r'will not'),
    (r'can\'t', r'cannot'),
    (r'i\'m', r'i am'),
    (r'wanna', r'want'),
    (r'gonna', r'going to'),
    (r'ain\'t', r'is not'),
    (r'(\w+)\'ll', r'\g<1> will'),
    (r'(\w+)n\'t', r'\g<1> not'),
    (r'(\w+)\'ve', r'\g<1> have'),
    (r'(\w+)\'s', r'\g<1> is'),
    (r'(\w+)\'re', r'\g<1> are'),
    (r'(\w+)\'d', r'\g<1> would'),
]

# Compile once at import time; each entry is (compiled_pattern, replacement).
patterns = [(re.compile(regex), repl) for (regex, repl) in replacement_patterns]

def replace_function(text):
    """Expand contractions and slang in ``text``.

    Every (compiled pattern, replacement) rule in the module-level ``patterns``
    list is applied in order, each one operating on the output of the previous
    rule.

    Args:
        text: The string to rewrite.

    Returns:
        The string after all substitutions have been applied.
    """
    expanded = text
    for compiled_rule, replacement in patterns:
        expanded = compiled_rule.sub(replacement, expanded)
    return expanded




#the following function removes stopwords
def remove_stopwords(text):
    """Drop English stopwords from ``text`` and lower-case the remaining tokens.

    Args:
        text: Input to filter. It is converted with ``str()`` and split on
            whitespace.

    Returns:
        The surviving tokens, lower-cased and joined by single spaces. An empty
        string is returned when nothing survives.
    """
    # A set gives constant-time membership tests; scanning the list returned by
    # stopwords.words() for every token is linear per lookup.
    stopword_set = set(stopwords.words("english"))

    # NOTE(review): membership is tested on the token's ORIGINAL casing and the
    # token is lower-cased only afterwards, so a capitalised token such as
    # "The" survives if the stopword list contains only "the". This is kept
    # as-is on purpose: changing it would alter the text fed to an already
    # trained model. Confirm against the training pipeline before changing.
    kept_tokens = [
        token.lower() for token in str(text).split() if token not in stopword_set
    ]
    return " ".join(kept_tokens)




#the following function is used for spelling checking and correction
def correct_spellings_all(text):
    """Spell-check every whitespace-separated token of ``text``.

    A token is replaced by the dictionary's first suggestion only when the
    dictionary rejects the token, at least one suggestion exists, and that
    first suggestion is more than one edit away from the token. In every other
    case the token is kept unchanged.

    NOTE(review): the usual spelling-replacer recipe accepts suggestions that
    are *within* a small edit distance; here near misses are kept and distant
    suggestions are substituted. Confirm this threshold direction is intended.

    Args:
        text: The string whose tokens are checked.

    Returns:
        The resulting tokens joined by single spaces.
    """
    def _resolve(token):
        # Words the dictionary already accepts pass through untouched.
        if d.check(token):
            return token
        candidates = d.suggest(token)
        if candidates and edit_distance(token, candidates[0]) > 1:
            return candidates[0]
        return token

    return ' '.join(_resolve(token) for token in text.split())




#the following function is used for lemmatizing by finding the POS
# WordNet part-of-speech lookup, indexed by lemmatize_sentence with the first
# letter of a POS tag; any key not listed falls back to noun.
tag_map = defaultdict(
    lambda: wn.NOUN,
    {'J': wn.ADJ, 'V': wn.VERB, 'R': wn.ADV},
)

# Single lemmatizer instance shared by lemmatize_sentence.
lemmatizer = WordNetLemmatizer()


def lemmatize_sentence(text):
    """Lemmatize each token of ``text`` using its part-of-speech tag.

    The text is tokenized with ``nltk.word_tokenize`` and tagged with
    ``pos_tag``. Each token is then lemmatized with the WordNet category found
    in the module-level ``tag_map`` under the first letter of its tag.

    Args:
        text: The sentence to lemmatize.

    Returns:
        The lemmas joined by single spaces.
    """
    tagged_tokens = pos_tag(nltk.word_tokenize(text))
    lemmas = [
        lemmatizer.lemmatize(word, tag_map[pos[0]]) for word, pos in tagged_tokens
    ]
    return ' '.join(lemmas)

In [4]:
# NOTE(review): in the cells shown, `cv` is rebound further down when the
# vectorizer is loaded from 'cv.pkl', so this unfitted instance is not the one
# passed to predict_from_user_input. Confirm whether this cell is still needed.
cv = CountVectorizer(ngram_range=(1,2))

In [58]:
import numpy as np

def preprocess_text(text):
    """Run the full text-cleaning pipeline on ``text``.

    The stages run in this order, each receiving the previous stage's output:
    contraction expansion, stopword removal, spelling correction, lemmatization.

    Args:
        text: Raw input text.

    Returns:
        The fully preprocessed text.
    """
    stages = (
        replace_function,
        remove_stopwords,
        correct_spellings_all,
        lemmatize_sentence,
    )
    cleaned = text
    for stage in stages:
        cleaned = stage(cleaned)
    return cleaned

def expression_check(prediction_input):
    """Translate a predicted label into a user-facing message.

    Args:
        prediction_input: The predicted label; it is compared with ``==``
            against 0 and then 1.

    Returns:
        The negative-sentiment message for 0, the positive-sentiment message
        for 1, and "Invalid Statement." for anything else.
    """
    verdicts = (
        (0, "It has Negative Sentiment."),
        (1, "It has Positive Sentiment."),
    )
    for label, message in verdicts:
        if prediction_input == label:
            return message
    return "Invalid Statement."

def predict_from_user_input(user_input, model, cv):
    """Classify the sentiment of one raw sentence and return a readable verdict.

    Args:
        user_input: Raw text to classify.
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        cv: Fitted vectorizer exposing ``transform``.

    Returns:
        The message produced by ``expression_check`` for the predicted label.

    Side effects:
        Prints the class-probability array followed by the highest probability.
    """
    # Preprocess the user input
    processed_input = preprocess_text(user_input)

    # Transform the preprocessed input into numerical features
    input_data = cv.transform([processed_input])

    # Make predictions using the trained model
    predicted_class = model.predict(input_data)

    # Score the input once and reuse the result for both the printout and the
    # confidence value.
    predicted_probabilities = model.predict_proba(input_data)

    # Exactly one document was transformed, so row 0 holds its class
    # probabilities; take the maximum of that row explicitly rather than of the
    # flattened 2-D array.
    row = predicted_probabilities[0]
    confidence = row[np.argmax(row)]
    print(predicted_probabilities, confidence)

    # Hand over the single predicted label rather than the whole result array.
    # Assumes model.predict returns one label per input row - TODO confirm.
    prediction_msg = expression_check(predicted_class[0])

    return prediction_msg

# Load the saved model
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source. The path is resolved relative to the
# current working directory.
with open('sentiment_analysis_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Load the CountVectorizer
# This rebinds `cv`, replacing the unfitted CountVectorizer created in an
# earlier cell. The same pickle trust caveat applies.
with open('cv.pkl', 'rb') as f:
    cv = pickle.load(f)

## answer is 112669

In [60]:
# Smoke test: run the whole pipeline on one sample sentence and print the
# returned verdict (predict_from_user_input also prints the probabilities).
user_input = "I am happy today and I want to dance."
result = predict_from_user_input(user_input, model, cv)
print(result)

[[0.41742857 0.58257143]] 0.5825714285714284
It has Positive Sentiment.


In [None]:
# NOTE(review): bare literal in an unexecuted cell; it matches the number in
# the "## answer is 112669" comment earlier in the notebook. Purpose unclear -
# confirm what it refers to or remove the cell.
112669