In [1]:
# Standard library
import pickle
import re
from collections import defaultdict

# Third-party
import enchant   # spell checking and suggestions
import nltk
from nltk import pos_tag
from nltk.corpus import wordnet as wn, stopwords
from nltk.metrics import edit_distance  # distance between a word and its suggested correction
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer

# Load the spell-checking dictionary used by correct_spellings_all().
# `pickle` has to be imported before this point: calling pickle.load() ahead of
# `import pickle` raises NameError on a fresh kernel.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
try:
    with open('enchant_dict.pkl', 'rb') as f:
        d = pickle.load(f)
except FileNotFoundError:
    # The pickle file is produced by the last cell of this notebook, so it does
    # not exist on a first run; build the same en_US dictionary directly instead.
    d = enchant.Dict("en_US")

In [2]:



# Regex rewrites that expand common English contractions and informal forms.
# Order matters: the specific cases (won't, can't, ...) run before the generic
# suffix rules such as (\w+)n't, which would otherwise split them differently.
# Every pattern and replacement is a raw string so that `\w` and `\g<1>` reach
# the regex engine untouched ("\g" inside a plain string is an invalid escape).
# Matching is case-sensitive and has no word-boundary anchors.
replacement_patterns = [
    (r'won\'t', r'will not'),
    (r'can\'t', r'cannot'),
    (r'i\'m', r'i am'),
    # An empty-pattern entry (r'', '') used to sit here; it replaced nothing
    # with nothing at every position, so it was a no-op and has been dropped.
    (r'wanna', r'want'),
    (r'gonna', r'going to'),
    (r'ain\'t', r'is not'),
    (r'(\w+)\'ll', r'\g<1> will'),
    (r'(\w+)n\'t', r'\g<1> not'),
    (r'(\w+)\'ve', r'\g<1> have'),
    (r'(\w+)\'s', r'\g<1> is'),
    (r'(\w+)\'re', r'\g<1> are'),
    (r'(\w+)\'d', r'\g<1> would'),
]
# Compile once so every call to the replacement routine reuses the same objects.
patterns = [(re.compile(regex), repl) for (regex, repl) in replacement_patterns]

def replace_function(text):
    """Expand contractions in `text` by applying every rule in `patterns`, in order.

    Each rule operates on the output of the previous one, so the order of the
    module-level `patterns` list determines the final string.

    Returns the rewritten string.
    """
    expanded = text
    for compiled, replacement in patterns:
        expanded = compiled.sub(replacement, expanded)
    return expanded




# The following function removes stopwords.
def remove_stopwords(text):
    """Return `text` without NLTK English stopwords, remaining words lowercased.

    The input is coerced with str() and split on whitespace; the surviving
    words are lowercased and joined with single spaces.

    NOTE(review): membership is tested on each word as written, *before* it is
    lowercased, so a capitalised stopword (e.g. "The") passes the filter and
    is emitted as "the" -- assuming the NLTK list is all lowercase. That
    ordering is kept unchanged here because any model fitted on text produced
    by this function would see different features if it changed; confirm
    before altering.
    """
    # A set gives constant-time membership tests instead of a list scan per word.
    stopword_set = set(stopwords.words("english"))
    kept = [word.lower() for word in str(text).split() if word not in stopword_set]
    # join() avoids rebuilding the output string on every iteration.
    return " ".join(kept)




# The following function is used for spell checking and correction.
def correct_spellings_all(text):
    """Spell-check each whitespace-separated word of `text` against dictionary `d`.

    A word is replaced by the dictionary's first suggestion only when the word
    fails d.check(), at least one suggestion exists, and the edit distance
    between the word and that suggestion is greater than 1. In every other
    case the word is kept as written. Words are re-joined with single spaces.

    NOTE(review): the `> 1` test means near-miss typos (distance 1) are left
    alone while more distant suggestions are accepted. That looks inverted
    relative to the usual "accept only close suggestions" rule -- confirm the
    intent before changing it, since downstream features depend on this output.
    """
    result = []
    for token in text.split():
        if not d.check(token):
            candidates = d.suggest(token)
            if candidates and edit_distance(token, candidates[0]) > 1:
                result.append(candidates[0])
                continue
        # Known word, no suggestions, or suggestion within distance 1: keep as-is.
        result.append(token)
    return ' '.join(result)




# Support objects for lemmatizing with part-of-speech information.
# tag_map translates the first letter of a POS tag into a WordNet POS constant;
# any letter other than J, V or R falls back to noun via the default factory.
tag_map = defaultdict(lambda: wn.NOUN, {'J': wn.ADJ, 'V': wn.VERB, 'R': wn.ADV})

lemmatizer = WordNetLemmatizer()


def lemmatize_sentence(text):
    """Tokenize `text`, POS-tag the tokens, and lemmatize each one.

    The first character of each token's tag selects the WordNet POS through
    `tag_map`. The lemmas are returned joined by single spaces.
    """
    tagged_tokens = pos_tag(nltk.word_tokenize(text))
    lemmas = [
        lemmatizer.lemmatize(word, tag_map[pos[0]])
        for word, pos in tagged_tokens
    ]
    return ' '.join(lemmas)

In [6]:
import numpy as np

def preprocess_text(text):
    """Run raw text through the full cleaning pipeline and return the result.

    Stages, applied in this order: contraction expansion, stopword removal,
    spelling correction, lemmatization. Each stage receives the previous
    stage's output.
    """
    stages = (
        replace_function,
        remove_stopwords,
        correct_spellings_all,
        lemmatize_sentence,
    )
    cleaned = text
    for stage in stages:
        cleaned = stage(cleaned)
    return cleaned

def expression_check(prediction_input):
    """Translate a predicted class label into a human-readable message.

    The input is compared with `==` against 0 first and then 1; anything that
    equals neither yields the "Invalid Statement." message.
    """
    label_messages = (
        (0, "It has Negative Sentiment."),
        (1, "It has Positive Sentiment."),
    )
    for label, message in label_messages:
        # Plain equality (not a dict lookup) so unhashable inputs such as a
        # one-element array still work.
        if prediction_input == label:
            return message
    return "Invalid Statement."

def predict_from_user_input(user_input, model, cv):
    """Classify the sentiment of a single piece of raw text.

    Parameters
    ----------
    user_input : str
        Raw text to classify; it is cleaned with preprocess_text() first.
    model
        Fitted classifier exposing predict() and predict_proba().
    cv
        Fitted vectorizer exposing transform().

    Returns
    -------
    tuple
        (predicted_probabilities, prediction_msg): the predict_proba() output
        for the one-row input, and the message from expression_check() for the
        class returned by predict().

    Side effects
    ------------
    Prints the probability array and the highest probability.
    """
    # Preprocess the user input
    processed_input = preprocess_text(user_input)

    # Transform the preprocessed input into numerical features (a single row)
    input_data = cv.transform([processed_input])

    # Make predictions using the trained model
    predicted_class = model.predict(input_data)

    # predict_proba is evaluated once and reused for both the printout and the
    # return value; it was previously called twice on the same input.
    predicted_probabilities = model.predict_proba(input_data)
    # argmax over the flattened array equals the column index because there is
    # exactly one row.
    predicted_class_index = np.argmax(predicted_probabilities)
    confidence = predicted_probabilities[0, predicted_class_index]
    print(predicted_probabilities, confidence)

    prediction_msg = expression_check(predicted_class)

    return predicted_probabilities, prediction_msg

# Load the saved model from the working directory.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files that come from a trusted source.
with open('sentiment_analysis_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Load the CountVectorizer that is passed to predict_from_user_input() as `cv`.
# NOTE(review): presumably the same vectorizer that was fitted alongside the model -- confirm.
with open('cv.pkl', 'rb') as f:
    cv = pickle.load(f)

## answer is 112669

In [4]:
# Smoke test: classify one sentence and print the (probabilities, message) tuple.
user_input = "I am happy today and I want to dance."
result = predict_from_user_input(user_input, model, cv)
print(result)

(array([[0.41742857, 0.58257143]]), 'It has Positive Sentiment.')


In [32]:
112669

112669

In [7]:
# Hand-written sample sentences used to spot-check the classifier in the loop below.
sentences = [
    "I am extremely happy with my new car; it drives smoothly and looks fantastic.",
    "I felt really disappointed with my performance in the meeting today.",
    "My vacation was absolutely wonderful; I enjoyed every single day.",
    "I am very frustrated with my computer; it keeps crashing and losing my work.",
    "I am proud of my achievements this year; I've worked hard and it paid off.",
    "I can't stand my new job; the work environment is toxic and stressful.",
    "I love spending time with my family; they always make me feel happy and supported.",
    "I am unhappy with my current living situation; the neighbors are too noisy.",
    "My health has improved significantly, and I feel better than ever.",
    "I regret my decision to move to this city; I feel lonely and out of place.",
    "I am excited about my upcoming project; it's going to be a great opportunity.",
    "I am worried about my financial situation; my expenses are too high.",
    "I feel confident in my abilities; I know I can handle any challenge.",
    "I am dissatisfied with my internet service; it's slow and unreliable.",
    "I am feeling very stressed about my upcoming exams.",
    "I am thrilled with my new hobby; it brings me so much joy.",
]


# Classify every sample sentence. For each one, print the sentence, the first
# column of the probability row (raw, rounded to two decimals, and its type),
# the sentiment message, and a separator line.
for user_input in sentences:
    print(user_input)
    probabilities, result = predict_from_user_input(user_input, model, cv)
    first_column_probability = probabilities[0][0]
    print(first_column_probability)
    print(f"{first_column_probability:,.2f}")
    print(type(first_column_probability))
    print(result)
    print("-" * 40)

I am extremely happy with my new car; it drives smoothly and looks fantastic.
[[0.24002778 0.75997222]] 0.7599722222222222
0.2400277777777778
0.24
<class 'numpy.float64'>
It has Positive Sentiment.
----------------------------------------
I felt really disappointed with my performance in the meeting today.
[[0.87928175 0.12071825]] 0.8792817460317459
0.8792817460317459
0.88
<class 'numpy.float64'>
It has Negative Sentiment.
----------------------------------------
My vacation was absolutely wonderful; I enjoyed every single day.
[[0.30549603 0.69450397]] 0.6945039682539685
0.30549603174603174
0.31
<class 'numpy.float64'>
It has Positive Sentiment.
----------------------------------------
I am very frustrated with my computer; it keeps crashing and losing my work.
[[0.91295238 0.08704762]] 0.9129523809523807
0.9129523809523807
0.91
<class 'numpy.float64'>
It has Negative Sentiment.
----------------------------------------
I am proud of my achievements this year; I've worked hard and it 

In [34]:
# Build the en_US spell-checking dictionary used by correct_spellings_all() as `d`.
d = enchant.Dict("en_US")

In [35]:
d

<enchant.Dict at 0x1d51304a4d0>

In [8]:
# Persist the dictionary object to 'enchant_dict.pkl', the file the first cell
# of this notebook loads back into `d`.
with open('enchant_dict.pkl', 'wb') as f:
    pickle.dump(d, f)