<a href="https://colab.research.google.com/github/ravi-3690/ML-WORKSHOP-PROJECTS/blob/main/NLP(29nov).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#import libraries
import nltk, string, pickle  #pickle serializes the trained model and vectorizer to disk
from nltk.corpus import movie_reviews, stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
#Load and shuffle data
import random

# Fetch the NLTK resources this notebook reads: the review corpus, the
# stop-word list and the WordNet data.
nltk.download(['movie_reviews','stopwords','wordnet'])

# Build one (token_list, category) pair per review file, category by category.
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle with a dedicated, seeded generator so that re-running the notebook
# from a fresh kernel yields the same ordering. An unseeded shuffle makes every
# downstream result differ from run to run. Using a private Random instance
# leaves the global `random` state untouched.
random.Random(42).shuffle(documents)

# Split the pairs into two parallel tuples: token lists and their labels.
reviews, sentiments = zip(*documents)

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
#Preprocess text
def preprocess(review, lemmatizer=None, stop_words=None):
    """Turn one tokenized review into a cleaned, space-joined string.

    Tokens that consist only of punctuation characters are dropped, as are
    tokens whose lower-cased form is in the stop-word collection. Every
    remaining token is lower-cased and lemmatized.

    Args:
        review: Iterable of string tokens.
        lemmatizer: Optional object exposing ``lemmatize(word)``. When omitted
            a new ``WordNetLemmatizer`` is created.
        stop_words: Optional container of lower-case words to discard. When
            omitted the NLTK English stop-word list is used.

    Returns:
        The surviving lemmatized tokens joined by single spaces ('' when
        nothing survives).
    """
    if lemmatizer is None:
        lemmatizer = WordNetLemmatizer()
    if stop_words is None:
        stop_words = set(stopwords.words('english'))
    punctuation = set(string.punctuation)

    cleaned = []
    for word in review:
        # `word in string.punctuation` is a substring test against one long
        # string, so multi-character tokens such as "--" or "..." slipped
        # through. Checking character by character removes them as well.
        if all(char in punctuation for char in word):
            continue
        lowered = word.lower()
        if lowered in stop_words:
            continue
        cleaned.append(lemmatizer.lemmatize(lowered))
    return ' '.join(cleaned)
# Clean every tokenized review, keeping the same order as `reviews`.
processed_reviews = list(map(preprocess, reviews))

In [None]:
#Train test split
# Hold out 20% of the reviews for evaluation; the fixed random_state makes the
# partition repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    processed_reviews,
    sentiments,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Vectorize the text and train the model
# Learn the TF-IDF vocabulary (capped at 5000 features) from the training
# reviews only, then apply that fitted vocabulary to the test reviews.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a logistic-regression classifier with default settings on the training matrix.
model = LogisticRegression()
model.fit(X_train_vec, y_train)

In [None]:
#Evaluate
# Predict labels for the held-out reviews, then report overall accuracy
# followed by the per-class precision/recall/F1 table.
y_pred = model.predict(X_test_vec)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

test_report = classification_report(y_test, y_pred)
print("Report:\n", test_report)

Accuracy: 0.8375
Report:
               precision    recall  f1-score   support

         neg       0.84      0.84      0.84       207
         pos       0.83      0.83      0.83       193

    accuracy                           0.84       400
   macro avg       0.84      0.84      0.84       400
weighted avg       0.84      0.84      0.84       400



In [None]:
#Save model and vectorizer
# Persist the fitted classifier and the fitted vectorizer to two separate
# pickle files. Both files are opened before anything is written.
with open('sentiment_model.pkl', 'wb') as model_file, \
        open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(model, model_file)
    pickle.dump(vectorizer, vectorizer_file)

In [None]:
import pickle
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [None]:
#Load the trained model and vectorizer
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a source you trust.
with open('sentiment_model.pkl', 'rb') as model_fh:
    with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_fh:
        model = pickle.load(model_fh)
        vectorizer = pickle.load(vectorizer_fh)

In [None]:
#Function for text preprocessing
def preprocess_input(text, lemmatizer=None, stop_words=None):
    """Clean one raw review string so it can be passed to the fitted vectorizer.

    The text is lower-cased, every punctuation character is replaced by a
    space, the result is split on whitespace, stop words are dropped and the
    remaining words are lemmatized.

    Args:
        text: Raw review text.
        lemmatizer: Optional object exposing ``lemmatize(word)``. When omitted
            a new ``WordNetLemmatizer`` is created.
        stop_words: Optional container of lower-case words to discard. When
            omitted the NLTK English stop-word list is used.

    Returns:
        The surviving lemmatized words joined by single spaces ('' when
        nothing survives).
    """
    if lemmatizer is None:
        lemmatizer = WordNetLemmatizer()
    if stop_words is None:
        stop_words = set(stopwords.words('english'))

    # Bug fix: the previous code joined the remaining *characters* with ' ',
    # which put a space between every character ("enjoyed" -> "e n j o y e d"),
    # so the later split() produced single letters instead of words.
    # Punctuation is mapped to a space rather than deleted so that words glued
    # together by punctuation ("well-made", "great!but") still come apart.
    punctuation_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    words = text.lower().translate(punctuation_to_space).split()

    return ' '.join(lemmatizer.lemmatize(word) for word in words if word not in stop_words)

In [None]:
#Custom input review
# Sample text to classify; replace it with any review string.
custom_review = "i enjoyed it"


In [None]:
#Preprocess and transform the input
# Clean the raw review, then vectorize it. transform() takes a collection of
# documents, so the single cleaned review is wrapped in a list.
processed_review = preprocess_input(custom_review)
review_batch = [processed_review]
transformed_review = vectorizer.transform(review_batch)

In [None]:
#predict sentiment
# predict() yields one label per input row; a single review was passed, so
# keep the first (and only) label.
predicted_labels = model.predict(transformed_review)
predicted_sentiment = predicted_labels[0]

In [None]:
#output result
# Show the original text next to the label the model assigned to it.
print(
    f"Review: {custom_review}",
    f"Predicted Sentiment: {predicted_sentiment}",
    sep="\n",
)

Review: i enjoyed it
Predicted Sentiment: pos
