**Preparing the Data for Text Classification**

This section covers the steps for loading, preprocessing, and transforming the raw text data into a format suitable for training and evaluating machine learning models.

In [None]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

In [None]:
# Step 1: Create the dataset

# Each record keeps a review next to its label, so the two columns built
# below always have the same length and cannot drift out of alignment when
# rows are added or removed.
labelled_reviews = [
    ("This product is amazing, I love it!", 'Positive'),
    ("Worst purchase I've ever made, completely disappointed.", 'Negative'),
    ("It's okay, not bad but not great either.", 'Neutral'),
    ("The quality is fantastic, will definitely buy again.", 'Positive'),
    ("Terrible. Stopped working after a week.", 'Negative'),
    ("satisfactory, does the job well.", 'Neutral'),
    ("Excellent value for money. Highly recommended!", 'Positive'),
    ("Not what I expected, very poor quality.", 'Negative'),
    ("Decent quality for the price.", 'Neutral'),
    ("Loved the product. Worth every penny.", 'Positive'),
]

# Column-oriented dictionary in the shape pd.DataFrame expects.
data = {
    'Review': [review for review, _sentiment in labelled_reviews],
    'Sentiment': [sentiment for _review, sentiment in labelled_reviews],
}

In [None]:
# Converting the data into a DataFrame (one column per dictionary key)
df = pd.DataFrame.from_dict(data)

df

Unnamed: 0,Review,Sentiment
0,"This product is amazing, I love it!",Positive
1,"Wrost purchase I've ever made, completely disa...",Negative
2,"It's okay, not bad but not great either.",Neutral
3,"The quality is fantastic, will definitely buy ...",Positive
4,Terrible. Stopped working after a week.,Negative
5,"satisfactory, does the job well.",Neutral
6,Excellent value for money. Highly recmmended!,Positive
7,"Not what I expected, very poor quality.",Negative
8,Decent quality for the price.,Neutral
9,Loved the product. Worth every penny.,Positive


In [None]:
# Step 2: Preprocessing the text data

# Converting the review text to lowercase
lowercased_reviews = df['Review'].str.lower()
df['Review'] = lowercased_reviews

In [None]:
# Remove stopwords
# A set gives constant-time membership checks; a list would be scanned once
# for every token of every review.
# NOTE(review): the NLTK English list is believed to include negations such
# as "not"/"no", which carry sentiment ("not bad") — confirm and consider
# keeping them.
stop_words = set(stopwords.words('english'))
df['Review'] = df['Review'].apply(
    lambda text: ' '.join(word for word in text.split() if word not in stop_words)
)

In [None]:
# Step 3: Splitting the dataset into training and testing sets
# X holds the features (text reviews); y holds the labels (sentiment).
X, y = df['Review'], df['Sentiment']

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Step 4: Vectorization (Convert text to numerical data using TF-IDF)
# The vocabulary and IDF weights are learned from the training reviews only;
# the same fitted vectorizer is then applied to both splits.
vectorizer = TfidfVectorizer().fit(X_train)
X_train_tfidf = vectorizer.transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

**Building a Text Classification Model**

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from nltk.tokenize import word_tokenize     # Import word_tokenize

In [None]:
# Step 5: Train an SVM classifier
# fit() stays the last expression so the fitted estimator is still shown as
# the cell output.
svm_settings = {'kernel': 'linear', 'random_state': 42}
model = SVC(**svm_settings)
model.fit(X_train_tfidf, y_train)

In [None]:
# Step 6: Define a helper that predicts the sentiment of new reviews
def predict_sentiment(new_reviews):
  """Predict a sentiment label for each review in ``new_reviews``.

  Every review receives the same preprocessing that was applied to the
  training text (lowercase, whitespace split, stopword removal) and is then
  transformed with the already-fitted ``vectorizer`` before being passed to
  ``model``.

  Args:
    new_reviews: An iterable of raw review strings. A single string is
      also accepted and treated as one review.

  Returns:
    Whatever ``model.predict`` returns for the transformed reviews, with one
    prediction per input review.
  """
  # A bare string would otherwise be iterated character by character.
  if isinstance(new_reviews, str):
    new_reviews = [new_reviews]

  # Preparing the new reviews.
  # The training reviews were split on whitespace and filtered against
  # `stop_words`; the same steps are used here so the features match what the
  # model saw. word_tokenize is deliberately not used: it needs an NLTK
  # tokenizer resource, and the notebook only downloads 'stopwords'.
  preprocessed_reviews = [
      ' '.join(word for word in review.lower().split() if word not in stop_words)
      for review in new_reviews
  ]

  # Transform the reviews using the vectorizer fitted on the training data
  reviews_tfidf = vectorizer.transform(preprocessed_reviews)

  # Predict sentiment
  predictions = model.predict(reviews_tfidf)
  return predictions

In [None]:
# Step 7: Request user input for reviews
print("Please enter a review to classify its sentiment (type 'exit' to stop):")

while True:
  # Take input from the user. Surrounding whitespace is dropped so that
  # " exit " still stops the loop. A closed stdin or an interrupted cell ends
  # the loop cleanly instead of leaving a traceback.
  try:
    user_input = input("Enter Review: ").strip()
  except (EOFError, KeyboardInterrupt):
    print("Exiting program.")
    break

  # If user types 'exit', break out of the loop
  if user_input.lower() == 'exit':
    print("Exiting program.")
    break

  # A blank line has nothing to classify; prompt again rather than predict
  # on empty text.
  if not user_input:
    continue

  # Predict sentiment for the entered review
  predicted_sentiment = predict_sentiment([user_input])

  # Display prediction
  print(f"Review: {user_input}\nPredicted Sentiment: {predicted_sentiment[0]}\n")

Please enter a review to classify its sentiment (type 'exit' to stop):
