<a href="https://colab.research.google.com/github/thatswhatmeetcoded/Sentiment-Classification/blob/main/decision_tree/simulation_decision_tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tkinter.constants import BROWSE
import pandas as pd
import numpy as np
import re
import nltk
import joblib
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.tree import DecisionTreeClassifier

# Make the Drive folder holding the saved artifacts available under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Fetch the NLTK corpora used for cleaning (stop-word list and WordNet data)
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)

# Shared helpers consumed by clean_text
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalise a raw string into a space-joined sequence of lemmas.

    The text is lower-cased, then URLs, @mentions/#hashtags and every
    character that is neither a word character nor whitespace are removed
    (in that order). The remainder is split on whitespace, tokens found in
    the module-level ``stop_words`` set are dropped, and each surviving
    token is passed through the module-level ``lemmatizer``.

    Args:
        text: The raw input string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    lowered = text.lower()

    # NOTE(review): the '.' after 'www' is unescaped, so it matches any
    # character. Left untouched because changing it would alter the
    # preprocessing; presumably the saved vectorizer was fitted on text
    # cleaned the same way -- confirm before tightening.
    removal_patterns = (
        r"http\S+|www.\S+",  # URLs
        r"@\w+|#\w+",        # mentions and hashtags
        r"[^\w\s]",          # anything that is not a word char or whitespace
    )
    for pattern in removal_patterns:
        lowered = re.sub(pattern, "", lowered)

    kept = []
    for token in lowered.split():
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)


# Locations of the persisted artifacts on the mounted Drive
vec_path = '/content/drive/MyDrive/vectorizers'
# NOTE(review): this notebook imports DecisionTreeClassifier, yet the file
# loaded here is named random_forest_bow.pkl -- confirm this is the intended model.
model_path = '/content/drive/MyDrive/models/random_forest_bow.pkl'
pca_path = f"{vec_path}/svd_bow.pkl"
label_path = '/content/drive/MyDrive/features/y.npy'  # not read anywhere in this cell

# Restore the bag-of-words vectorizer, the dimensionality reducer
# (stored as svd_bow.pkl) and the classifier, in that order
bow_vectorizer = joblib.load(f"{vec_path}/bow_vectorizer.pkl")
svd_bow = joblib.load(pca_path)
model = joblib.load(model_path)

# Restore the saved label encoder so numeric predictions can be decoded
from sklearn.preprocessing import LabelEncoder
le = joblib.load('/content/drive/MyDrive/vectorizers/label_encoder.pkl')

# Sentence to classify
new_text = "i am a bad boy"

# Step 1: run the text through clean_text
cleaned = clean_text(new_text)
print("Cleaned Text:", cleaned)

# Step 2: bag-of-words features for the single cleaned document
X_vec = bow_vectorizer.transform([cleaned])

# Step 3: project the features with the loaded reducer
X_reduced = svd_bow.transform(X_vec)

# Step 4: classify, then map the encoded class back to its label
y_pred = model.predict(X_reduced)
sentiment_label = le.inverse_transform(y_pred)[0]

print("Predicted Sentiment:", sentiment_label)

# Show the encoder's full code -> label mapping for reference
print({code: name for code, name in zip(le.transform(le.classes_), le.classes_)})


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Cleaned Text: bad boy
Predicted Sentiment: neutral
{np.int64(0): 'negative', np.int64(1): 'neutral', np.int64(2): 'positive'}


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
