In [69]:
# Mount Google Drive into the Colab runtime at /content/drive/ so that files
# stored on Drive can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [70]:
# Switch the working directory to the project folder on the mounted Drive; the
# model and pickle files loaded later are opened with relative paths.
%cd /content/drive/MyDrive/BdCalling_IT_Project

/content/drive/MyDrive/BdCalling_IT_Project


In [71]:
# Import libraries
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk

In [72]:
# Download NLTK data if necessary
# - 'punkt' / 'punkt_tab': tokenizer data used by word_tokenize. Recent NLTK
#   releases look up 'punkt_tab' instead of the pickled 'punkt' models, so both
#   are fetched to keep the notebook runnable on old and new runtimes.
# - 'stopwords': English stop-word list used by the preprocessing step.
# - 'wordnet': lexical database used by WordNetLemmatizer.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [73]:
# Initialize stopwords and lemmatizer
# Both objects are module-level because preprocess_text reads them as globals.
lemmatizer = WordNetLemmatizer()
# A set gives constant-time membership checks while filtering tokens.
stop_words = {*stopwords.words('english')}

In [74]:
# Define preprocessing function
def preprocess_text(text):
    """Clean one tweet and return it as a space-joined string of lemmas.

    Steps, in order:
      1. strip URLs (``http...`` / ``www...``), ``@mentions`` and ``#hashtags``;
      2. drop every character that is not an ASCII letter or whitespace;
      3. lowercase and tokenize with ``word_tokenize``;
      4. discard tokens found in the global ``stop_words`` set;
      5. lemmatize the remaining tokens with the global ``lemmatizer``.

    Args:
        text: the raw tweet; must be a ``str`` (``re.sub`` rejects other types).

    Returns:
        The cleaned tokens joined by single spaces ('' if nothing survives).

    NOTE(review): presumably this has to mirror the cleaning applied before the
    TF-IDF vectorizer was fitted -- confirm against the training code before
    changing any step.
    """
    without_noise = re.sub(r'http\S+|www\S+|@\w+|#\w+', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_noise)

    kept_lemmas = []
    for token in word_tokenize(letters_only.lower()):
        if token in stop_words:
            continue
        kept_lemmas.append(lemmatizer.lemmatize(token))

    return ' '.join(kept_lemmas)

In [75]:
# Load saved model
# The path is relative, so the file is looked up in the current working
# directory (set by the %cd cell above).
model = load_model('sentiment_ann_model.h5')



In [76]:
# Load saved TF-IDF vectorizer
# The pickle is read from the current working directory. Unpickling executes
# code stored in the file, so only load artifacts produced by this project.
with open('tfidf_vectorizer.pkl', mode='rb') as vectorizer_file:
    tfidf = pickle.loads(vectorizer_file.read())

In [77]:
import os
import pickle
import warnings

current_directory = os.getcwd()

# NOTE(review): this is the same pickle that an earlier cell loads as `tfidf`,
# so `scaler` ends up holding the TF-IDF vectorizer rather than a numeric
# feature scaler. The scaler artifact written at training time is not visible
# from this notebook -- point `file_path` at it once its name is confirmed.
file_path = os.path.join(current_directory, 'tfidf_vectorizer.pkl')

# Fail loudly when the artifact is missing: only printing a message would
# leave `scaler` undefined while the notebook carries on as if it had loaded.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"Error: File not found at {file_path}")

# Unpickling executes code stored in the file; only load trusted artifacts.
with open(file_path, 'rb') as f:
    scaler = pickle.load(f)

# Fitted scikit-learn scalers expose a `scale_` attribute; its absence means
# the object bound to `scaler` cannot be the scaler used during training.
if not hasattr(scaler, 'scale_'):
    warnings.warn(
        f"Object loaded from {file_path} is a {type(scaler).__name__}, "
        "which does not look like a fitted feature scaler."
    )

In [95]:
# Example test data (new tweets)
# Each record is (tweet text, follower count, friend count); the trailing
# comment on a record is the sentiment the example is meant to illustrate.
test_data = pd.DataFrame.from_records(
    [
        ("I absolutely love this product, it's fantastic!", 300, 400),  # Positive
        ("This is okay, not the best but not bad either.", 150, 200),   # Neutral
        ("I'm really unhappy with the service, very poor.", 50, 75),    # Negative
    ],
    columns=['text', 'user_followers', 'user_friends'],
)

In [96]:
# Preprocess test text data
# Run every raw tweet through preprocess_text and keep the result in a new
# column, leaving the original 'text' column untouched.
test_data['cleaned_text'] = test_data['text'].map(preprocess_text)

In [97]:
# Debug: Check preprocessed text
# Show each raw tweet next to its cleaned form to eyeball the preprocessing.
print("\nCleaned Test Data:", test_data[['text', 'cleaned_text']], sep="\n")


Cleaned Test Data:
                                              text  \
0  I absolutely love this product, it's fantastic!   
1   This is okay, not the best but not bad either.   
2  I'm really unhappy with the service, very poor.   

                        cleaned_text  
0  absolutely love product fantastic  
1               okay best bad either  
2     im really unhappy service poor  


In [98]:
# Transform text data into TF-IDF features
# Only `transform` is called: the vocabulary and weights come from the
# vectorizer loaded from disk and are not refitted on these tweets.
text_features = tfidf.transform(test_data['cleaned_text'])

In [99]:
# Debug: TF-IDF features
# The shape reads as (number of tweets, number of TF-IDF columns).
print("\nTF-IDF Feature Shape:", text_features.shape)


TF-IDF Feature Shape: (3, 5000)


In [100]:
# Scale numerical features
#
# The model must see numeric inputs scaled with the statistics learned from
# the training data. Fitting a fresh scaler on the few rows being scored uses
# that batch's own mean/variance instead, so the same follower count maps to a
# different input depending on which other tweets are scored with it.
import os
import pickle
import warnings

import pandas as pd
from sklearn.preprocessing import StandardScaler

numeric_cols = ['user_followers', 'user_friends']

# NOTE(review): assumed file name for the scaler pickled at training time --
# confirm against the training notebook and adjust if it differs.
scaler_path = 'scaler.pkl'

# Keep an untouched copy of the raw values so that re-running this cell never
# scales numbers that have already been scaled.
for col in numeric_cols:
    if f'{col}_raw' not in test_data.columns:
        test_data[f'{col}_raw'] = test_data[col]

# Rebuild the raw frame under the original column names.
raw_numeric = pd.DataFrame({col: test_data[f'{col}_raw'] for col in numeric_cols})

if os.path.exists(scaler_path):
    # Unpickling executes code stored in the file; only load trusted artifacts.
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    # transform only: reuse the training statistics, never refit at inference.
    scaled_numeric = scaler.transform(raw_numeric)
else:
    warnings.warn(
        f"Training scaler not found at {scaler_path}; fitting a StandardScaler "
        "on the inference batch instead. The scaled values will not match the "
        "scaling used during training, so predictions may be unreliable."
    )
    scaler = StandardScaler()
    scaled_numeric = scaler.fit_transform(raw_numeric)

test_data[numeric_cols] = scaled_numeric

In [101]:
# Debug: Scaled numerical features
# Print the two numeric columns after scaling to sanity-check their range.
print("\nScaled Numerical Features:", test_data[['user_followers', 'user_friends']], sep="\n")


Scaled Numerical Features:
   user_followers  user_friends
0        1.297771      1.307403
1       -0.162221     -0.186772
2       -1.135550     -1.120631


In [102]:
# Combine TF-IDF features and scaled numerical features
# The TF-IDF matrix is densified and the two numeric columns are appended on
# the right, so each row is [tfidf columns..., user_followers, user_friends].
test_features = np.concatenate(
    [
        text_features.toarray(),
        test_data[['user_followers', 'user_friends']].to_numpy(),
    ],
    axis=1,
)

In [103]:
# Make predictions
# The next cell takes argmax over axis 1, i.e. `predictions` is treated as one
# row of per-class scores for each tweet.
predictions = model.predict(test_features)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


In [104]:
# Map predictions to sentiment labels
# NOTE(review): the index -> name order is assumed to match the label encoding
# used when the model was trained -- confirm against the training code.
label_mapping = dict(enumerate(('Negative', 'Neutral', 'Positive')))
# Index of the highest-scoring class for every row of `predictions`.
predicted_labels = np.asarray(predictions).argmax(axis=1)
# Translate each class index into its sentiment name.
predicted_sentiments = list(map(label_mapping.__getitem__, predicted_labels))

In [105]:
# Output predictions
# Attach the predicted sentiment names as a new column, one value per tweet in
# the same row order as `test_data`.
test_data['Predicted Sentiment'] = predicted_sentiments

In [106]:
# Print results
# Show every original tweet alongside the sentiment predicted for it.
print("\nFinal Results:", test_data[['text', 'Predicted Sentiment']], sep="\n")


Final Results:
                                              text Predicted Sentiment
0  I absolutely love this product, it's fantastic!            Positive
1   This is okay, not the best but not bad either.            Positive
2  I'm really unhappy with the service, very poor.            Negative
