In [18]:
from de_classes.data_loader import data_loader
import json

file = "full_sentiments.json"

try:
    data = data_loader.from_json_lines(file)
    print(f"Successfully loaded {len(data)} records")
except AttributeError:
    # Method 2: Alternative approach if the import is problematic
    print("Import failed. Trying alternative approach...")
    
    # Import the specific file directly
    import sys
    import os
    
    # Add the parent directory to the path if needed
    current_dir = os.path.dirname(os.getcwd())
    if current_dir not in sys.path:
        sys.path.append(current_dir)
    
    # Re-import with the full path
    import de_classes.data_loader
    # Force reload in case it was previously imported incorrectly
    import importlib
    importlib.reload(de_classes.data_loader)
    
    # Try again
    data = de_classes.data_loader.data_loader.from_json_lines(file)
    print(f"Successfully loaded {len(data)} records with alternative method")


Successfully loaded 300 records


In [19]:
from de_classes.pymongo_utils import PyMongoUtils

# Initialize your connection
mongo_obj = PyMongoUtils()

# Define database and collection names
mongo_db = "twitter_sentiment"  
mongo_collection = "sentiment_analysis"

# Get or create collection
collection = mongo_obj.get_collection(mongo_db, mongo_collection)


In [21]:

from de_classes.sentiment_insertion import sentiment_insertion

try:
    # Insert data 
    inserter = sentiment_insertion(collection)
    inserter.insert_many(data)
    print("Data inserted successfully to MongoDB Atlas!")
    
except Exception as e:
    print(f"Error connecting to MongoDB Atlas: {e}")
    print("Please check your internet connection and verify the connection string.")



Data inserted successfully to MongoDB Atlas!


# Querying Section

### Find by sentiment

In [22]:
from de_classes.sentiment_query import sentiment_query

# Initialize the query object
query = sentiment_query(collection)

In [23]:

try:

    # Get tweets that mentionining "keyword"
    tourism_tweets = query.search_tweets("tourism")
    print(f"Found {len(tourism_tweets)} tweets mentioning 'tourism'")
    
    # print a sample:
    for tweet in tourism_tweets[:99]:
        print(tweet)

except Exception as e:
    print(f"Error connecting to MongoDB Atlas: {e}")
    print("Unable to query data")

Found 1 tweets mentioning 'tourism'
{'_id': ObjectId('67f7da10879e5494807a5dc8'), 'Tweet': 'According to Widiyanti, the tourism sector, classified as a service industry, is not subject to such tariffs and continues to generate foreign exchange.', 'prediction': 0.0, 'sentiment': 'Neutral', 'name': 'The Star'}


### Find by prediction

In [24]:
predicted_neutral = query.find_by_prediction(0)
print(f"Found {len(predicted_neutral)} tweets with prediction 0")

#only showing top 3
for doc in predicted_neutral[:]:
    print(doc)

Found 242 tweets with prediction 0
{'_id': ObjectId('67f7da10879e5494807a5dc7'), 'Tweet': '#Singapore Lee: ‚ÄúThis is going to affect our trade, it‚Äôs going to affect our economy, it‚Äôs going to affect our region, and it‚Äôs going to affect our future. And it‚Äôs not good news.‚Äù', 'prediction': 0.0, 'sentiment': 'Negative', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dc8'), 'Tweet': 'According to Widiyanti, the tourism sector, classified as a service industry, is not subject to such tariffs and continues to generate foreign exchange.', 'prediction': 0.0, 'sentiment': 'Neutral', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dca'), 'Tweet': 'Abdul Rais noted that the global economy is currently experiencing a "tariff war‚Äù and that the US remains a major trading partner for Malaysia.', 'prediction': 0.0, 'sentiment': 'Neutral', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dcb'), 'Tweet': '#NSTnation  "Our economy remains resilient," Anwar sa

### Find by source name

In [25]:
from_the_star = query.find_by_source("The Star")
print(f"Found {len(from_the_star)} tweets from 'The Star'")

for doc in from_the_star[:]:
    print(doc)

Found 50 tweets from 'The Star'
{'_id': ObjectId('67f7da10879e5494807a5dc7'), 'Tweet': '#Singapore Lee: ‚ÄúThis is going to affect our trade, it‚Äôs going to affect our economy, it‚Äôs going to affect our region, and it‚Äôs going to affect our future. And it‚Äôs not good news.‚Äù', 'prediction': 0.0, 'sentiment': 'Negative', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dc8'), 'Tweet': 'According to Widiyanti, the tourism sector, classified as a service industry, is not subject to such tariffs and continues to generate foreign exchange.', 'prediction': 0.0, 'sentiment': 'Neutral', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dc9'), 'Tweet': 'Abdul Rasheed said the central bank is still awaiting further details from the United States about the tariffs.', 'prediction': 2.0, 'sentiment': 'Positive', 'name': 'The Star'}
{'_id': ObjectId('67f7da10879e5494807a5dca'), 'Tweet': 'Abdul Rais noted that the global economy is currently experiencing a "tariff war‚Äù and t

### Count by sentiment

In [26]:
sentiment_summary = query.count_by_sentiment()
print("Tweet count by sentiment:")
for entry in sentiment_summary:
    print(f"{entry['_id']}: {entry['count']}")

Tweet count by sentiment:
Negative: 112
Neutral: 99
Positive: 88
Unknown: 1


### Count by source

In [27]:
source_summary = query.count_by_source()
print("Tweet count by news source:")
for entry in source_summary:
    print(f"{entry['_id']}: {entry['count']}")

Tweet count by news source:
New Straits Times: 50
theSun: 50
Free Malaysia Today: 50
Herald Malaysia: 50
malaysiakini.com: 50
The Star: 50
