Author: Edwina Hon Kai Xin

In [2]:
from graph_db.neo4j_handler import Neo4jHandler

# Handler used by the ingestion loop further down to insert tweets into Neo4j;
# it is closed at the end of that loop.
# NOTE(review): presumably opens the database connection from settings held
# inside graph_db.neo4j_handler — confirm where credentials come from.
neo = Neo4jHandler()

### Validate and Extract Entities

In [None]:
from graph_db.entity_extractor import EntityExtractor
from mongodb.tweet_validation import TweetValidator
import json

# Load data: the file is read as JSON Lines (one JSON object per line).
# - encoding is pinned to UTF-8 so non-ASCII tweet text (curly quotes, dashes)
#   is decoded the same way on every platform instead of using the OS default.
# - blank lines (e.g. a trailing newline at end of file) are skipped rather
#   than crashing json.loads with a JSONDecodeError.
with open("data/full_sentiments.json", "r", encoding="utf-8") as f:
    raw_tweets = [json.loads(line) for line in f if line.strip()]

# Validate: keep only the records the validator accepts. The raw list is kept
# under its own name so this step can be re-run without reloading the file.
validator = TweetValidator()
data = validator.filter_valid(raw_tweets)

# Init the entity extractor used by the ingestion loop below
extractor = EntityExtractor()

# Push tweets to Neo4j

In [2]:
# Loop through the validated tweets and push each one to Neo4j together with
# the entities extracted from its text.
#
# The loop is wrapped in try/finally so the handler is always closed, even if
# entity extraction or an insert raises part-way through. After a failure,
# re-create `neo` (first cell) before running this cell again.
try:
    for tweet in data:
        # Read the text once so extraction and insertion use the same value;
        # a missing "Tweet" key still raises KeyError, as before.
        tweet_text = tweet["Tweet"]
        entities = extractor.extract_entities(tweet_text)

        neo.insert_tweet_graph(
            tweet_text=tweet_text,
            sentiment=tweet.get("sentiment"),
            source=tweet.get("name"),
            time=tweet.get("Time"),
            entities=entities
        )
finally:
    neo.close()



# Querying

In [31]:
from graph_db.tweet_graph import tweet_graph
import pprint

handler = Neo4jHandler()  # separate handler for querying; `neo` was already closed after ingestion
queries = tweet_graph(handler)  # query helper built on this handler; used by the query cells below

### Sentiment counts by source


In [33]:
# Per-sentiment tweet counts for a single news source
sentiments_by_source = queries.get_sentiment_by_source("The Star")
for sentiment_row in sentiments_by_source:
    pprint.pprint(sentiment_row)

{'count': 22, 'sentiment': 'Negative'}
{'count': 17, 'sentiment': 'Positive'}
{'count': 11, 'sentiment': 'Neutral'}


### Tweets mentioning a specific person

In [34]:
# Get all tweets that mention a given person (here: "Obama").
# The result is named after what it holds rather than after an earlier
# example query, so the name stays accurate when the person is changed.
person_tweets = queries.get_tweets_mentioning_person("Obama")
for t in person_tweets:
    pprint.pprint(t)

{'time': 'Sat Apr 12 00:05:31 +0000 2025',
 'tweet': 'Trump replaces Obama portrait with painting of himself #FMTNews '
          '#FMTWorld'}


### Most talked-about topics

In [36]:
 # Fetch the most-mentioned topics; the recorded output shows a list of
 # {'topic': ..., 'count': ...} dicts.
 # NOTE(review): five rows came back with no argument — presumably a default
 # limit inside get_top_topics; confirm.
top_topics = queries.get_top_topics()
print("Top topics:", top_topics)

Top topics: [{'topic': 'Malaysian', 'count': 3}, {'topic': 'Muslims', 'count': 2}, {'topic': 'Palestinian', 'count': 2}, {'topic': 'Dewan', 'count': 2}, {'topic': 'Italian', 'count': 2}]


### Tweets from a specific source within a date range

In [25]:
# Tweets published by one source inside a date window (1 Jan – 30 Apr 2025).
# The result name describes the whole window, not just January.
#
# NOTE(review): this cell has no recorded output, while other cells show the
# stored `time` values in the form 'Sat Apr 12 00:05:31 +0000 2025'. If
# get_tweets_by_source_and_date compares those raw strings with the ISO-8601
# bounds passed here, nothing will ever match — confirm how the query parses
# or stores timestamps.
source_tweets_in_range = queries.get_tweets_by_source_and_date(
    "The Star", "2025-01-01T00:00:00", "2025-04-30T23:59:59"
)
for t in source_tweets_in_range:
    print(t)


### Tweets about a specific location

In [38]:
# Tweets whose extracted entities include the given location (here: "China")
location_tweets = queries.get_tweets_about_location("China")
for location_tweet in location_tweets:
    pprint.pprint(location_tweet)

{'time': 'Sat Apr 12 01:28:23 +0000 2025',
 'tweet': 'The shifting trade dynamics between the US and China may present '
          'new openings for Malaysian entrepreneurs.'}
{'time': 'Fri Apr 11 05:03:33 +0000 2025',
 'tweet': "COMMENT | M'sia's strategic opportunity in US-China trade war"}
{'time': 'Sat Apr 12 04:28:57 +0000 2025',
 'tweet': 'Like countless other companies in the manufacturing powerhouse of '
          'Zhejiang province, its products are geared largely towards export '
          '-- a sector freshly menaced by Donald Trump’s roiling of the global '
          'economy and increasingly brutal China tariffs.'}
{'time': 'Sat Apr 12 04:00:20 +0000 2025',
 'tweet': '"Retaliatory US-China tariffs are damaging to the world, including '
          'Malaysia – a small, open, trading nation.\n'
          '\n'
          '"Still, this is no time to panic." - Tengku Zafrul'}
{'time': 'Fri Apr 11 15:41:51 +0000 2025',
 'tweet': "Sze Fei-Izzuddin gave it their all but could not ove

### Get all tweets that mention both a person and an organisation

In [21]:
# Tweets that mention both the given person and the given organisation
collab_mentions = queries.get_joint_mentions("Elon Musk", "Tesla")
for joint_tweet in collab_mentions:
    print(joint_tweet)

### Tweets by sentiment

In [39]:
# Tweets carrying a given sentiment label (here: "Positive")
sentiment_tweets = queries.get_tweets_by_sentiment("Positive")
for labelled_tweet in sentiment_tweets:
    pprint.pprint(labelled_tweet)

{'time': 'Sat Apr 12 03:40:15 +0000 2025',
 'tweet': '"Can we have the best of both worlds? Big, sporty, exciting, '
          'luxurious cars that are also economical to buy and run?\n'
          '\n'
          '"Simply put, yes." - Ravindran Kurusamy'}
{'time': 'Sat Apr 12 03:30:16 +0000 2025',
 'tweet': 'These toys have managed to transcend the traditional toy market, '
          'appealing to the emotional and psychological needs of young adults.'}
{'time': 'Sat Apr 12 02:15:11 +0000 2025',
 'tweet': 'Fahmi expressed optimism that the visit would not only strengthen '
          'economic ties but also deepen cultural and people-to-people '
          'connections between the two nations.'}
{'time': 'Sat Apr 12 01:28:23 +0000 2025',
 'tweet': 'The shifting trade dynamics between the US and China may present '
          'new openings for Malaysian entrepreneurs.'}
{'time': 'Fri Apr 11 15:09:38 +0000 2025',
 'tweet': '“Several Thai businesses are with us today to introduce authentic '


### Most referenced organisations

In [41]:
# Fetch the organisations referenced most often, capped at five rows via
# `limit`; the recorded output shows {'organisation': ..., 'references': ...} dicts.
top_orgs = queries.get_most_referenced_organisations(limit=5)
print("Top referenced organizations:", top_orgs)

Top referenced organizations: [{'organisation': 'Ayer Kuning', 'references': 13}, {'organisation': 'Trump', 'references': 8}, {'organisation': 'Asean', 'references': 5}, {'organisation': 'BN', 'references': 4}, {'organisation': 'Church', 'references': 4}]


### Get tweets about a topic and a location

In [28]:
# Tweets that match both a topic and a location (here: "Climate" in "California")
climate_california = queries.get_tweets_by_topic_and_location("Climate", "California")
for matched_tweet in climate_california:
    print(matched_tweet)

In [43]:
# Overall sentiment distribution across all stored tweets; the recorded output
# shows one {'sentiment': ..., 'count': ...} dict per sentiment label.
sentiment_counts = queries.sentiment_distribution()
print("Overall sentiment distribution:", sentiment_counts)

Overall sentiment distribution: [{'sentiment': 'Negative', 'count': 111}, {'sentiment': 'Neutral', 'count': 101}, {'sentiment': 'Positive', 'count': 88}]


In [44]:
# Close the query handler now that all query cells have run; `queries` was
# built on this handler, so re-create both before issuing further queries.
handler.close()