In [1]:
import pandas as pd
from pymongo import MongoClient

# Connection settings for the MongoDB instance this script seeds.
MONGO_URI = "mongodb://localhost:27017/"
DB_NAME = "CS3203"
# Names of the three collections this script drops and repopulates.
COLLECTION_CATEGORIES = "categories"
COLLECTION_SOURCES = "sources"
COLLECTION_COMPLAINTS = "complaints"

# Open the connection and take a handle on the categories collection.
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
categories_collection = db[COLLECTION_CATEGORIES]

# (category name, hex colour) pairs; expanded into documents just below.
category_palette = [
    ("Public Safety", "#FF5733"),  # Vibrant Red-Orange
    ("Financial", "#2E86C1"),  # Deep Blue
    ("Transportation", "#1ABC9C"),  # Teal
    ("Technology", "#8E44AD"),  # Purple
    ("Food Services", "#E67E22"),  # Warm Orange
    ("Housing", "#27AE60"),  # Forest Green
    ("Employment", "#F1C40F"),  # Bright Yellow
    ("Retail", "#D35400"),  # Burnt Orange
    ("Healthcare", "#E74C3C"),  # Bold Red
    ("Social Services", "#5DADE2"),  # Sky Blue
    ("Education", "#9B59B6"),  # Soft Purple
    ("Environmental", "#2ECC71"),  # Fresh Green
    ("Noise", "#F39C12"),  # Golden Yellow
    ("Infrastructure", "#34495E"),  # Dark Gray-Blue
    ("Others", "#95A5A6"),  # Neutral Gray
]
data = [
    {"name": label, "color": hex_code}
    for label, hex_code in category_palette
]

# Names of all seeded categories; used further down to map any label that
# is not in this set onto "Others".
categories = {doc["name"] for doc in data}

# Rebuild the collection from scratch, then enforce unique category names.
categories_collection.drop()
categories_collection.insert_many(data)
categories_collection.create_index([("name", 1)], unique=True)

print("Categories inserted successfully!")

# Reuse the `db` handle opened at the top of the script rather than building
# another MongoClient: each client owns its own connection pool, and the
# previous one was being orphaned without ever being closed.
sources_collection = db[COLLECTION_SOURCES]

# The only complaint source seeded by this script.
data = [
    {"name": "Reddit", "color": "#FF5733"},  # Vibrant Red-Orange
]

# Rebuild the collection from scratch, then enforce unique source names.
sources_collection.drop()
sources_collection.insert_many(data)
sources_collection.create_index([("name", 1)], unique=True)

print("Sources inserted successfully!")

def _known_category(label):
    """Return *label* if it is one of the seeded categories, else "Others"."""
    return label if label in categories else "Others"


def _reddit_url(post_name):
    """Build the r/singapore comments URL from a row's `name` value."""
    return f"https://www.reddit.com/r/singapore/comments/{post_name}"


# Load the scored posts and keep only the rows labelled as direct complaints.
raw_df = pd.read_csv("./data/sentiment_scored_2023_data.csv")
is_direct_complaint = raw_df["Intent Category"] == "Direct Complaint"
raw_df = raw_df[is_direct_complaint]

# Project the CSV columns onto the complaint document fields.
clean_df = pd.DataFrame(
    {
        "id": raw_df["name"],
        "title": raw_df["title"],
        # A missing body becomes an empty string so dropna() keeps the row.
        "description": raw_df["selftext"].fillna(""),
        "category": raw_df["Domain Category"].map(_known_category),
        # NOTE(review): no unit/format is given, so this relies on pandas
        # inferring the created_utc representation -- confirm against the CSV.
        "date": pd.to_datetime(raw_df["created_utc"]),
        "sentiment": raw_df["sentiment_title_selftext_polarity"],
        "url": raw_df["name"].map(_reddit_url),
        "source": "Reddit",
    }
)

# Discard any row that still has a missing value in one of the fields.
clean_df = clean_df.dropna()

# Reuse the `db` handle opened at the top of the script rather than building
# yet another MongoClient (each one owns its own connection pool and the
# earlier ones were never closed).
complaints_collection = db[COLLECTION_COMPLAINTS]

# One dict per cleaned row, keyed by column name.
data = clean_df.to_dict(orient="records")

complaints_collection.drop()
# insert_many rejects an empty document list; skip it when the filtering
# above left no rows so the script does not abort after the drop and before
# the index is created.
if data:
    complaints_collection.insert_many(data)
# Text index spanning both the title and the description fields.
complaints_collection.create_index([("title", "text"), ("description", "text")])

if data:
    print("Complaints inserted successfully!")
else:
    print("No complaints to insert; collection left empty.")

Categories inserted successfully!
Sources inserted successfully!
Complaints inserted successfully!
