# Initialization

In [None]:
!pip install 'snowflake-connector-python[pandas]' redis faker

Collecting redis
  Downloading redis-5.2.1-py3-none-any.whl.metadata (9.1 kB)
Downloading redis-5.2.1-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.5/261.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis
Successfully installed redis-5.2.1


In [None]:
from sqlalchemy.orm import sessionmaker
import snowflake.connector
import redis
import pandas as pd
import matplotlib.pyplot as plt
import os
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
from sqlalchemy import Column, Integer, String, Text, Float, TIMESTAMP
from sqlalchemy.ext.declarative import declarative_base
from snowflake.connector.pandas_tools import write_pandas

In [None]:
# Connection settings.
#
# Secrets (database URL and passwords) are read from the process environment,
# optionally populated from a local `.env` file, so that no credentials are
# stored in the notebook. A missing secret raises KeyError immediately instead
# of failing later inside a driver.
# NOTE(review): the credentials that were previously committed in this cell
# should be treated as leaked and rotated.
load_dotenv()

# Full SQLAlchemy URL of the Postgres source (includes user and password).
DATABASE_URL = os.environ["DATABASE_URL"]
REDIS_HOST = os.getenv("REDIS_HOST", "redis-10515.c330.asia-south1-1.gce.redns.redis-cloud.com")
REDIS_PORT = int(os.getenv("REDIS_PORT", "10515"))

REDIS_USERNAME = os.getenv("REDIS_USERNAME", "default")
REDIS_PASSWORD = os.environ["REDIS_PASSWORD"]

SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER", "student")
SNOWFLAKE_PASSWORD = os.environ["SNOWFLAKE_PASSWORD"]
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT", "gkb48589")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE", "COMPUTE_s")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE", "DWH")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA", "SF_SAMPLE.PUBLIC")

# 1. Postgres Database
Here we fetch the issues from the `Postgres` database.

In [None]:
# Open an engine on the Postgres source and read the whole issue table into pandas.
postgres_engine = create_engine(DATABASE_URL)
issues_query = text("SELECT * FROM urbanpulse.issue")
with postgres_engine.connect() as pg_conn:
    df_issues = pd.read_sql(sql=issues_query, con=pg_conn)

print("Extracted issues:")
# Preview five randomly chosen rows (unseeded, so the preview differs between runs).
df_issues.sample(n=5)

Extracted issues:


Unnamed: 0,issue_id,citizen_id,description,category,priority_level,latitude,longitude,status,created_at
32,337,11,Damaged speed bumps on Oak Street need fixing.,Road Repair,3,12.34,56.78,Pending,2025-03-04 07:28:02.975096
58,661,41,Discolored tap water reported in multiple areas.,Water Supply,3,12.34,56.78,Pending,2025-03-04 07:31:12.845736
79,836,70,Streetlight at Elm St. is flickering at night.,Streetlights,3,12.34,56.78,Pending,2025-03-04 07:32:55.526296
20,211,11,Malfunctioning LED streetlights creating safet...,Streetlights,3,12.34,56.78,Pending,2025-03-04 07:26:49.262204
61,676,25,Illegal dumping reported near the highway.,Waste Management,3,12.34,56.78,Pending,2025-03-04 07:31:21.629514


# 2. Redis Database
We then get the total number of votes from the `Redis` database.

In [None]:
import random
import redis
import pandas as pd
from tqdm import tqdm

# Settings for the Redis client used by the vote helpers in this notebook.
redis_settings = {
    "host": REDIS_HOST,
    "port": REDIS_PORT,
    "username": REDIS_USERNAME,
    "password": REDIS_PASSWORD,
    # Replies are decoded to str; the key parsing further down relies on str.split.
    "decode_responses": True,
}
r = redis.Redis(**redis_settings)

# Prefix used when building and matching the per-issue vote keys.
ISSUE_PREFIX = "issue:"

In [None]:
def get_votes_per_issue(client=None, prefix=None):
    """Fetch the total vote count stored in Redis for every issue.

    Vote keys have the form ``<prefix><issue_id>:votes`` and hold a hash whose
    ``total_votes`` field is the count.

    The keyspace is walked with SCAN (which, unlike KEYS, does not block the
    server while it enumerates keys) and all HGET calls are sent through one
    pipeline, so the whole lookup costs a single round trip instead of one per
    key. Because there is no slow per-key loop any more, no progress bar is
    shown.

    Args:
        client: Redis client to query. Defaults to the notebook-level ``r``.
        prefix: Key prefix in front of the issue id. Defaults to the
            notebook-level ``ISSUE_PREFIX``.

    Returns:
        dict mapping issue id (str, exactly as it appears in the key) to the
        vote count (int). Keys whose hash has no ``total_votes`` value are
        omitted.
    """
    client = r if client is None else client
    prefix = ISSUE_PREFIX if prefix is None else prefix
    suffix = ":votes"

    # SCAN may report the same key more than once; de-duplicate, keeping order.
    vote_keys = list(dict.fromkeys(client.scan_iter(match=f"{prefix}*{suffix}")))
    if not vote_keys:
        return {}

    # Queue one HGET per key and execute them together.
    pipe = client.pipeline(transaction=False)
    for key in vote_keys:
        pipe.hget(key, "total_votes")
    raw_counts = pipe.execute()

    vote_counts = {}
    for key, raw in zip(vote_keys, raw_counts):
        if not raw:  # field missing (None) or empty value
            continue
        issue_id = key[len(prefix):-len(suffix)]  # strip prefix and ":votes"
        vote_counts[issue_id] = int(raw)

    return vote_counts

# Pull the current vote totals from Redis and tabulate them, one row per issue.
votes_per_issue = get_votes_per_issue()
df_redis = pd.DataFrame(
    data=list(votes_per_issue.items()),
    columns=["issue_id", "redis_votes"],
)

100%|██████████| 100/100 [00:22<00:00,  4.49it/s]


In [None]:
# Display the vote table built from the Redis counts.
df_redis

Unnamed: 0,issue_id,redis_votes
0,98,15
1,120,68
2,659,25
3,743,155
4,662,5
...,...,...
95,908,113
96,781,31
97,118,86
98,641,84


# 3. Snowflake Database
We push both tables from the previous steps to the `Snowflake` database.

In [None]:
# Session parameters for the Snowflake connection shared by the cells below.
snowflake_settings = dict(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
    # NOTE(review): presumably makes OCSP certificate checks fail closed;
    # confirm against the Snowflake connector documentation.
    ocsp_fail_open=False,
)
sf_conn = snowflake.connector.connect(**snowflake_settings)

# One cursor is reused for every statement executed against Snowflake.
sf_cursor = sf_conn.cursor()

In [None]:
# (Re)create the staging tables for issues and votes. The DDL is CREATE OR
# REPLACE, so re-running this cell replaces any existing table of the same name.
# NOTE(review): votes_analytics is created here but no visible cell loads it.
create_issues_table = """
CREATE OR REPLACE TABLE issues_analytics (
    issue_id NUMBER,
    citizen_id NUMBER,
    description TEXT,
    category VARCHAR,
    priority_level NUMBER,
    latitude FLOAT,
    longitude FLOAT,
    status VARCHAR,
    created_at TIMESTAMP
)
"""
create_votes_table = """
CREATE OR REPLACE TABLE votes_analytics (
    vote_id NUMBER,
    citizen_id NUMBER,
    issue_id NUMBER,
    priority_vote NUMBER,
    created_at TIMESTAMP
)
"""

# Point the session at the target database and schema, then create the issues table.
for setup_statement in (
    "USE DATABASE SF_SAMPLE;",
    "USE SCHEMA urbanpulse;",
    create_issues_table,
):
    sf_cursor.execute(setup_statement)

# Kept as the final expression so the cell still echoes the returned cursor.
sf_cursor.execute(create_votes_table)

<snowflake.connector.cursor.SnowflakeCursor at 0x78985138c450>

In [None]:
# Bulk-load the issues extracted from Postgres into ISSUES_ANALYTICS.
# The call's return value is echoed as the cell output.
write_pandas(sf_conn, df_issues, "ISSUES_ANALYTICS", quote_identifiers=False)

(True,
 1,
 100,
 [('fhdspclnsj/file0.txt', 'LOADED', 100, 100, 1, 0, None, None, None, None)])

In [None]:
# Recreate the table for the Redis vote totals, then bulk-load df_redis into it.
create_redis_votes_table = """
CREATE OR REPLACE TABLE redis_votes_analytics (
    issue_id NUMBER,
    redis_votes NUMBER
)
"""
sf_cursor.execute(create_redis_votes_table)
write_pandas(
    conn=sf_conn,
    df=df_redis,
    table_name="REDIS_VOTES_ANALYTICS",
    quote_identifiers=False,
)

sf_conn.commit()
print("Data loaded into Snowflake.")

Data loaded into Snowflake.


In [None]:
# Reporting view: every issue joined to its Redis vote total; issues without a
# matching row in redis_votes_analytics get 0 votes via COALESCE.
# NOTE(review): the GROUP BY lists every selected column and there is no
# aggregate, so it only removes duplicate rows. Confirm whether an aggregate
# such as SUM was intended.
create_data_mart = """
CREATE OR REPLACE VIEW issues_summary AS
SELECT
    i.issue_id,
    i.description,
    i.category,
    i.priority_level,
    i.status,
    COALESCE(r.redis_votes, 0) AS total_votes,
FROM issues_analytics i
LEFT JOIN redis_votes_analytics r ON i.issue_id = r.issue_id
GROUP BY i.issue_id, i.description, i.category, i.priority_level, i.status, r.redis_votes
ORDER BY total_votes DESC
"""
sf_cursor.execute(create_data_mart)
sf_conn.commit()
print("Analytical data mart (view) created.")

Analytical data mart (view) created.


# 4. Add more citizens to the Postgres Database

In [None]:
import random
import pandas as pd
from faker import Faker
from sqlalchemy import create_engine, text

# Engine for the Postgres database that receives the generated citizens.
postgres_engine = create_engine(DATABASE_URL)

# Faker instance that produces the synthetic citizen attributes.
fake = Faker()

def generate_citizen():
    """Build one synthetic citizen record as a dict of column name -> value.

    Every string is truncated to a maximum length (names 20, full name 40,
    email 50, contact number 20, password 20, address 100).
    NOTE(review): these limits presumably mirror the column widths of
    urbanpulse.citizen; verify against the table definition.

    Returns:
        dict with the keys first_name, last_name, full_name, sex, email,
        contact_number, password and address.
    """
    given = fake.first_name()[:20]
    family = fake.last_name()[:20]
    # Drawn independently of the generated first name.
    gender = random.choice(["Male", "Female"])

    record = {
        "first_name": given,
        "last_name": family,
        "full_name": f"{given} {family}"[:40],
        "sex": gender,
        "email": fake.email()[:50],
        "contact_number": fake.phone_number()[:20],
        "password": fake.password(length=12)[:20],
        "address": fake.address()[:100],
    }
    return record

def insert_citizens(num_citizens=50):
    """Insert randomly generated citizens into ``urbanpulse.citizen``.

    All rows are generated first and then sent as a single executemany call
    inside one transaction: ``engine.begin()`` commits when the block exits
    normally and rolls back if the insert raises, so a failure cannot leave a
    partial batch behind.

    Args:
        num_citizens: Number of citizens to generate and insert (default 50).
            Zero or a negative number inserts nothing.
    """
    insert_query = text("""
        INSERT INTO urbanpulse.citizen (first_name, last_name, full_name, sex, email, contact_number, password, address)
        VALUES (:first_name, :last_name, :full_name, :sex, :email, :contact_number, :password, :address)
    """)

    rows = [generate_citizen() for _ in range(num_citizens)]

    with postgres_engine.begin() as conn:
        # An empty parameter list would be treated as "no parameters" and fail
        # on the unbound placeholders, so skip the statement entirely.
        if rows:
            conn.execute(insert_query, rows)

    print(f"✅ Successfully inserted {len(rows)} citizens.")

In [None]:
# Append 100 generated citizens. This only inserts, so every re-run of the cell
# adds another 100 rows.
insert_citizens(num_citizens=100)

✅ Successfully inserted 100 citizens.


In [None]:
# Check how many citizens the table holds after the insert.
citizen_count_query = text("SELECT COUNT(*) FROM urbanpulse.citizen")
with postgres_engine.connect() as conn:
    num_citizens = conn.execute(citizen_count_query).scalar()
print(num_citizens)

262


# 5. Update vote counts in Redis database

In [None]:
import random
import redis
from tqdm import tqdm

# Per-category multipliers (all ≤ 1) applied to the randomly drawn vote count in
# reset_and_update_votes; a category that is not listed here falls back to 1.0.
CATEGORY_WEIGHTS = {
    'Road Repair': 0.95,        # Very high priority
    'Waste Management': 0.40,   # Lower priority
    'Streetlights': 0.30,       # Low priority
    'Public Transport': 1.00,   # Highest priority
    'Water Supply': 0.60        # Medium priority
}

def reset_and_update_votes(df_issues, num_citizens, client=None, prefix=None, weights=None):
    """Replace the Redis vote count of every issue with a new weighted random value.

    For each row a count is drawn uniformly from ``0..num_citizens``, multiplied
    by the weight of the issue's category and truncated to an int. The key
    ``<prefix><issue_id>:votes`` is deleted and its ``total_votes`` hash field
    is set to the new count.

    All DELETE/HSET commands are queued on one pipeline and sent together, so
    the update costs one round trip instead of two per issue. Because there is
    no slow per-issue loop any more, no progress bar is shown. The id and
    category columns are iterated directly rather than through ``iterrows``,
    which would upcast ``issue_id`` to float (and change the key) for frames
    that contain only numeric columns.

    Args:
        df_issues: DataFrame with at least the columns ``issue_id`` and
            ``category``.
        num_citizens: Upper bound (inclusive) of the unweighted vote draw.
        client: Redis client to write to. Defaults to the notebook-level ``r``.
        prefix: Key prefix in front of the issue id. Defaults to the
            notebook-level ``ISSUE_PREFIX``.
        weights: Mapping of category -> multiplier. Defaults to the
            notebook-level ``CATEGORY_WEIGHTS``; unknown categories use 1.0.
    """
    client = r if client is None else client
    prefix = ISSUE_PREFIX if prefix is None else prefix
    weights = CATEGORY_WEIGHTS if weights is None else weights

    pipe = client.pipeline(transaction=False)
    for issue_id, category in zip(df_issues["issue_id"], df_issues["category"]):
        vote_key = f"{prefix}{issue_id}:votes"

        # Scale a uniform draw by the category weight (1.0 when the category is unknown).
        weight = weights.get(category, 1.0)
        new_vote_count = int(random.randint(0, num_citizens) * weight)

        # Drop the previous hash, then store the new total.
        pipe.delete(vote_key)
        pipe.hset(vote_key, "total_votes", new_vote_count)
    pipe.execute()

    print(f"✅ Updated votes for {len(df_issues)} issues in Redis with weighted distribution.")

# The citizen head-count is the upper bound for the random vote draw.
citizen_count_query = text("SELECT COUNT(*) FROM urbanpulse.citizen")
with postgres_engine.connect() as conn:
    num_citizens = conn.execute(citizen_count_query).scalar()

# Rewrite the Redis vote count of every issue in df_issues.
reset_and_update_votes(df_issues=df_issues, num_citizens=num_citizens)

100it [00:44,  2.25it/s]

✅ Updated votes for 100 issues in Redis with weighted distribution.





In [None]:
# Read the vote totals back from Redis and rebuild the per-issue table.
votes_per_issue = get_votes_per_issue()
df_redis = pd.DataFrame(
    data=list(votes_per_issue.items()),
    columns=["issue_id", "redis_votes"],
)

100%|██████████| 100/100 [00:22<00:00,  4.49it/s]


# 6. Push the updates to the Snowflake database

In [None]:
# Replace the Redis vote table in Snowflake with the refreshed counts in df_redis.
create_redis_votes_table = """
CREATE OR REPLACE TABLE redis_votes_analytics (
    issue_id NUMBER,
    redis_votes NUMBER
)
"""
sf_cursor.execute(create_redis_votes_table)
write_pandas(
    conn=sf_conn,
    df=df_redis,
    table_name="REDIS_VOTES_ANALYTICS",
    quote_identifiers=False,
)

sf_conn.commit()
print("Data loaded into Snowflake.")

Data loaded into Snowflake.


In [None]:
# Re-issue the issues_summary view definition after the vote table was reloaded.
# NOTE(review): the definition is the same as the one executed earlier in the
# notebook; if the view simply reads redis_votes_analytics at query time, this
# re-creation may be unnecessary. Confirm.
create_data_mart = """
CREATE OR REPLACE VIEW issues_summary AS
SELECT
    i.issue_id,
    i.description,
    i.category,
    i.priority_level,
    i.status,
    COALESCE(r.redis_votes, 0) AS total_votes,
FROM issues_analytics i
LEFT JOIN redis_votes_analytics r ON i.issue_id = r.issue_id
GROUP BY i.issue_id, i.description, i.category, i.priority_level, i.status, r.redis_votes
ORDER BY total_votes DESC
"""
sf_cursor.execute(create_data_mart)
sf_conn.commit()
print("Analytical data mart (view) created.")

Analytical data mart (view) created.
