In [29]:
!pip install pandas



# Import Libraries!

In [27]:
from sqlalchemy.orm import sessionmaker
import snowflake.connector
import redis
import pandas as pd
import matplotlib.pyplot as plt
import os
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
from sqlalchemy import Column, Integer, String, Text, Float, TIMESTAMP
from sqlalchemy.ext.declarative import declarative_base
from snowflake.connector.pandas_tools import write_pandas


# Get environment variables

In [8]:
load_dotenv()

DATABASE_URL = os.getenv('DATABASE_URL')
REDIS_HOST = os.getenv('REDIS_HOST')
REDIS_PORT = int(os.getenv('REDIS_PORT'))
REDIS_USERNAME = os.getenv('REDIS_USERNAME')
REDIS_PASSWORD = os.getenv('REDIS_PASSWORD')
SNOWFLAKE_USER = os.getenv('SNOWFLAKE_USER')
SNOWFLAKE_PASSWORD = os.getenv('SNOWFLAKE_PASSWORD')
SNOWFLAKE_ACCOUNT = os.getenv('SNOWFLAKE_ACCOUNT')
SNOWFLAKE_WAREHOUSE = os.getenv('SNOWFLAKE_WAREHOUSE')
SNOWFLAKE_DATABASE = os.getenv('SNOWFLAKE_DATABASE')
SNOWFLAKE_SCHEMA = os.getenv('SNOWFLAKE_SCHEMA')

# OLTP setup

In [10]:
postgres_engine = create_engine(os.getenv("DATABASE_URL"))
with postgres_engine.connect() as conn:
    df_issues = pd.read_sql(text("SELECT * FROM urbanpulse.issue"), conn)
    df_votes = pd.read_sql(text("SELECT * FROM urbanpulse.vote"), conn)

print("Extracted issues:")
print(df_issues.head())
print("Extracted votes:")
print(df_votes.head())

Extracted issues:
   issue_id  citizen_id                    description        category  \
0         1           1            Broken streetlights  Infrastructure   
1         2           2  Public park needs maintenance     Environment   
2         4           1                        Issue 0  Infrastructure   
3         5           1                        Issue 1  Infrastructure   
4         6           1                        Issue 2  Infrastructure   

   priority_level  latitude  longitude   status                 created_at  
0               2   40.7128    -74.006  Pending 2025-03-04 07:24:24.420794  
1               3   40.7138    -74.007  Pending 2025-03-04 07:24:24.420805  
2               3   12.3400     56.780  Pending 2025-03-04 07:24:25.537125  
3               3   12.3400     56.780  Pending 2025-03-04 07:24:26.113725  
4               3   12.3400     56.780  Pending 2025-03-04 07:24:26.687319  
Extracted votes:
   vote_id  citizen_id  issue_id  priority_vote           

# Redis setup

In [12]:
r = redis.Redis(
    host=os.getenv("REDIS_HOST"),
    port=int(os.getenv("REDIS_PORT")),
    username=os.getenv("REDIS_USERNAME"),
    password=os.getenv("REDIS_PASSWORD"),
    decode_responses=True
)

# Snowflake setup

In [16]:
sf_conn = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
    ocsp_fail_open=False
)

sf_cursor = sf_conn.cursor()

In [17]:
# For demonstration, assume we have one urban issue and want to capture its vote count.
issue_key = "issue:" + df_issues.iloc[0]["issue_id"].__str__()
redis_votes = r.hget(issue_key + ":votes", "total_votes")
redis_votes = int(redis_votes) if redis_votes is not None else 0

In [18]:
# Create a DataFrame to hold Redis analytics data.
df_redis = pd.DataFrame([{"issue_id": df_issues.iloc[0]["issue_id"], "redis_votes": redis_votes}])
print("Extracted Redis votes:")
print(df_redis)

Extracted Redis votes:
   issue_id  redis_votes
0         1            0


In [26]:
# Create staging tables for issues and votes (if not exist)
create_issues_table = f"""
CREATE OR REPLACE TABLE issues_analytics (
    issue_id NUMBER,
    citizen_id NUMBER,
    description TEXT,
    category VARCHAR,
    priority_level NUMBER,
    latitude FLOAT,
    longitude FLOAT,
    status VARCHAR,
    created_at TIMESTAMP
)
"""
create_votes_table = f"""
CREATE OR REPLACE TABLE votes_analytics (
    vote_id NUMBER,
    citizen_id NUMBER,
    issue_id NUMBER,
    priority_vote NUMBER,
    created_at TIMESTAMP
)
"""

sf_cursor.execute("USE DATABASE SF_SAMPLE;")
sf_cursor.execute("USE SCHEMA urbanpulse;") 

sf_cursor.execute(create_issues_table)
sf_cursor.execute(create_votes_table)

<snowflake.connector.cursor.SnowflakeCursor at 0x13b8052b0>

In [30]:
# Load data from DataFrames into Snowflake tables using write_pandas
write_pandas(sf_conn, df_issues, "ISSUES_ANALYTICS")
write_pandas(sf_conn, df_votes, "VOTES_ANALYTICS")


MissingDependencyError: Missing optional dependency: pandas

# Function from Task 2 to report an issue

In [24]:
def report_issue(citizen_id, description, category, priority_level, latitude, longitude):
    session = Session()
    issue = Issue(citizen_id=citizen_id, description=description, category=category,
                  priority_level=priority_level, latitude=latitude, longitude=longitude, status="Pending")
    session.add(issue)
    session.commit()
    issue_id = issue.issue_id
    session.close()
    return issue_id

# Function from Task 3 to create a vote

In [25]:
def create_vote(issue_id, user_id):
    issue_key = f"issue:{issue_id}"
    vote_key = f"vote:{issue_id}:{user_id}"
    expires_at = r.hget(issue_key, "expires_at")
    if not expires_at or time.time() > float(expires_at):
        return False
    if r.exists(vote_key):
        return False
    r.hincrby(issue_key + ":votes", "total_votes", 1)
    r.set(vote_key, "VOTED")
    return True

# Load functions (simplified from Step 1)

In [26]:
def load_oltp_to_snowflake():
    session = Session()
    sf_cursor = sf_conn.cursor()
    issues = session.execute("SELECT issue_id, citizen_id, description, category, priority_level, status, created_at FROM urbanpulse.issue").fetchall()
    sf_cursor.execute("TRUNCATE TABLE urbanpulse.issue")  # Reset for demo
    sf_cursor.executemany(
        "INSERT INTO urbanpulse.issue (issue_id, citizen_id, description, category, priority_level, status, created_at) VALUES (%s, %s, %s, %s, %s, %s, %s)",
        issues
    )
    sf_conn.commit()
    session.close()
    sf_cursor.close()

def load_redis_to_snowflake():
    sf_cursor = sf_conn.cursor()
    issues = []
    cursor = "0"
    while cursor != "0":
        cursor, keys = r.scan(cursor=cursor, match="issue:*", count=100)
        for key in keys:
            if key.endswith(":votes"):
                continue
            issue_id = key.split(":")[1]
            issue_data = r.hgetall(key)
            total_votes = r.hget(key + ":votes", "total_votes") or 0
            issues.append((issue_id, issue_data.get("title"), issue_data.get("description"), float(issue_data.get("expires_at")), int(total_votes)))
    sf_cursor.execute("TRUNCATE TABLE urbanpulse.redis_issues")  # Reset for demo
    sf_cursor.executemany(
        "INSERT INTO urbanpulse.redis_issues (issue_id, title, description, expires_at, total_votes) VALUES (%s, %s, %s, TO_TIMESTAMP(%s), %s)",
        issues
    )
    sf_conn.commit()
    sf_cursor.close()

# Demonstration

In [27]:
sf_cursor = sf_conn.cursor()

# Initial OLTP Analytics

In [29]:
print("Initial Citizen Engagement:")
initial = sf_cursor.execute("SELECT * FROM urbanpulse.citizen_engagement WHERE citizen_id = 1").fetchone()
print(f"Citizen 1 - Issues Reported: {initial[2]}, Votes Cast: {initial[3]}")

Initial Citizen Engagement:


ProgrammingError: 090105 (22000): Cannot perform SELECT. This session does not have a current database. Call 'USE DATABASE', or use a qualified name.

# Perform OLTP Operation: Report a new issue

In [None]:
report_issue(1, "New pothole", "Infrastructure", 2, 40.7128, -74.0060)
load_oltp_to_snowflake()

# Updated OLTP Analytics

In [None]:
print("\nUpdated Citizen Engagement after Reporting Issue:")
updated = sf_cursor.execute("SELECT * FROM urbanpulse.citizen_engagement WHERE citizen_id = 1").fetchone()
print(f"Citizen 1 - Issues Reported: {updated[2]}, Votes Cast: {updated[3]}")

# Initial Redis Analytics

In [None]:
print("\nInitial Top Redis Issue:")
initial_redis = sf_cursor.execute("SELECT * FROM urbanpulse.top_redis_issues LIMIT 1").fetchone()
print(f"Issue {initial_redis[0]} - Title: {initial_redis[1]}, Votes: {initial_redis[2]}"

# Perform Redis Operation: Cast votes

In [None]:
issue_id = initial_redis[0]
for i in range(5):
    create_vote(issue_id, f"user_new_{i}")
load_redis_to_snowflake()

# Updated Redis Analytics

In [None]:
print("\nUpdated Top Redis Issue after Voting:")
updated_redis = sf_cursor.execute("SELECT * FROM urbanpulse.top_redis_issues LIMIT 1").fetchone()
print(f"Issue {updated_redis[0]} - Title: {updated_redis[1]}, Votes: {updated_redis[2]}")

sf_conn.close()