# Extract Data from Task 2 and Task 3 Databases
## Task 2: SQL Database (OLTP)

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load database URL from .env
load_dotenv()
database_url = os.getenv("DATABASE_URL")
engine = create_engine(database_url, echo=False)

# Fetch data from Issue table
def fetch_sql_issues():
    query = "SELECT issue_id, description, category, priority_level, status FROM urbanpulse.issue"
    return pd.read_sql(query, engine)

# Fetch data from Vote table
def fetch_sql_votes():
    query = "SELECT vote_id, citizen_id, issue_id, priority_vote FROM urbanpulse.vote"
    return pd.read_sql(query, engine)

## Task 3: Redis Database

In [4]:
import redis
import uuid

# Redis connection setup (assumes .env variables are set)
pool = redis.ConnectionPool(
    host=os.getenv('REDIS_HOST'),
    port=int(os.getenv('REDIS_PORT')),
    username=os.getenv('REDIS_USERNAME'),
    password=os.getenv('REDIS_PASSWORD'),
    decode_responses=True
)
r = redis.Redis(connection_pool=pool)

# Fetch Redis issues with vote totals
def fetch_redis_issues():
    issue_keys = r.keys("issue:*")
    issues = []
    for key in issue_keys:
        if not key.endswith(":votes"):
            issue_data = r.hgetall(key)
            issue_id = key.split(":")[1]
            vote_key = f"issue:{issue_id}:votes"
            total_votes = r.hget(vote_key, "total_votes") or 0
            issue_data['issue_id'] = issue_id
            issue_data['total_votes'] = int(total_votes)
            issues.append(issue_data)
    
    return pd.DataFrame(issues)

In [None]:
fetch_redis_issues()

## Snowflake Setup and Data Loading

In [7]:
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

# Snowflake connection
conn = snowflake.connector.connect(
    user=os.getenv('SNOWFLAKE_USER'),
    password=os.getenv('SNOWFLAKE_PASSWORD'),
    account=os.getenv('SNOWFLAKE_ACCOUNT'),
    warehouse=os.getenv('SNOWFLAKE_WAREHOUSE'),
    database=os.getenv('SNOWFLAKE_DATABASE'),
    schema=os.getenv('SNOWFLAKE_SCHEMA'),
    ocsp_fail_open=False
)


# Load data into Snowflake
def load_to_snowflake(df, table_name):
    cursor = conn.cursor()
    cursor.execute(f"TRUNCATE TABLE {table_name}")  # Clear existing data
    success, nchunks, nrows, _ = write_pandas(conn, df, table_name.upper())
    print(f"Loaded {nrows} rows into {table_name}")
    cursor.close()

In [11]:
# Execute setup and initial load
sql_issues = fetch_sql_issues()
sql_issues

Unnamed: 0,issue_id,description,category,priority_level,status
0,1,Broken streetlights,Infrastructure,2,Pending
1,2,Public park needs maintenance,Environment,3,Pending
2,4,Issue 0,Infrastructure,3,Pending
3,5,Issue 1,Infrastructure,3,Pending
4,6,Issue 2,Infrastructure,3,Pending
...,...,...,...,...,...
1009,1012,Issue 996,Infrastructure,3,Pending
1010,1013,Issue 997,Infrastructure,3,Pending
1011,1014,Issue 998,Infrastructure,3,Pending
1012,1015,Issue 999,Infrastructure,3,Pending


In [12]:
sql_votes = fetch_sql_votes()
sql_votes

Unnamed: 0,vote_id,citizen_id,issue_id,priority_vote
0,1,1,1,5
1,2,2,2,4
2,3,1,1,3
3,4,1,1,3
4,5,1,1,3
...,...,...,...,...
1007,1003,1,1,3
1008,1005,1,1,3
1009,1007,1,1,3
1010,1009,1,1,3


In [25]:
redis_issues = fetch_redis_issues()
redis_issues

Unnamed: 0,issue_id,citizen_id,description,category,priority_level,latitude,longitude,status,created_at,total_votes,title,expires_at
0,b0d3a690-2ded-4260-84ae-4a21b4c006fd,cffc0f89-a0dd-4f4f-942f-e63dd0e1341f,Issue 8545,Infrastructure,1,12.34,56.78,Resolved,1741180087.749919,0,,
1,5649a175-2bc5-4a0c-bf47-6e33ba649063,c7b490e4-eefb-4243-b958-fb88ada8e532,Issue 2224,Infrastructure,1,12.34,56.78,Resolved,1741180015.620401,0,,
2,c1767261-e7e5-42d5-bcca-6ca927834196,db3b7996-7bae-48a1-96b6-de272bb61dc4,Issue 7647,Infrastructure,1,12.34,56.78,Resolved,1741180077.504979,0,,
3,86210901-c5c5-44aa-af85-67640d311197,a6a58143-752d-48cd-87e7-dde2baed90a1,Issue 700,Infrastructure,1,12.34,56.78,Resolved,1741179998.206822,0,,
4,3ed696e1-0c6d-4d10-98c0-eb0027a90e65,5bb20832-e67a-46c8-b75a-eb604c896d84,Issue 1975,Infrastructure,1,12.34,56.78,Resolved,1741180012.762829,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
90,416c66a1-b05d-40ed-9ae2-428436e25e64,9ca397c6-4649-4c1a-b786-616aeb8d892d,Issue 7623,Infrastructure,1,12.34,56.78,Resolved,1741180077.2281191,0,,
91,7a6aaa08-974b-4cc5-adc8-a9a594905686,f3a8a9f0-cc9f-47c3-953b-350130142d19,Issue 4480,Infrastructure,1,12.34,56.78,Resolved,1741180041.3572428,0,,
92,878c6cfc-76ca-4756-9d89-9aeb22e06034,9519e5c1-7378-4aa0-9f5d-d56eeed7530f,Issue 3626,Infrastructure,1,12.34,56.78,Resolved,1741180031.609694,0,,
93,9eb3f5d0-ddf5-45dc-87ee-add1f4d961ce,,Description for issue 587,,,,,,,0,Traffic Issue 587,1741671479.581232


In [28]:
load_to_snowflake(sql_issues, 'sql_issues')
load_to_snowflake(sql_votes, 'sql_votes')
load_to_snowflake(redis_issues, 'redis_issues')

ProgrammingError: 090105 (22000): Cannot perform TRUNCATE. This session does not have a current database. Call 'USE DATABASE', or use a qualified name.