# Extract Data from Task 2 and Task 3 Databases
## Task 2: SQL Database (OLTP)

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load DATABASE_URL from the environment (load_dotenv reads it from .env).
load_dotenv()
database_url = os.getenv("DATABASE_URL")
if not database_url:
    # Fail here with an actionable message instead of handing a missing or
    # empty URL to create_engine, which rejects it with a less obvious error.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in .env"
    )
engine = create_engine(database_url, echo=False)

# Fetch data from Issue table
def fetch_sql_issues():
    """Return the rows of ``urbanpulse.issue`` as a pandas DataFrame.

    Selects issue_id, description, category, priority_level and status,
    reading through the module-level SQLAlchemy ``engine``.
    """
    issue_sql = "SELECT issue_id, description, category, priority_level, status FROM urbanpulse.issue"
    issues_df = pd.read_sql(sql=issue_sql, con=engine)
    return issues_df

# Fetch data from Vote table
def fetch_sql_votes():
    """Return the rows of ``urbanpulse.vote`` as a pandas DataFrame.

    Selects vote_id, citizen_id, issue_id and priority_vote, reading
    through the module-level SQLAlchemy ``engine``.
    """
    vote_sql = "SELECT vote_id, citizen_id, issue_id, priority_vote FROM urbanpulse.vote"
    votes_df = pd.read_sql(sql=vote_sql, con=engine)
    return votes_df

## Task 3: Redis Database

In [24]:
import redis
import uuid

# Redis connection setup. Connection details come from the environment
# (.env is loaded earlier in the notebook). Host and port fall back to the
# conventional Redis defaults, so an unset or empty REDIS_PORT no longer
# crashes in int() before a connection is even attempted.
pool = redis.ConnectionPool(
    host=os.getenv('REDIS_HOST') or 'localhost',
    port=int(os.getenv('REDIS_PORT') or 6379),
    username=os.getenv('REDIS_USERNAME'),
    password=os.getenv('REDIS_PASSWORD'),
    decode_responses=True  # return str instead of bytes for keys and values
)
r = redis.Redis(connection_pool=pool)

# Fetch Redis issues with vote totals
def fetch_redis_issues(limit=100):
    """Fetch issue hashes from Redis together with their vote totals.

    Issue hashes live under ``issue:<id>``; the matching vote hash lives under
    ``issue:<id>:votes`` and its ``total_votes`` field is attached to each row
    (0 when the field is missing).

    Args:
        limit: Maximum number of issues to return. ``None`` returns every
            issue found. Defaults to 100.

    Returns:
        pandas.DataFrame with one row per issue: the hash fields plus
        ``issue_id`` (str) and ``total_votes`` (int). Empty when no issue
        keys exist or ``limit`` is not positive.
    """
    if limit is not None and limit <= 0:
        return pd.DataFrame([])

    # SCAN walks the keyspace incrementally instead of blocking the server the
    # way KEYS does. ":votes" keys are filtered out *before* the limit is
    # applied so that `limit` counts issues, not raw keys.
    issue_keys = []
    seen = set()  # SCAN may yield the same key more than once
    for key in r.scan_iter(match="issue:*", count=1000):
        if key.endswith(":votes") or key in seen:
            continue
        seen.add(key)
        issue_keys.append(key)
        if limit is not None and len(issue_keys) >= limit:
            break

    if not issue_keys:
        return pd.DataFrame([])

    # Queue both reads for every issue and send them in a single round-trip.
    pipe = r.pipeline(transaction=False)
    issue_ids = []
    for key in issue_keys:
        issue_id = key.split(":")[1]
        issue_ids.append(issue_id)
        pipe.hgetall(key)
        pipe.hget(f"issue:{issue_id}:votes", "total_votes")
    replies = pipe.execute()

    issues = []
    for idx, issue_id in enumerate(issue_ids):
        # Replies come back in queue order: (hgetall, hget) per issue.
        issue_data = dict(replies[2 * idx])
        total_votes = replies[2 * idx + 1] or 0
        issue_data['issue_id'] = issue_id
        issue_data['total_votes'] = int(total_votes)
        issues.append(issue_data)

    return pd.DataFrame(issues)

In [22]:
# Smoke-test the Redis extraction; as the cell's last expression the returned
# DataFrame is displayed.
# NOTE(review): the saved output for this cell ("13076") is not a DataFrame
# repr, so it looks stale -- re-run the cell to refresh it.
fetch_redis_issues()

13076


## Snowflake Setup and Data Loading

In [7]:
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

# Open the shared Snowflake connection; every setting is read from the
# environment. ocsp_fail_open is explicitly turned off.
conn = snowflake.connector.connect(
    user=os.environ.get("SNOWFLAKE_USER"),
    password=os.environ.get("SNOWFLAKE_PASSWORD"),
    account=os.environ.get("SNOWFLAKE_ACCOUNT"),
    warehouse=os.environ.get("SNOWFLAKE_WAREHOUSE"),
    database=os.environ.get("SNOWFLAKE_DATABASE"),
    schema=os.environ.get("SNOWFLAKE_SCHEMA"),
    ocsp_fail_open=False,
)


# Load data into Snowflake
def load_to_snowflake(df, table_name):
    """Replace the contents of a Snowflake table with the rows of ``df``.

    The table is truncated and then bulk-loaded with ``write_pandas`` over the
    module-level ``conn``.

    Args:
        df: pandas DataFrame to load.
        table_name: Target table. It is interpolated into the TRUNCATE
            statement as-is (pass trusted names only) and upper-cased for
            ``write_pandas``.

    Raises:
        RuntimeError: If ``write_pandas`` reports that the load did not succeed.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(f"TRUNCATE TABLE {table_name}")  # Clear existing data
        success, _nchunks, nrows, _ = write_pandas(conn, df, table_name.upper())
    finally:
        # Release the cursor even when the truncate or the load raises.
        cursor.close()

    if not success:
        raise RuntimeError(f"write_pandas failed to load data into {table_name}")
    print(f"Loaded {nrows} rows into {table_name}")
In [11]:
# Pull the issue rows from the SQL (OLTP) database and display them.
# Nothing is written to Snowflake in this cell.
sql_issues = fetch_sql_issues()
sql_issues

Unnamed: 0,issue_id,description,category,priority_level,status
0,1,Broken streetlights,Infrastructure,2,Pending
1,2,Public park needs maintenance,Environment,3,Pending
2,4,Issue 0,Infrastructure,3,Pending
3,5,Issue 1,Infrastructure,3,Pending
4,6,Issue 2,Infrastructure,3,Pending
...,...,...,...,...,...
1009,1012,Issue 996,Infrastructure,3,Pending
1010,1013,Issue 997,Infrastructure,3,Pending
1011,1014,Issue 998,Infrastructure,3,Pending
1012,1015,Issue 999,Infrastructure,3,Pending


In [12]:
# Pull the vote rows from the SQL (OLTP) database and display them.
sql_votes = fetch_sql_votes()
sql_votes

Unnamed: 0,vote_id,citizen_id,issue_id,priority_vote
0,1,1,1,5
1,2,2,2,4
2,3,1,1,3
3,4,1,1,3
4,5,1,1,3
...,...,...,...,...
1007,1003,1,1,3
1008,1005,1,1,3
1009,1007,1,1,3
1010,1009,1,1,3


In [15]:
# Pull issue hashes and their vote totals from Redis and display them.
# NOTE(review): the saved output shows KeyboardInterrupt, i.e. this run was
# aborted before `redis_issues` was assigned -- re-run before the load cell.
redis_issues = fetch_redis_issues()
redis_issues

KeyboardInterrupt: 

In [10]:

# Truncate and reload each Snowflake table from the frames fetched above.
# The calls run in order, so a failure on a later frame leaves the earlier
# tables already reloaded.
# NOTE(review): depends on `sql_issues`, `sql_votes` and `redis_issues` having
# been assigned by the preceding cells; the saved output shows this run was
# interrupted.
load_to_snowflake(sql_issues, 'sql_issues')
load_to_snowflake(sql_votes, 'sql_votes')
load_to_snowflake(redis_issues, 'redis_issues')

KeyboardInterrupt: 