In [1]:
import psycopg2
from psycopg2 import sql
import numpy as np
import pandas as pd
from psycopg2.extensions import adapt, register_adapter, AsIs
from sqlalchemy import create_engine, text
import getpass

In [2]:
# Define the file paths
file_paths = ['2018_Tenure_by_Plumbing.csv', 
              '2019_Tenure_by_Plumbing.csv',
              '2020_Tenure_by_Plumbing.csv', 
              '2021_Tenure_by_Plumbing.csv', 
              '2022_Tenure_by_Plumbing.csv']

# Reading the data files into dataframes
dataframes = [pd.read_csv(file) for file in file_paths]

# Concatenate all the dataframes into one
Tenure_by_Plumbing = pd.concat(dataframes, ignore_index=True)

# Save the merged dataframe to a new CSV file
merged_file_path = 'Tenure_by_Plumbing.csv'
Tenure_by_Plumbing.to_csv(merged_file_path, index=False)

# Provide the path of the merged file
merged_file_path

'Tenure_by_Plumbing.csv'

In [None]:
database = "f24t03"
user     = input("Type username (pawprint) and hit enter: ")
password = getpass.getpass("Type password and hit enter: ")

connection = psycopg2.connect(database = database,
                              user     = user,
                              host     = 'pgsql',
                              password = password)

Type username (pawprint) and hit enter: jsmm8
Type password and hit enter: ········


In [4]:
with connection, connection.cursor() as cursor:
    cursor.execute("DROP TABLE IF EXISTS tenure_by_plumbing;")
    

    cursor.execute(
        '''
        CREATE TABLE IF NOT EXISTS tenure_by_plumbing (
    geo_id TEXT,
    block_group TEXT,
    census_tract TEXT,
    county TEXT,
    state TEXT,
    est_total NUMERIC,
    moe_total NUMERIC,
    est_own_occ NUMERIC,
    moe_own_occ NUMERIC,
    est_own_occ_plumb_complete NUMERIC,
    moe_own_occ_plumb_complete NUMERIC,
    est_own_occ_plumb_lack NUMERIC,
    moe_own_occ_plumb_lack NUMERIC,
    est_rent_occ NUMERIC,
    moe_rent_occ NUMERIC,
    est_rent_occ_plumb_complete NUMERIC,
    moe_rent_occ_plumb_complete NUMERIC,
    est_rent_occ_plumb_lack NUMERIC,
    moe_rent_occ_plumb_lack NUMERIC,
    year INT,
    PRIMARY KEY (geo_id, year)
    );
    '''
    )

In [None]:
# Securely getting the password
mypasswd = getpass.getpass("Enter your database password: ")

# Database connection parameters
username = 'jsmm8'
host = 'pgsql'
database = 'f24t03'

# Constructing the connection string
conn_string = f"postgresql+psycopg2://{username}:{mypasswd}@{host}/{database}"

# Create the SQLAlchemy engine
try:
    engine = create_engine(conn_string)
    print("Database connection established successfully.")
except Exception as conn_err:
    print(f"Database connection error: {conn_err}")

# CSV file to be imported
tenure_by_plumbing = 'Tenure_by_Plumbing.csv'

# Chunk size for batch import
chunk_size = 500

try:
    # Reading and uploading the CSV in chunks
    for chunk in pd.read_csv(tenure_by_plumbing, chunksize=chunk_size):
        # Load the data to PostgreSQL (append data in chunks)
        chunk.to_sql('tenure_by_plumbing', engine, if_exists='append', index=False)
        print(f"Uploaded a chunk of {len(chunk)} records to the database.")
    
    print("CSV data uploaded successfully.")

except pd.errors.EmptyDataError:
    print("The CSV file is empty. Please check the file contents.")
except pd.errors.ParserError as parse_err:
    print(f"Error parsing CSV file: {parse_err}")
except Exception as e:
    print(f"An error occurred: {e}")

Enter your database password: ········
Database connection established successfully.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records t

In [6]:
connection.close()

In [7]:
# SQL query to grant privileges
grant_privileges_query = "GRANT ALL PRIVILEGES ON TABLE tenure_by_plumbing TO ypd5yb, remcmf, sgdky;"

with engine.connect() as connection:
        connection.execute(text(grant_privileges_query))
        print("Privileges granted successfully.")

Privileges granted successfully.


In [8]:
select_query = "SELECT * FROM tenure_by_plumbing WHERE year = 2022 LIMIT 10;" 

with engine.connect() as connection:
    result = connection.execute(text(select_query))
    df = pd.DataFrame(result.fetchall(), columns=result.keys())
df
print("Query executed successfully.")

Query executed successfully.


In [9]:
select_query = "SELECT * FROM tenure_by_plumbing WHERE year = 2018 LIMIT 10;" 

with engine.connect() as connection:
    result = connection.execute(text(select_query))
    df = pd.DataFrame(result.fetchall(), columns=result.keys())
df
print("Query executed successfully.")

Query executed successfully.
