In [1]:
import getpass
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from psycopg2 import sql
from psycopg2.extensions import adapt, register_adapter, AsIs
from sqlalchemy import create_engine, text

In [2]:
# Input CSV files, named by year (2018 through 2022)
file_paths = [
    '2018_Tenure_by_Bedrooms.csv',
    '2019_Tenure_by_Bedrooms.csv',
    '2020_Tenure_by_Bedrooms.csv',
    '2021_Tenure_by_Bedrooms.csv',
    '2022_Tenure_by_Bedrooms.csv',
]

# Load each file into its own dataframe, in list order
dataframes = list(map(pd.read_csv, file_paths))

# Destination of the combined dataset
merged_file_path = 'Tenure_by_Bedrooms.csv'

# Stack the per-file frames into one frame with a fresh index,
# then write it to disk without the index column
Tenure_by_Bedrooms = pd.concat(dataframes, ignore_index=True)
Tenure_by_Bedrooms.to_csv(merged_file_path, index=False)

# Show the output path as the cell result
merged_file_path

'Tenure_by_Bedrooms.csv'

In [None]:
# Prompt for the username and (hidden) password
user     = input("Type username (pawprint) and hit enter: ")
password = getpass.getpass("Type password and hit enter: ")

# Target database name
database = "f24t03"

# Open the psycopg2 connection to the 'pgsql' host with the values above
connection = psycopg2.connect(
    host='pgsql',
    database=database,
    user=user,
    password=password,
)

Type username (pawprint) and hit enter: jsmm8
Type password and hit enter: ········


In [4]:
# Statement that removes any existing copy of the table
drop_table_sql = "DROP TABLE IF EXISTS tenure_by_bedrooms;"

# Table definition: five text identifier columns, NUMERIC est_*/moe_* column
# pairs, an integer year, and a composite primary key on (geo_id, year)
create_table_sql = '''
        CREATE TABLE IF NOT EXISTS tenure_by_bedrooms (
    geo_id TEXT,
    block_group TEXT,
    census_tract TEXT,
    county TEXT,
    state TEXT,
    est_total NUMERIC,
    moe_total NUMERIC,
    est_own_occ NUMERIC,
    moe_own_occ NUMERIC,
    est_own_occ_0br NUMERIC,
    moe_own_occ_0br NUMERIC,
    est_own_occ_1br NUMERIC,
    moe_own_occ_1br NUMERIC,
    est_own_occ_2br NUMERIC,
    moe_own_occ_2br NUMERIC,
    est_own_occ_3br NUMERIC,
    moe_own_occ_3br NUMERIC,
    est_own_occ_4br NUMERIC,
    moe_own_occ_4br NUMERIC,
    est_own_occ_5br_plus NUMERIC,
    moe_own_occ_5br_plus NUMERIC,
    est_rent_occ NUMERIC,
    moe_rent_occ NUMERIC,
    est_rent_occ_0br NUMERIC,
    moe_rent_occ_0br NUMERIC,
    est_rent_occ_1br NUMERIC,
    moe_rent_occ_1br NUMERIC,
    est_rent_occ_2br NUMERIC,
    moe_rent_occ_2br NUMERIC,
    est_rent_occ_3br NUMERIC,
    moe_rent_occ_3br NUMERIC,
    est_rent_occ_4br NUMERIC,
    moe_rent_occ_4br NUMERIC,
    est_rent_occ_5br_plus NUMERIC,
    moe_rent_occ_5br_plus NUMERIC,
    year INT,
    PRIMARY KEY (geo_id, year)
    );
    '''

# Run the drop and the create inside the connection's context manager;
# psycopg2 ends the transaction when the block exits (commit on success,
# rollback on error). The cursor is closed by its own `with` block.
with connection:
    with connection.cursor() as cursor:
        cursor.execute(drop_table_sql)
        cursor.execute(create_table_sql)

In [None]:
# Securely getting the password
mypasswd = getpass.getpass("Enter your database password: ")

# Database connection parameters
# NOTE(review): the username is hard-coded here; anyone else running the
# notebook has to edit this line.
username = 'jsmm8'
host = 'pgsql'
database = 'f24t03'

# Constructing the connection string. The credentials are percent-encoded
# (safe='' also encodes '/') so characters such as '@', ':', '/' or '#' in a
# password cannot be misread as URL delimiters.
conn_string = (
    f"postgresql+psycopg2://{quote(username, safe='')}:{quote(mypasswd, safe='')}"
    f"@{host}/{database}"
)

# Create the SQLAlchemy engine. create_engine() is lazy and does not contact
# the server, so open (and immediately release) one connection to make the
# success message below truthful.
try:
    engine = create_engine(conn_string)
    with engine.connect():
        pass
    print("Database connection established successfully.")
except Exception as conn_err:
    print(f"Database connection error: {conn_err}")
    # Re-raise: nothing below can work without a usable engine.
    raise

# CSV file to be imported
tenure_by_bedrooms = 'Tenure_by_Bedrooms.csv'

# Chunk size for batch import
chunk_size = 500

try:
    # Run the whole import in a single transaction: engine.begin() commits
    # when the block exits normally and rolls back if any chunk fails, so the
    # table is never left half-loaded.
    with engine.begin() as upload_conn:
        # Reading and uploading the CSV in chunks
        for chunk in pd.read_csv(tenure_by_bedrooms, chunksize=chunk_size):
            # Load the data to PostgreSQL (append data in chunks)
            chunk.to_sql('tenure_by_bedrooms', upload_conn, if_exists='append', index=False)
            print(f"Uploaded a chunk of {len(chunk)} records to the database.")

    print("CSV data uploaded successfully.")

except pd.errors.EmptyDataError:
    print("The CSV file is empty. Please check the file contents.")
    raise
except pd.errors.ParserError as parse_err:
    print(f"Error parsing CSV file: {parse_err}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so a failed import stops the notebook instead of letting later
    # cells run against an empty table.
    raise

Enter your database password: ········
Database connection established successfully.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records t

In [6]:
# SQL query to grant privileges
grant_privileges_query = "GRANT ALL PRIVILEGES ON TABLE tenure_by_bedrooms TO ypd5yb, remcmf, sgdky;"

# engine.begin() commits when the block exits without error. A plain
# engine.connect() block never commits under SQLAlchemy 2.x (there is no
# implicit autocommit), so the GRANT would be rolled back on close.
# The connection gets its own name so the `connection` variable created
# earlier in the notebook is not overwritten.
with engine.begin() as grant_conn:
    grant_conn.execute(text(grant_privileges_query))

# Reached only after the transaction has been committed.
print("Privileges granted successfully.")

Privileges granted successfully.


In [7]:
# Close the connection object that the name `connection` refers to at this point.
connection.close()

In [8]:
# Sample query: up to ten rows for the year 2022
select_query = "SELECT * FROM tenure_by_bedrooms WHERE year = 2022 LIMIT 10;"

# Run the query on a short-lived SQLAlchemy connection and materialise the
# rows into a dataframe before the connection is released
with engine.connect() as connection:
    result = connection.execute(text(select_query))
    fetched_rows = result.fetchall()
    column_names = result.keys()
    df = pd.DataFrame(fetched_rows, columns=column_names)

print("Query executed successfully.")

# Display the sample as the cell result
df

Query executed successfully.


Unnamed: 0,geo_id,block_group,census_tract,county,state,est_total,moe_total,est_own_occ,moe_own_occ,est_own_occ_0br,...,moe_rent_occ_1br,est_rent_occ_2br,moe_rent_occ_2br,est_rent_occ_3br,moe_rent_occ_3br,est_rent_occ_4br,moe_rent_occ_4br,est_rent_occ_5br_plus,moe_rent_occ_5br_plus,year
0,1500000US290019501001,Block Group 1,Census Tract 9501,Adair County,Missouri,427,74,405,77,0,...,12,12,15,10,16,0,12,0,12,2022
1,1500000US290019501002,Block Group 2,Census Tract 9501,Adair County,Missouri,612,100,523,89,0,...,12,32,38,57,44,0,12,0,12,2022
2,1500000US290019501003,Block Group 3,Census Tract 9501,Adair County,Missouri,317,64,275,55,0,...,5,2,3,22,21,6,10,0,12,2022
3,1500000US290019502001,Block Group 1,Census Tract 9502,Adair County,Missouri,322,70,273,64,7,...,12,21,20,10,11,5,8,0,12,2022
4,1500000US290019502002,Block Group 2,Census Tract 9502,Adair County,Missouri,378,88,332,79,0,...,6,23,30,7,7,12,19,0,12,2022
5,1500000US290019502003,Block Group 3,Census Tract 9502,Adair County,Missouri,417,90,345,91,0,...,13,18,17,35,28,10,15,0,12,2022
6,1500000US290019503001,Block Group 1,Census Tract 9503,Adair County,Missouri,298,76,210,64,0,...,16,60,48,9,14,0,12,0,12,2022
7,1500000US290019503002,Block Group 2,Census Tract 9503,Adair County,Missouri,272,102,165,76,0,...,49,54,62,0,12,0,12,0,12,2022
8,1500000US290019503003,Block Group 3,Census Tract 9503,Adair County,Missouri,433,177,60,44,0,...,58,255,133,28,46,10,16,29,44,2022
9,1500000US290019503004,Block Group 4,Census Tract 9503,Adair County,Missouri,296,108,10,15,0,...,60,93,54,7,13,0,12,11,17,2022


In [9]:
# Sample query: up to ten rows for the year 2018
select_query = "SELECT * FROM tenure_by_bedrooms WHERE year = 2018 LIMIT 10;"

# Run the query on a short-lived SQLAlchemy connection and materialise the
# rows into a dataframe before the connection is released
with engine.connect() as connection:
    result = connection.execute(text(select_query))
    fetched_rows = result.fetchall()
    column_names = result.keys()
    df = pd.DataFrame(fetched_rows, columns=column_names)

print("Query executed successfully.")

# Display the sample as the cell result
df

Query executed successfully.


Unnamed: 0,geo_id,block_group,census_tract,county,state,est_total,moe_total,est_own_occ,moe_own_occ,est_own_occ_0br,...,moe_rent_occ_1br,est_rent_occ_2br,moe_rent_occ_2br,est_rent_occ_3br,moe_rent_occ_3br,est_rent_occ_4br,moe_rent_occ_4br,est_rent_occ_5br_plus,moe_rent_occ_5br_plus,year
0,1500000US290019501001,Block Group 1,Census Tract 9501,Adair County,Missouri,547,71,522,72,0,...,11,8,8,17,18,0,11,0,11,2018
1,1500000US290019501002,Block Group 2,Census Tract 9501,Adair County,Missouri,466,67,405,61,0,...,11,26,27,20,20,15,18,0,11,2018
2,1500000US290019501003,Block Group 3,Census Tract 9501,Adair County,Missouri,332,54,293,55,0,...,6,6,7,24,16,0,11,0,11,2018
3,1500000US290019502001,Block Group 1,Census Tract 9502,Adair County,Missouri,314,50,256,47,0,...,10,21,17,12,12,0,11,0,11,2018
4,1500000US290019502002,Block Group 2,Census Tract 9502,Adair County,Missouri,331,55,294,52,0,...,11,13,20,11,11,13,13,0,11,2018
5,1500000US290019502003,Block Group 3,Census Tract 9502,Adair County,Missouri,436,69,347,67,0,...,11,57,50,13,12,19,31,0,11,2018
6,1500000US290019503001,Block Group 1,Census Tract 9503,Adair County,Missouri,286,70,248,72,0,...,11,24,28,14,18,0,11,0,11,2018
7,1500000US290019503002,Block Group 2,Census Tract 9503,Adair County,Missouri,272,66,134,46,0,...,28,24,26,85,61,0,11,0,11,2018
8,1500000US290019503003,Block Group 3,Census Tract 9503,Adair County,Missouri,450,113,113,68,0,...,61,164,81,67,61,0,11,8,12,2018
9,1500000US290019503004,Block Group 4,Census Tract 9503,Adair County,Missouri,415,94,41,33,0,...,54,85,54,9,14,0,11,30,39,2018
