In [1]:
import getpass
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from psycopg2 import sql
from psycopg2.extensions import adapt, register_adapter, AsIs
from sqlalchemy import create_engine, text

In [2]:
# One ethnicity CSV per year, 2018 through 2022 inclusive
file_paths = [f'ethnicity_{survey_year}.csv' for survey_year in range(2018, 2023)]

# Load every yearly file into its own dataframe
dataframes = list(map(pd.read_csv, file_paths))

# Stack the yearly frames vertically, renumbering the row index from zero
ethnicity = pd.concat(dataframes, ignore_index=True)

# Write the combined frame back to disk (without the row index) so it can be
# bulk-loaded into the database later
merged_file_path = 'ethnicity.csv'
ethnicity.to_csv(merged_file_path, index=False)

# Echo the output path as the cell result
merged_file_path

'ethnicity.csv'

In [None]:
# Target database; the credentials are collected interactively so they are
# never stored in the notebook source.
database = "f24t03"
user = input("Type username (pawprint) and hit enter: ")
password = getpass.getpass("Type password and hit enter: ")

# Raw psycopg2 connection used by the DDL cell below
connection = psycopg2.connect(
    host='pgsql',
    database=database,
    user=user,
    password=password,
)

Type username (pawprint) and hit enter: jsmm8
Type password and hit enter: ········


In [4]:
# (Re)create the ethnicity table from scratch.
# The outer `with connection` wraps both statements in one transaction
# (psycopg2 commits on a clean exit and rolls back on an exception); the inner
# `with` closes the cursor when the block ends.
with connection:
    with connection.cursor() as cursor:
        # Drop any previous copy so the cell can be re-run safely
        cursor.execute("DROP TABLE IF EXISTS ethnicity;")

        # Each measure is stored as an est_* / moe_* column pair; rows are
        # keyed by (geo_id, year).
        cursor.execute(
            '''
        CREATE TABLE ethnicity (
    geo_id TEXT,
    block_group TEXT,
    census_tract TEXT,
    county TEXT,
    state TEXT,
    est_total INTEGER,
    moe_total INTEGER,
    est_non_his_lat INTEGER,
    moe_non_his_lat INTEGER,
    est_non_his_lat_white INTEGER,
    moe_non_his_lat_white INTEGER,
    est_non_his_lat_black INTEGER,
    moe_non_his_lat_black INTEGER,
    est_non_his_lat_native INTEGER,
    moe_non_his_lat_native INTEGER,
    est_non_his_lat_asian INTEGER,
    moe_non_his_lat_asian INTEGER,
    est_non_his_lat_pac_isl INTEGER,
    moe_non_his_lat_pac_isl INTEGER,
    est_non_his_lat_other INTEGER,
    moe_non_his_lat_other INTEGER,
    est_non_his_lat_multi INTEGER,
    moe_non_his_lat_multi INTEGER,
    est_non_his_lat_multi_other INTEGER,
    moe_non_his_lat_multi_other INTEGER,
    est_non_his_lat_multi_excl_other INTEGER,
    moe_non_his_lat_multi_excl_other INTEGER,
    est_his_lat INTEGER,
    moe_his_lat INTEGER,
    est_his_lat_white INTEGER,
    moe_his_lat_white INTEGER,
    est_his_lat_black INTEGER,
    moe_his_lat_black INTEGER,
    est_his_lat_native INTEGER,
    moe_his_lat_native INTEGER,
    est_his_lat_asian INTEGER,
    moe_his_lat_asian INTEGER,
    est_his_lat_pac_isl INTEGER,
    moe_his_lat_pac_isl INTEGER,
    est_his_lat_other INTEGER,
    moe_his_lat_other INTEGER,
    est_his_lat_multi INTEGER,
    moe_his_lat_multi INTEGER,
    est_his_lat_multi_other INTEGER,
    moe_his_lat_multi_other INTEGER,
    est_his_lat_multi_excl_other INTEGER,
    moe_his_lat_multi_excl_other INTEGER,
    year INTEGER,
    PRIMARY KEY (geo_id, year)
    );
        '''
        )

In [None]:
# Securely getting the password
mypasswd = getpass.getpass("Enter your database password: ")

# Database connection parameters.
# Reuse the username entered in the psycopg2 connection cell instead of a
# hard-coded account, so the notebook works for whoever is running it.
username = user
host = 'pgsql'
database = 'f24t03'

# Constructing the connection string.
# Username and password are percent-encoded (safe='' also encodes '/') so that
# characters such as '@', ':', '/' or '%' in the password cannot corrupt the URL.
conn_string = (
    f"postgresql+psycopg2://{quote(username, safe='')}:{quote(mypasswd, safe='')}"
    f"@{host}/{database}"
)

# Create the SQLAlchemy engine
engine = create_engine(conn_string)

# CSV file to be imported
# NOTE(review): this rebinds `ethnicity`, which held the merged DataFrame in
# the merge cell, to a path string; the name is kept for compatibility.
ethnicity = 'ethnicity.csv'

# Chunk size for batch import
chunk_size = 500

try:
    # Run the whole load inside ONE transaction: engine.begin() commits only
    # if every chunk succeeds and rolls back otherwise, so a failure part-way
    # through never leaves a partially populated table.
    with engine.begin() as upload_connection:
        # Reading and uploading the CSV in chunks
        for chunk in pd.read_csv(ethnicity, chunksize=chunk_size):
            # Load the data to PostgreSQL (append data in chunks)
            chunk.to_sql('ethnicity', upload_connection, if_exists='append', index=False)
            print(f"Uploaded a chunk of {len(chunk)} records to the database.")

    print("CSV data uploaded successfully.")

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so "Run All" stops here instead of continuing with later cells
    # against an empty or stale table.
    raise

Enter your database password: ········
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 records to the database.
Uploaded a chunk of 500 record

In [6]:
# Close the raw psycopg2 connection opened earlier; the cells below talk to the
# database through the SQLAlchemy `engine` instead.
connection.close()

In [7]:
# SQL query to grant privileges
grant_privileges_query = "GRANT ALL PRIVILEGES ON TABLE ethnicity TO ypd5yb, remcmf, sgdky;"

# engine.begin() opens a transaction and COMMITs it when the block exits
# without error. With a plain engine.connect() SQLAlchemy 2.x rolls back any
# uncommitted work on exit, so the GRANT would silently be discarded even
# though the success message is printed.
with engine.begin() as connection:
    connection.execute(text(grant_privileges_query))

# Only reached after the transaction has been committed
print("Privileges granted successfully.")

Privileges granted successfully.


In [8]:
# Sanity check: pull a handful of rows back out of the freshly loaded table
select_query = "SELECT * FROM ethnicity LIMIT 10;"

with engine.connect() as connection:
    result = connection.execute(text(select_query))
    # Materialise the rows while the connection is still open, labelling the
    # columns with the names reported by the result set
    df = pd.DataFrame(data=result.fetchall(), columns=result.keys())

print("Query executed successfully.")

# Show the preview as the cell output
df

Query executed successfully.


Unnamed: 0,geo_id,block_group,census_tract,county,state,est_total,moe_total,est_non_his_lat,moe_non_his_lat,est_non_his_lat_white,...,moe_his_lat_pac_isl,est_his_lat_other,moe_his_lat_other,est_his_lat_multi,moe_his_lat_multi,est_his_lat_multi_other,moe_his_lat_multi_other,est_his_lat_multi_excl_other,moe_his_lat_multi_excl_other,year
0,1500000US290019501001,Block Group 1,Census Tract 9501,Adair County,Missouri,1698,268,1698,268,1652,...,11,0,11,0,11,0,11,0,11,2018
1,1500000US290019501002,Block Group 2,Census Tract 9501,Adair County,Missouri,1189,188,1189,188,1177,...,11,0,11,0,11,0,11,0,11,2018
2,1500000US290019501003,Block Group 3,Census Tract 9501,Adair County,Missouri,905,196,903,195,901,...,11,0,11,0,11,0,11,0,11,2018
3,1500000US290019502001,Block Group 1,Census Tract 9502,Adair County,Missouri,724,133,712,131,683,...,11,12,17,0,11,0,11,0,11,2018
4,1500000US290019502002,Block Group 2,Census Tract 9502,Adair County,Missouri,831,162,770,151,762,...,11,4,6,0,11,0,11,0,11,2018
5,1500000US290019502003,Block Group 3,Census Tract 9502,Adair County,Missouri,1063,190,1031,175,995,...,11,6,11,26,39,26,39,0,11,2018
6,1500000US290019503001,Block Group 1,Census Tract 9503,Adair County,Missouri,760,253,760,253,759,...,11,0,11,0,11,0,11,0,11,2018
7,1500000US290019503002,Block Group 2,Census Tract 9503,Adair County,Missouri,734,296,734,296,718,...,11,0,11,0,11,0,11,0,11,2018
8,1500000US290019503003,Block Group 3,Census Tract 9503,Adair County,Missouri,998,291,971,274,971,...,11,0,11,0,11,0,11,0,11,2018
9,1500000US290019503004,Block Group 4,Census Tract 9503,Adair County,Missouri,737,179,737,179,597,...,11,0,11,0,11,0,11,0,11,2018
