# Uploading Shapefiles to PostGres Database

## Creating Middle School Layer

In [None]:
pip install geopandas fiona

In [None]:
import geopandas as gpd

gdb_path = "Springfield.gdb/Springfield.gdb"

schools = gpd.read_file(gdb_path, layer="Schools")

# Filter rows where NAME contains "middle"
middle_school = schools[schools["NAME"].str.contains("middle", case=False, na=False)]

# # Save the new layer back into the same geodatabase
# middle_school.to_file(gdb_path, layer="middle_school", driver="OpenFileGDB")

# Alternatively, save as a shapefile if needed
middle_school.to_file("middle_schools.shp")

## Viewing all relevant layers

In [1]:
import geopandas as gpd

# Load neighborhood service areas geodb into table
gdb_path = 'Springfield.gdb/Springfield.gdb'

# Check the layers in the geodatabase
layers = gpd.io.file.fiona.listlayers(gdb_path)
print("Layers found in GDB:", layers)

sub_layers = ["single_family_point", "fire", "police", "early_childhood_school", "elementary_school", "high_school", "hospital", "nursing_home", "public_health", "urgentcare", "trailheads", "single_family_point_15"]




Layers found in GDB: ['Address_Points', 'Street_Centerline', 'City_Limits', 'Legal_Tracts_of_Record', 'Urban_Service_Area', 'Watershed_Sub_Basins', 'Neighborhood_Organizations', 'Neighborhood_Service_Areas', 'Schools', 'Bike_Routes', 'Greenway_Trails', 'Park_Trails', 'Parks', 'The_Link', 'Trailheads', 'Sewer_Basins', 'Sewer_Districts', 'Sewer_Lines', 'Stormwater_Detention_Basins', 'Stormwater_Inlets', 'Stormwater_Pipes', 'Elevation_Benchmark', 'Map_Grid', 'Subdivisions', 'Zoning', 'FEMA_Base_Flood_Elevations', 'FEMA_Floodplain', 'FEMA_Floodline', 'Sinkhole_Boundaries', 'Tree_Inventory', 'Fire_Districts', 'Police_and_Fire_Stations', 'Law_Enforcement_Zones', 'BASE_TRY', 'first_cut', 'Residental', 'Export_2', 'point_in_polygon', 'intersect', 'verify_1', 'single_family', 'fire', 'police', 'single_family_point', 'R_SF_point', 'bike_lane', 'shared_bike_lane', 'signed_bike_route', 'signed_bike_share_road', 'paved_greenway', 'unpaved_greenway', 'water_greenway', 'asphalt_park_trail', 'gravel_p

In [2]:
import geopandas as gpd
import pandas as pd

gdb_path = 'Springfield.gdb/Springfield.gdb'
sub_layers = ["single_family_point", "fire", "police", "early_childhood_school", "elementary_school", "high_school", "hospital", "nursing_home", "public_health", "urgentcare", "trailheads"]


for layer in sub_layers:
    print(f"\n=== {layer.upper()} ===")
    try:
        gdf = gpd.read_file(gdb_path, layer=layer)
        # Get the column names and data types
        dtypes = pd.DataFrame(gdf.dtypes, columns=["dtype"])
        print(dtypes)
    except Exception as e:
        print(f"Error reading {layer}: {e}")



=== SINGLE_FAMILY_POINT ===
                   dtype
Join_Count         int64
TARGET_FID         int64
OBJECTID           int64
PRIM_NUM          object
SUB_NUM           object
PRE_DIR           object
STR_NAM           object
STR_TYPE          object
CITY              object
STATE             object
ZIP5              object
ADDRESS           object
BLOCK             object
NSA               object
BDS_ZONE          object
ZONING            object
PD_NUM             int64
OBJECTID_12        int64
PRIM_NUM_1        object
SUB_NUM_1         object
PRE_DIR_1         object
STR_NAM_1         object
STR_TYPE_1        object
CITY_1            object
STATE_1           object
ZIP5_1            object
ADDRESS_1         object
BLOCK_1           object
NSA_1             object
BDS_ZONE_1        object
ZONING_1          object
PD_NUM_1           int64
verify             int64
NEAR_FID           int64
NEAR_DIST        float64
n_fire           float64
n_flood          float64
n_police         floa

In [5]:
import fiona

with fiona.Env():
    layers = fiona.listlayers(gdb_path)
    print("Available layers:", layers)


Available layers: ['Address_Points', 'Street_Centerline', 'City_Limits', 'Legal_Tracts_of_Record', 'Urban_Service_Area', 'Watershed_Sub_Basins', 'Neighborhood_Organizations', 'Neighborhood_Service_Areas', 'Schools', 'Bike_Routes', 'Greenway_Trails', 'Park_Trails', 'Parks', 'The_Link', 'Trailheads', 'Sewer_Basins', 'Sewer_Districts', 'Sewer_Lines', 'Stormwater_Detention_Basins', 'Stormwater_Inlets', 'Stormwater_Pipes', 'Elevation_Benchmark', 'Map_Grid', 'Subdivisions', 'Zoning', 'FEMA_Base_Flood_Elevations', 'FEMA_Floodplain', 'FEMA_Floodline', 'Sinkhole_Boundaries', 'Tree_Inventory', 'Fire_Districts', 'Police_and_Fire_Stations', 'Law_Enforcement_Zones', 'BASE_TRY', 'first_cut', 'Residental', 'Export_2', 'point_in_polygon', 'intersect', 'verify_1', 'single_family', 'fire', 'police', 'single_family_point', 'R_SF_point', 'bike_lane', 'shared_bike_lane', 'signed_bike_route', 'signed_bike_share_road', 'paved_greenway', 'unpaved_greenway', 'water_greenway', 'asphalt_park_trail', 'gravel_park

## Fixing Layers with Errors

In [10]:
# layers: hospital, public_health, and urgentcare had the following error, so needed to create a clean file
# General error for : maximum recursion depth exceeded while calling a Python object


import fiona
from shapely.geometry import shape
import pandas as pd
import geopandas as gpd

records = []

try:
    with fiona.Env(), fiona.open(gdb_path, layer="urgentcare") as src:
        for rec in src:
            records.append({
                **rec["properties"],
                "geometry": shape(rec["geometry"])
            })
    gdf = gpd.GeoDataFrame(records, crs=src.crs)
    print(gdf.head())
except Exception as e:
    print(f"Error: {e}")
    
gdf.to_file("urgentcare_clean.shp")



   OBJECTID                        geometry  objectid_1        id  \
0        39  POINT (471678.131 4121576.157)        1165  10197944   
1        52  POINT (475307.500 4112114.646)        1238  10197938   
2        77  POINT (472384.583 4122164.334)        1490  10192164   
3       108  POINT (473635.914 4110497.472)        1851  10192286   
4       119  POINT (457018.441 4107586.444)        1965  10468016   

                                              name     telephone  \
0          SAINT JOHN'S URGENT CARE - WEST KEARNEY  417-869-6191   
1  SAINT JOHN'S URGENT CARE - SMITH-GLYNN-CALLAWAY  417-888-5666   
2           FAMILY MEDICAL WALK-IN CLINICS - NORTH  417-866-5550   
3           FAMILY MEDICAL WALK-IN CLINICS - SOUTH  417-890-5550   
4        FAMILY MEDICAL WALK-IN CLINICS - REPUBLIC  417-732-6277   

                             address address2         city state  ...  \
0           2120 WEST KEARNEY STREET           SPRINGFIELD    MO  ...   
1         3231 SOUTH NATIONAL 

In [18]:
import geopandas as gpd

def print_layer_info(shapefile_path, label):
    try:
        gdf = gpd.read_file(shapefile_path)
        print(f"\n=== {label.upper()} ===")
        print(gdf.dtypes.to_frame("dtype"))
    except Exception as e:
        print(f"Failed to read {label}: {e}")

print_layer_info("urgentcare_clean.shp", "urgentcare")


=== URGENTCARE ===
               dtype
OBJECTID       int64
objectid_1     int64
id             int64
name          object
telephone     object
address       object
address2      object
city          object
state         object
zip            int64
zipp4          int64
county        object
fips           int64
directions    object
emergtitle    object
emergtel      object
emergext      object
contdate      object
conthow       object
geodate       object
geohow        object
hsipthemes    object
naicscode      int64
naicsdescr    object
geolinkid      int64
x            float64
y            float64
st_vendor     object
st_version    object
geoprec       object
phoneloc      object
qc_qa         object
ucaoa_id      object
geometry    geometry


## Uploaded all relevant shapefiles into database

In [17]:
import geopandas as gpd
import pandas as pd
import psycopg2
import psycopg2.extras
import getpass

# Load the GeoDataFrame 
# gdb_path = 'Springfield.gdb/Springfield.gdb'
# layer = "trailheads"
# gdf = gpd.read_file(gdb_path, layer=layer)

gdf = gpd.read_file("urgentcare_clean.shp")
print(gdf.crs)
# gdf.set_crs(epsg=26915, inplace=True)
gdf.crs = {'init': 'epsg:26915'}
print(gdf.crs)
gdf["geom_wkt"] = gdf.geometry.apply(lambda x: x.wkt if x else None)

{}
{'init': 'epsg:26915'}


In [None]:
# Connect to PostgreSQL
database = "f24t03"
user = input("Type username (pawprint) and hit enter: ")
password = getpass.getpass("Type password and hit enter: ")
host = "pgsql"

Type username (pawprint) and hit enter: remcmf
Type password and hit enter: ········


In [20]:
try:
    conn = psycopg2.connect(database=database, user=user, host=host, password=password)
    cursor = conn.cursor()
    print("Connected to the database.")

    # Drop/Create Table
    table_name = "springfield_urgentcare"
    cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
    print(f"Dropped table {table_name} if it existed.")

    create_table_query = f"""
    CREATE TABLE {table_name} (
        OBJECTID INTEGER,
        objectid_1 INTEGER,
        id INTEGER,
        name TEXT,
        telephone TEXT,
        address TEXT,
        address2 TEXT,
        city TEXT,
        state TEXT,
        zip INTEGER,
        zipp4 INTEGER,
        county TEXT,
        fips INTEGER,
        directions TEXT,
        emergtitle TEXT,
        emergtel TEXT,
        emergext TEXT,
        contdate TEXT,
        conthow TEXT,
        geodate TEXT,
        geohow TEXT,
        hsipthemes TEXT,
        naicscode INTEGER,
        naicsdescr TEXT,
        geolinkid INTEGER,
        x DOUBLE PRECISION,
        y DOUBLE PRECISION,
        st_vendor TEXT,
        st_version TEXT,
        geoprec TEXT,
        phoneloc TEXT,
        qc_qa TEXT,
        ucaoa_id TEXT,
        geom GEOMETRY(Point, 26915)
    );

    """
    cursor.execute(create_table_query)
    conn.commit()
    print(f"Created table {table_name}.")

    # Chunked Insert
    chunk_size = 500
    df = gdf.drop(columns="geometry")  # use WKT instead

    for start in range(0, len(df), chunk_size):
        chunk = df.iloc[start:start + chunk_size].copy()
        columns = list(chunk.columns)
        columns_sql = [col.lower() if col != 'geom_wkt' else 'geom' for col in columns]

        values = []
        for _, row in chunk.iterrows():
            val_row = []
            for col in columns:
                val = row[col]
                if pd.isna(val):
                    val_row.append(None)
                elif col == "geom_wkt":
                    val_row.append(f"SRID=26915;{val}")
                else:
                    val_row.append(val)
            values.append(val_row)

        insert_query = f"INSERT INTO {table_name} ({', '.join(columns_sql)}) VALUES %s"
        template = "(" + ", ".join([
            "ST_GeomFromText(%s, 26915)" if col == "geom_wkt" else "%s" for col in columns
        ]) + ")"

        psycopg2.extras.execute_values(
            cursor,
            insert_query,
            values,
            template=template
        )
        conn.commit()
        print(f"Inserted chunk of {len(chunk)} records.")

    # Confirm Upload
    cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
    count = cursor.fetchone()[0]
    print(f"Data inserted successfully into {table_name}. Row count: {count}")

except Exception as e:
    print("Error:", e)

finally:
    if cursor:
        cursor.close()
    if conn:
        conn.close()


Connected to the database.
Dropped table springfield_urgentcare if it existed.
Created table springfield_urgentcare.
Inserted chunk of 6 records.
Data inserted successfully into springfield_urgentcare. Row count: 6


In [21]:
from sqlalchemy import create_engine,text

grant_privileges_query = "GRANT ALL PRIVILEGES ON TABLE springfield_urgentcare TO ypd5yb, jsmm8, remcmf, sgdky;"

engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}/{database}')

with engine.connect() as connection:
    connection.execute(text(grant_privileges_query))
    print("Privileges granted successfully.")

Privileges granted successfully.


In [None]:
import psycopg2
import pandas as pd
from sqlalchemy import create_engine
import getpass

database = 'f24t03'
user = input("Type username (pawprint) and hit enter: ")
password = getpass.getpass("Type password and hit enter: ")

engine = create_engine(f'postgresql://{user}:{password}@pgsql/{database}')

try:
    query = "SELECT * FROM springfield_urgentcare;"
    df_db = pd.read_sql(query, engine) 

finally:
    engine.dispose()
    print("Database connection closed.")

Type username (pawprint) and hit enter: remcmf
Type password and hit enter: ········
Database connection closed.


In [23]:
df_db.head()

Unnamed: 0,objectid,objectid_1,id,name,telephone,address,address2,city,state,zip,...,geolinkid,x,y,st_vendor,st_version,geoprec,phoneloc,qc_qa,ucaoa_id,geom
0,39,1165,10197944,SAINT JOHN'S URGENT CARE - WEST KEARNEY,417-869-6191,2120 WEST KEARNEY STREET,,SPRINGFIELD,MO,65803,...,124916561,-93.319316,37.240284,NAVTEQ,2006Q3,ONENTITY,t,TGS,,01010000202369000031DD2486F8C91C41E0BE0E14F471...
1,52,1238,10197938,SAINT JOHN'S URGENT CARE - SMITH-GLYNN-CALLAWAY,417-888-5666,3231 SOUTH NATIONAL AVENUE,,SPRINGFIELD,MO,65807,...,124953171,-93.278084,37.155101,NAVTEQ,2008Q1,BLOCKFACE,t,TGS,,010100002023690000B0A44E00AE021D41304CA652795F...
2,77,1490,10192164,FAMILY MEDICAL WALK-IN CLINICS - NORTH,417-866-5550,2619 NORTH KANSAS EXRPESSWAY,,SPRINGFIELD,MO,65803,...,96624783,-93.311373,37.245607,NAVTEQ,2008Q1,BLOCKFACE,t,TGS,,010100002023690000F085C95402D51C4160E5D02A1A73...
3,108,1851,10192286,FAMILY MEDICAL WALK-IN CLINICS - SOUTH,417-890-5550,4049 SOUTH CAMPBELL AVENUE,,SPRINGFIELD,MO,65807,...,124953197,-93.296852,37.140478,NAVTEQ,2006Q4,ONENTITY,t,TGS,UC_2264,010100002023690000810B24A88FE81C418F5374BC505C...
4,119,1965,10468016,FAMILY MEDICAL WALK-IN CLINICS - REPUBLIC,417-732-6277,281 UNITED STATES HIGHWAY 60 WEST,,REPUBLIC,MO,65738,...,124952039,-93.483788,37.113622,NAVTEQ,2008Q1,ONENTITY,t,TGS,,010100002023690000CFEEC9C3E9E41B41D044D838A156...
