# Calgary Communities: Where would you want to live?
## Data 604 - Fall 2025 - Group 1 Data Import and Initial Cleaning - APPENDIX J
### Project Notebook 1: This notebook covers the steps used to pull data from the City of Calgary using the Socrata Open Data API (SODA) and load it into pandas DataFrame objects. The DataFrames are then cleaned and loaded into our database via SQLAlchemy.
### Authors: Thanusha Balasubramanian, Elise Beaupre

### Section 1: Load Data into pandas dataframes using SODA

#### 1.1 Pull Community Points Dataset

In [1]:
import pandas as pd
from sodapy import Socrata

# Read the Socrata app token (Thanusha's) from a local text file so that it is
# never hard-coded in the notebook
with open("Calgary_App_Token.txt", "r", encoding="utf-8") as f:
    App_Token = f.read().strip()

# Use the app token to pull tables. The client is opened as a context manager
# so that its HTTP session is closed as soon as the download finishes, rather
# than being left open for the rest of the kernel's life.
with Socrata("data.calgary.ca", App_Token) as client:
    # Get Community Points data. `limit` caps the number of rows returned;
    # the dataset held 311 rows when this notebook was run.
    com_points = client.get("j9ps-fyst", limit=100000)

# Build one DataFrame row per record returned by the API
com_points_df = pd.DataFrame.from_records(com_points)
com_points_df

Unnamed: 0,class,class_code,comm_code,name,sector,srg,comm_structure,longitude,latitude,point,:@computed_region_qeuu_piif,:@computed_region_xxr9_iwz2
0,Residential,1,BED,BEDDINGTON HEIGHTS,NORTH,ESTABLISHED,1960s/1970s,-114.08502139544244,51.13163280873361,"{'type': 'Point', 'coordinates': [-114.0850213...",88,4
1,Residential,1,EVN,EVANSTON,NORTH,COMPLETE,2010s,-114.1124526074949,51.17109493109596,"{'type': 'Point', 'coordinates': [-114.1124526...",161,2
2,Residential,1,KIL,KILLARNEY/GLENGARRY,CENTRE,ESTABLISHED,1950s,-114.13172726984385,51.031548429038665,"{'type': 'Point', 'coordinates': [-114.1317272...",115,8
3,Residential,1,BRA,BRAESIDE,SOUTH,ESTABLISHED,1960s/1970s,-114.10636591786145,50.955992888964275,"{'type': 'Point', 'coordinates': [-114.1063659...",277,11
4,Residential,1,BLM,BELMONT,SOUTH,DEVELOPING,BUILDING OUT,-114.055251748252,50.86868365691495,"{'type': 'Point', 'coordinates': [-114.0552517...",18,13
...,...,...,...,...,...,...,...,...,...,...,...,...
307,Residential,1,WHI,WHITEHORN,NORTHEAST,ESTABLISHED,1960s/1970s,-113.97006821006156,51.088779320674604,"{'type': 'Point', 'coordinates': [-113.9700682...",280,10
308,Residential,1,DAL,DALHOUSIE,NORTHWEST,ESTABLISHED,1960s/1970s,-114.15815301369994,51.1094101224059,"{'type': 'Point', 'coordinates': [-114.1581530...",299,4
309,Major Park,3,FPK,FISH CREEK PARK,SOUTH,,PARKS,-114.02652092583433,50.909602044237715,"{'type': 'Point', 'coordinates': [-114.0265209...",207,14
310,Residual Sub Area,4,12B,12B,SOUTHEAST,FUTURE,UNDEVELOPED,-113.8986862332021,50.93923352479981,"{'type': 'Point', 'coordinates': [-113.8986862...",58,12


#### 1.2 Pull Community Services (Amenities) Dataset

In [2]:
import pandas as pd
from sodapy import Socrata

# Read the Socrata app token (Thanusha's) from a local text file so that it is
# never hard-coded in the notebook
with open("Calgary_App_Token.txt", "r", encoding="utf-8") as f:
    App_Token = f.read().strip()

# Use the app token to pull tables. The client is opened as a context manager
# so that its HTTP session is closed as soon as the download finishes, rather
# than being left open for the rest of the kernel's life.
with Socrata("data.calgary.ca", App_Token) as client:
    # Get Community Services data. `limit` caps the number of rows returned;
    # the dataset held 210 rows when this notebook was run.
    com_service = client.get("x34e-bcjz", limit=100000)

# Build one DataFrame row per record returned by the API
com_service_df = pd.DataFrame.from_records(com_service)
com_service_df

Unnamed: 0,type,name,address,comm_code,point,:@computed_region_hq2j_w7j9,:@computed_region_kxmf_bzkv,:@computed_region_4b54_tmc4,:@computed_region_4a3i_ccfj,:@computed_region_p8tp_5dkv
0,Community Centre,Rosemont Community Centre,2807 10 ST NW,CAP,"{'type': 'Point', 'coordinates': [-114.0860518...",51,63,7,2,7
1,Attraction,WinSport's Canada Olympic Park,88 Canada Olympic RD SW,COP,"{'type': 'Point', 'coordinates': [-114.2154212...",40,4,13,1,14
2,Community Centre,Mid-Sun Community Centre,50 Midpark RI SE,MID,"{'type': 'Point', 'coordinates': [-114.0566394...",161,263,6,3,13
3,Attraction,Arts Commons,205 8 AV SE,DNC,"{'type': 'Point', 'coordinates': [-114.0600698...",160,262,7,3,7
4,Community Centre,Willow Park / Mapleridge Community Centre,680 Acadia DR SE,MPL,"{'type': 'Point', 'coordinates': [-114.0433533...",216,93,6,3,5
...,...,...,...,...,...,...,...,...,...,...
206,Community Centre,Forest Heights/ Fonda Community Centre,4909 Forego AV SE,FHT,"{'type': 'Point', 'coordinates': [-113.9630312...",166,194,12,3,11
207,Community Centre,Cambrian Heights Community Centre,600 Northmount DR NW,CAM,"{'type': 'Point', 'coordinates': [-114.0873963...",66,276,7,2,4
208,Community Centre,Banff Trail Community Centre,2115 20 AV NW,BNF,"{'type': 'Point', 'coordinates': [-114.1089361...",183,153,7,2,7
209,Community Centre,Rosscarrock Community Centre,4411 10 AV SW,RCK,"{'type': 'Point', 'coordinates': [-114.1507671...",36,281,14,1,2


#### 1.3 Pull Community Crime Stats

In [3]:
import pandas as pd
from sodapy import Socrata

# Read the Socrata app token (Thanusha's) from a local text file so that it is
# never hard-coded in the notebook
with open("Calgary_App_Token.txt", "r", encoding="utf-8") as f:
    App_Token = f.read().strip()

# Use the app token to pull tables. The client is opened as a context manager
# so that its HTTP session is closed as soon as the download finishes, rather
# than being left open for the rest of the kernel's life.
with Socrata("data.calgary.ca", App_Token) as client:
    # Get Community Crime Stats. `limit` caps the number of rows returned;
    # the dataset held 77,134 rows when this notebook was run, so the limit
    # needs raising if the dataset grows past 100,000 rows.
    com_crime = client.get("78gh-n26t", limit=100000)

# Build one DataFrame row per record returned by the API
com_crime_df = pd.DataFrame.from_records(com_crime)
com_crime_df

Unnamed: 0,community,category,crime_count,year,month
0,01B,Assault (Non-domestic),1,2022,11
1,01B,Break & Enter - Commercial,1,2019,6
2,01B,Break & Enter - Commercial,1,2019,8
3,01B,Break & Enter - Commercial,2,2020,3
4,01B,Break & Enter - Commercial,2,2020,7
...,...,...,...,...,...
77130,CORNERSTONE,Theft OF Vehicle,2,2022,4
77131,CORNERSTONE,Theft OF Vehicle,2,2022,6
77132,CORNERSTONE,Theft OF Vehicle,2,2022,7
77133,CORNERSTONE,Theft OF Vehicle,1,2022,9


#### 1.4 Pull Current Year (2025) Property Assessments

In [4]:
import pandas as pd
from sodapy import Socrata

# Read the Socrata app token (Thanusha's) from a local text file so that it is
# never hard-coded in the notebook
with open("Calgary_App_Token.txt", "r", encoding="utf-8") as f:
    App_Token = f.read().strip()

# Use the app token to pull tables. The client is opened as a context manager
# so that its HTTP session is closed as soon as the download finishes, rather
# than being left open for the rest of the kernel's life.
with Socrata("data.calgary.ca", App_Token) as client:
    # Get Current Property Assessments. `limit` caps the number of rows
    # returned; the dataset held 588,825 rows when this notebook was run.
    curr_prop = client.get("4bsw-nn7w", limit=1000000)

# Build one DataFrame row per record returned by the API
curr_prop_df = pd.DataFrame.from_records(curr_prop)
curr_prop_df

Unnamed: 0,roll_year,roll_number,address,assessed_value,assessment_class,assessment_class_description,nr_assessed_value,comm_code,comm_name,land_use_designation,...,land_size_sm,land_size_sf,land_size_ac,mod_date,sub_property_use,multipolygon,unique_key,year_of_construction,re_assessed_value,fl_assessed_value
0,2025,203304043,10110 1 AV SW,42500.0,NR,Non-residential,42500.0,OPH,OSPREY HILL,M-2,...,690.0,7427.0,0.17,2025-01-10T00:00:00.000Z,ATR,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203304043960690815,,,
1,2025,203304035,10258 1 AV SW,12500.0,NR,Non-residential,12500.0,OPH,OSPREY HILL,S-UN,...,210.0,2260.0,0.05,2025-01-10T00:00:00.000Z,ATR,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203304035960690816,,,
2,2025,203304027,10320 1 AV SW,5000.0,NR,Non-residential,5000.0,OPH,OSPREY HILL,"S-SPR,S-UN",...,90.0,969.0,0.02,2025-01-10T00:00:00.000Z,ATR,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203304027960690817,,,
3,2025,203304019,10360 1 AV SW,10000.0,NR,Non-residential,10000.0,OPH,OSPREY HILL,R-G,...,170.0,1830.0,0.04,2025-01-10T00:00:00.000Z,ATR,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203304019960690818,,,
4,2025,203304001,10480 1 AV SW,50000.0,NR,Non-residential,50000.0,OPH,OSPREY HILL,M-1,...,820.0,8827.0,0.2,2025-01-10T00:00:00.000Z,ATR,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203304001960690812,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588820,2025,203369491,94 SETONVISTA GV SE,33500.0,RE,Residential,,RRC,RICARDO RANCH,R-Gm,...,229.5,2470.0,0.06,2025-04-23T00:00:00.000Z,,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203369491960684225,,33500.0,
588821,2025,203369780,95 SETONVISTA GV SE,41500.0,RE,Residential,,RRC,RICARDO RANCH,R-G,...,301.3,3243.0,0.07,2025-04-23T00:00:00.000Z,,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203369780960684178,,41500.0,
588822,2025,203369053,97 SETONVISTA GA SE,34500.0,RE,Residential,,RRC,RICARDO RANCH,R-Gm,...,236.3,2544.0,0.06,2025-04-23T00:00:00.000Z,,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203369053960684248,,34500.0,
588823,2025,203369509,98 SETONVISTA GV SE,33500.0,RE,Residential,,RRC,RICARDO RANCH,R-Gm,...,229.5,2470.0,0.06,2025-04-23T00:00:00.000Z,,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",2025203369509960684224,,33500.0,


#### 1.5 Pull Historical Property Assessments

As the historical property assessments dataset is very large (millions of rows of data), we ran a SoQL query directly against the City of Calgary open data portal and pulled the results from the endpoint associated with that query. We filtered using conditions similar to those in the query we used to summarize the current year property assessments:
 - SELECT roll_year, comm_code, comm_name, median(re_assessed_value) (returned as median_re_assessed_value)
 - WHERE assessment_class = 'RE'
 - AND re_assessed_value IS NOT NULL
 - AND land_use_designation NOT LIKE 'DC%' AND land_use_designation NOT LIKE 'C%'
 - AND land_use_designation NOT LIKE 'S%' AND land_use_designation NOT LIKE 'I%'
 - GROUP BY roll_year, comm_code, comm_name

In [5]:
import requests
import pandas as pd
from io import StringIO

# Read the Socrata app token (Thanusha's) from a local text file so that it is
# never hard-coded in the notebook
with open("Calgary_App_Token.txt", "r", encoding="utf-8") as f:
    App_Token = f.read().strip()

# Query endpoint: the URL-encoded query (median re_assessed_value grouped by
# roll_year, comm_code and comm_name) run against view 4ur7-wsgc, with the
# result returned as CSV
url = "https://data.calgary.ca/api/v3/views/4ur7-wsgc/query.csv?query=SELECT%0A%20%20%60roll_year%60%2C%0A%20%20%60comm_code%60%2C%0A%20%20%60comm_name%60%2C%0A%20%20median(%60re_assessed_value%60)%0AWHERE%0A%20%20caseless_eq(%60assessment_class%60%2C%20%22RE%22)%0A%20%20AND%20%60re_assessed_value%60%20IS%20NOT%20NULL%0A%20%20AND%20%60land_use_designation%60%20NOT%20LIKE%20%22DC%25%22%0A%20%20AND%20%60land_use_designation%60%20NOT%20LIKE%20%22C%25%22%0A%20%20AND%20%60land_use_designation%60%20NOT%20LIKE%20%22S%25%22%0A%20%20AND%20%60land_use_designation%60%20NOT%20LIKE%20%22I%25%22%0AGROUP%20BY%20%60roll_year%60%2C%20%60comm_code%60%2C%20%60comm_name%60"

# Send the app token as a request header
headers = {"X-App-Token": App_Token}

# Make the request. Without a timeout, requests waits forever on a stalled
# connection; allow 10 s to connect and 300 s for the server to run the
# aggregation and start responding.
response = requests.get(url, headers=headers, timeout=(10, 300))
# Fail loudly on any HTTP error status instead of parsing an error page as CSV
response.raise_for_status()

# Load CSV into pandas
csv_data = StringIO(response.text)
Historical_Property_Assessments = pd.read_csv(csv_data)
Historical_Property_Assessments

Unnamed: 0,roll_year,comm_code,comm_name,median_re_assessed_value
0,2017,02C,02C,189250.0
1,2017,12A,12A,316250.0
2,2017,12C,12C,697950.0
3,2017,12I,12I,424340.0
4,2017,ABB,ABBEYDALE,304000.0
...,...,...,...,...
1650,2024,WND,WINDSOR PARK,205000.0
1651,2024,WOO,WOODLANDS,548500.0
1652,2024,WSP,WEST SPRINGS,812500.0
1653,2024,WWO,WOLF WILLOW,522000.0


### Section 2: Clean the Dataframes

#### 2.1 Clean the community points data frame

In [6]:
import pandas as pd

# Work on a copy so the raw frame pulled from the API (com_points_df) is left
# untouched and this cell can be re-run safely
cleaned_com_points = com_points_df.copy()

# Drop the GeoJSON point column if present; longitude and latitude are already
# available as their own columns
cleaned_com_points = cleaned_com_points.drop(columns=["point"], errors="ignore")

# Strip the leading ":@" prefix from column names. The computed-region fields
# arrive as ":@computed_region_...", so stripping only "@" leaves those names
# unchanged.
cleaned_com_points = cleaned_com_points.rename(columns=lambda c: c.lstrip(":@"))

# Drop any column whose name contains 'computed_region' (case-insensitive)
cols_to_drop = [c for c in cleaned_com_points.columns if "computed_region" in c.lower()]
cleaned_com_points = cleaned_com_points.drop(columns=cols_to_drop)

# Normalize common empty markers to pandas NA so they become SQL NULLs later
cleaned_com_points = cleaned_com_points.replace({"": pd.NA, "NA": pd.NA, "N/A": pd.NA})

# Convert every column to pandas StringDtype (keeps missing as <NA>). This is
# done first to preserve the original raw text; specific columns are then
# converted to numeric types below.
cleaned_com_points = cleaned_com_points.astype("string")

# Convert class_code to nullable integer (Int64). Non-numeric values become <NA>.
cleaned_com_points["class_code"] = pd.to_numeric(
    cleaned_com_points["class_code"], errors="coerce"
).astype("Int64")

# Convert longitude and latitude to float64 (precision for coordinates).
# Values that cannot be parsed become NaN.
for col in ("longitude", "latitude"):
    cleaned_com_points[col] = pd.to_numeric(
        cleaned_com_points[col], errors="coerce"
    ).astype("float64")

# Quick check of the resulting column types
cleaned_com_points.dtypes


class             string[python]
class_code                 Int64
comm_code         string[python]
name              string[python]
sector            string[python]
srg               string[python]
comm_structure    string[python]
longitude                float64
latitude                 float64
dtype: object

#### 2.2 Clean the community services data frame

In [7]:
import pandas as pd

# Work on a copy so the raw frame pulled from the API (com_service_df) is left
# untouched and this cell can be re-run safely
cleaned_com_service = com_service_df.copy()

# 1) drop the GeoJSON point column if present
cleaned_com_service = cleaned_com_service.drop(columns=["point"], errors="ignore")

# 2) strip the leading ":@" prefix from column names. The computed-region
#    fields arrive as ":@computed_region_...", so stripping only "@" leaves
#    those names unchanged.
cleaned_com_service = cleaned_com_service.rename(columns=lambda c: c.lstrip(":@"))

# 3) drop any column whose name contains 'computed_region' (case-insensitive)
cols_to_drop = [c for c in cleaned_com_service.columns if "computed_region" in c.lower()]
cleaned_com_service = cleaned_com_service.drop(columns=cols_to_drop)

# 4) normalize common empty markers to pandas NA so they become SQL NULLs later
cleaned_com_service = cleaned_com_service.replace({"": pd.NA, "NA": pd.NA, "N/A": pd.NA})

# 5) convert every column to pandas StringDtype (keeps missing as <NA>), which
#    preserves the raw text
cleaned_com_service = cleaned_com_service.astype("string")

# 6) convert class_code to nullable integer (Int64) if the column exists.
#    Non-numeric values become <NA>. This is only a guard: the services data
#    pulled above has no class_code column.
if "class_code" in cleaned_com_service.columns:
    cleaned_com_service["class_code"] = pd.to_numeric(
        cleaned_com_service["class_code"], errors="coerce"
    ).astype("Int64")

# Quick check of the resulting column types
cleaned_com_service.dtypes

type         string[python]
name         string[python]
address      string[python]
comm_code    string[python]
dtype: object

#### 2.3 Clean the community crime statistics data frame

In [8]:
import pandas as pd

# Work on a copy so the raw frame pulled from the API (com_crime_df) is left
# untouched and this cell can be re-run safely
cleaned_com_crime = com_crime_df.copy()

# Drop the point column if present (guard only: the crime data pulled above
# has no point column)
cleaned_com_crime = cleaned_com_crime.drop(columns=["point"], errors="ignore")

# Strip the leading ":@" prefix that Socrata puts on computed_region fields.
# Stripping only "@" would leave names that start with ":" unchanged.
cleaned_com_crime = cleaned_com_crime.rename(columns=lambda c: c.lstrip(":@"))

# Drop any column whose name contains 'computed_region' (case-insensitive)
cols_to_drop = [c for c in cleaned_com_crime.columns if "computed_region" in c.lower()]
cleaned_com_crime = cleaned_com_crime.drop(columns=cols_to_drop)

# Normalize common empty markers to pandas NA so they become SQL NULLs later
cleaned_com_crime = cleaned_com_crime.replace({"": pd.NA, "NA": pd.NA, "N/A": pd.NA})

# Convert every column to pandas StringDtype to preserve raw text and missingness
cleaned_com_crime = cleaned_com_crime.astype("string")

# Coerce the numeric columns to nullable integers (Int64):
# - crime_count is an integer count
# - year and month are stored as integers
# Values that cannot be parsed become <NA>.
for col in ("crime_count", "year", "month"):
    if col in cleaned_com_crime.columns:
        cleaned_com_crime[col] = pd.to_numeric(
            cleaned_com_crime[col], errors="coerce"
        ).astype("Int64")

# Quick check of the resulting column types
cleaned_com_crime.dtypes


community      string[python]
category       string[python]
crime_count             Int64
year                    Int64
month                   Int64
dtype: object

#### 2.4 Clean the property assessments data frame

In [9]:
import pandas as pd
import numpy as np

# Work on a copy so the raw frame pulled from the API (curr_prop_df) is left
# untouched and this cell can be re-run safely
cleaned_curr_prop = curr_prop_df.copy()

# 1) drop spatial / raw geometry columns if present
cleaned_curr_prop = cleaned_curr_prop.drop(
    columns=["point", "multipolygon", "geometry"], errors="ignore"
)

# 2) strip the leading ":@" prefix that Socrata puts on computed_region fields
#    (stripping only "@" leaves names starting with ":" unchanged), then trim
#    surrounding whitespace from the column names
cleaned_curr_prop = cleaned_curr_prop.rename(columns=lambda c: c.lstrip(":@").strip())

# 3) drop any column whose name contains 'computed_region' (case-insensitive)
cols_to_drop = [c for c in cleaned_curr_prop.columns if "computed_region" in c.lower()]
cleaned_curr_prop = cleaned_curr_prop.drop(columns=cols_to_drop)

# 4) normalize common empty markers to pandas NA so they become SQL NULLs later
cleaned_curr_prop = cleaned_curr_prop.replace({"": pd.NA, "NA": pd.NA, "N/A": pd.NA, "null": pd.NA})

# 5) target nullable dtype for each column that should end up numeric
numeric_expected = {
    "roll_year": "Int64",
    "year_of_construction": "Int64",
    "assessed_value": "Float64",
    "nr_assessed_value": "Float64",
    "re_assessed_value": "Float64",
    "fl_assessed_value": "Float64",
    "land_size_sm": "Float64",
    "land_size_sf": "Float64",
    "land_size_ac": "Float64",
}

#    Convert the remaining (textual) columns to pandas StringDtype to preserve
#    missingness. The numeric columns are skipped here and handled in step 6;
#    roll_number is skipped here and handled in step 7.
skip_cols = set(numeric_expected.keys()) | {"roll_number"}
for c in cleaned_curr_prop.columns:
    if c not in skip_cols:
        cleaned_curr_prop[c] = cleaned_curr_prop[c].astype("string")

# 6) coerce numeric columns to their nullable dtypes; values that cannot be
#    parsed as numbers become <NA>
for col, dtype in numeric_expected.items():
    if col in cleaned_curr_prop.columns:
        cleaned_curr_prop[col] = pd.to_numeric(
            cleaned_curr_prop[col], errors="coerce"
        ).astype(dtype)

# 7) keep roll_number as a clean string identifier (no leading/trailing spaces)
if "roll_number" in cleaned_curr_prop.columns:
    cleaned_curr_prop["roll_number"] = cleaned_curr_prop["roll_number"].astype("string").str.strip()

# 8) final quick check of the resulting column types
cleaned_curr_prop.dtypes


roll_year                                Int64
roll_number                     string[python]
address                         string[python]
assessed_value                         Float64
assessment_class                string[python]
assessment_class_description    string[python]
nr_assessed_value                      Float64
comm_code                       string[python]
comm_name                       string[python]
land_use_designation            string[python]
property_type                   string[python]
land_size_sm                           Float64
land_size_sf                           Float64
land_size_ac                           Float64
mod_date                        string[python]
sub_property_use                string[python]
unique_key                      string[python]
year_of_construction                     Int64
re_assessed_value                      Float64
fl_assessed_value                      Float64
dtype: object

### Section 3: Load the Cleaned Dataframes into the SQL database using SQLAlchemy

#### 3.1 Get SQLAlchemy Version

In [10]:
import sqlalchemy as sq
# Display the installed SQLAlchemy version (`sq` is the sqlalchemy module)
sq.__version__

'2.0.43'

#### 3.2 Create the Engine for the SQL connection

In [11]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# The database password is read from the "dbpasswd" file in the current
# directory (which is where it is on Cloudlabs by default), so it is never
# hard-coded in the notebook
with open("dbpasswd", "r", encoding="utf-8") as f:
    PWD = f.read().strip()

USER = "student"
DB   = "student"

# Build the connection URL from its parts instead of formatting a string.
# A password containing characters such as "@", ":", "/" or "%" would corrupt
# a hand-formatted URL; URL.create passes each component through unparsed.
engine = create_engine(
    URL.create(
        drivername="mysql+mysqlconnector",
        username=USER,
        password=PWD,
        host="127.0.0.1",
        port=3306,
        database=DB,
    )
)

#### 3.3 Load Community Points, Community Services and Community Crime cleaned dataframes into the SQL tables
These are done together, as they are smaller tables and do not need to be split into chunks for loading

In [12]:
# Load the community points data (overwrite any existing table)
cleaned_com_points.to_sql(
    name="Community_Points",
    con=engine,
    if_exists="replace",
    index=False,
)

# Load the community services data (overwrite any existing table)
cleaned_com_service.to_sql(
    name="Community_Service",
    con=engine,
    if_exists="replace",
    index=False,
)

# Load the community crime data (overwrite any existing table).
# As the last expression in the cell, this call's return value is what the
# cell displays.
cleaned_com_crime.to_sql(
    name="Community_Crime",
    con=engine,
    if_exists="replace",
    index=False,
)

-1

#### 3.4 Load the Historical Property Assessments into SQL tables 

In [13]:
# Load the historical median assessments (overwrite any existing table).
# This frame is written as read from the CSV response, with no cleaning step.
Historical_Property_Assessments.to_sql(
    name="Historical_Property_Assessments",
    con=engine,
    if_exists="replace",
    index=False,
)

-1

#### 3.5 Load the cleaned Current Year Property Assessments into SQL tables 
This is completed in chunks as the table is very large (over 500k rows of data).

In [14]:
import math

# Target table and number of rows sent per INSERT batch
table_name = "Current_Property_Assessments"
chunksize = 25000

# Create the table fresh from a zero-row slice, so the columns are set up
# from the frame's dtypes before any data is sent; the chunks are then appended.
cleaned_curr_prop.head(0).to_sql(
    table_name,
    con=engine,
    if_exists="replace",
    index=False,
)

nrows = len(cleaned_curr_prop)
nchunks = math.ceil(nrows / chunksize)

# Walk the frame one chunk at a time; chunk i covers rows [start, start + chunksize)
for i in range(1, nchunks + 1):
    start = (i - 1) * chunksize
    chunk = cleaned_curr_prop.iloc[start : start + chunksize]
    chunk.to_sql(
        table_name,
        con=engine,
        if_exists="append",
        index=False,
        method="multi",
    )
    # The last chunk may be short, so cap the running total at nrows
    print(f"Chunk {i}/{nchunks} written ({min(start + chunksize, nrows)}/{nrows} rows)")
print("All chunks written.")

Chunk 1/24 written (25000/588825 rows)
Chunk 2/24 written (50000/588825 rows)
Chunk 3/24 written (75000/588825 rows)
Chunk 4/24 written (100000/588825 rows)
Chunk 5/24 written (125000/588825 rows)
Chunk 6/24 written (150000/588825 rows)
Chunk 7/24 written (175000/588825 rows)
Chunk 8/24 written (200000/588825 rows)
Chunk 9/24 written (225000/588825 rows)
Chunk 10/24 written (250000/588825 rows)
Chunk 11/24 written (275000/588825 rows)
Chunk 12/24 written (300000/588825 rows)
Chunk 13/24 written (325000/588825 rows)
Chunk 14/24 written (350000/588825 rows)
Chunk 15/24 written (375000/588825 rows)
Chunk 16/24 written (400000/588825 rows)
Chunk 17/24 written (425000/588825 rows)
Chunk 18/24 written (450000/588825 rows)
Chunk 19/24 written (475000/588825 rows)
Chunk 20/24 written (500000/588825 rows)
Chunk 21/24 written (525000/588825 rows)
Chunk 22/24 written (550000/588825 rows)
Chunk 23/24 written (575000/588825 rows)
Chunk 24/24 written (588825/588825 rows)
All chunks written.


In [15]:
# Dispose of the engine's connection pool now that the loads above are done
engine.dispose()