# Feature Engineering - FEMA Large Disasters with Demographics

In [1]:
# Basic libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import warnings

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Notebook-wide settings
# NOTE: this silences every warning category for the rest of the session,
# including pandas deprecation notices.
warnings.filterwarnings('ignore')
# A value of None removes pandas' display truncation for rows and columns
for _display_opt in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_opt, None)

### Load FEMA Data

In [3]:
# Load FEMA data: one CSV per disaster declaration, read in FL, TX, PR, NC order
flData, txData, prData, ncData = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/open-fema/FEMA-Large-DR-4337-FL.csv",
        "../data/open-fema/FEMA-Large-DR-4332-TX.csv",
        "../data/open-fema/FEMA-Large-DR-4339-PR.csv",
        "../data/open-fema/FEMA-Large-DR-4393-NC.csv",
    )
]

In [4]:
# Combine FL + TX + PR + NC datasets into a single frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each
# source frame keeps its own row labels, so the same label repeats across
# states and any label-based operation (e.g. dropping by index) would hit
# rows from several states at once.
femaDf = pd.concat([flData, txData, prData, ncData], ignore_index=True)
print(f"There are {len(femaDf)} records in the dataframe.")

There are 4794157 records in the dataframe.


In [5]:
# Look at the data type pandas inferred for each column of the combined FEMA frame.
# Flag columns that show up as `object` rather than `bool` need an explicit cast.
femaDf.dtypes

disasterNumber                       int64
damagedCity                         object
damagedStateAbbreviation            object
damagedZipCode                     float64
householdComposition                 int64
grossIncome                        float64
specialNeeds                          bool
ownRent                             object
residenceType                       object
homeOwnersInsurance                   bool
floodInsurance                        bool
inspected                             bool
rpfvl                              float64
habitabilityRepairsRequired         object
destroyed                             bool
waterLevel                         float64
floodDamage                           bool
foundationDamage                      bool
foundationDamageAmount             float64
roofDamage                            bool
roofDamageAmount                   float64
tsaEligible                           bool
tsaCheckedIn                          bool
rentalAssis

In [6]:
# Remove fully identical rows, keeping the first occurrence of each
femaDf = femaDf.drop_duplicates()
print(f"There are {len(femaDf)} records after dropping any duplicates.")

There are 4794157 records after dropping any duplicates.


In [7]:
# Change incorrect datatypes
def _flag_to_bool(flag):
    """Cast a flag column to bool, mapping missing entries to False.

    A plain ``astype('bool')`` converts NaN to True (NaN is truthy), which
    would silently mark every record with a missing flag as positive.
    AND-ing with ``notna()`` forces those missing entries to False instead.

    NOTE(review): string values such as 'False' would still map to True here,
    exactly as with a plain cast -- confirm the columns hold real booleans.
    """
    return flag.astype('bool') & flag.notna()


femaDf['habitabilityRepairsRequired'] = _flag_to_bool(femaDf['habitabilityRepairsRequired'])
femaDf['primaryResidence'] = _flag_to_bool(femaDf['primaryResidence'])

# The string cast can leave a trailing '.0' on the block IDs (presumably because
# they were parsed as floats -- confirm). Strip it with an anchored raw-string
# pattern so only a suffix is removed and no invalid escape sequence is used.
femaDf['censusBlockId'] = femaDf['censusBlockId'].astype('str').str.replace(r'\.0$', '', regex=True)

print(f"habitabilityRepairsRequired datatype: {femaDf['habitabilityRepairsRequired'].dtype}")
print(f"primaryResidence datatype: {femaDf['primaryResidence'].dtype}")
print(f"censusBlockId datatype: {femaDf['censusBlockId'].dtype}")

habitabilityRepairsRequired datatype: bool
primaryResidence datatype: bool
censusBlockId datatype: object


In [8]:
# Create census tract IDs for FEMA dataset.
# The tract ID is the first 11 characters of the block ID. The vectorized
# string slice avoids a Python-level call per row, which row-wise
# apply(axis=1) would make on every one of the millions of records.
femaDf['censusTractId'] = femaDf['censusBlockId'].str[:11]
femaDf[['censusBlockId', 'censusTractId']].head()

Unnamed: 0,censusBlockId,censusTractId
0,120111105011006,12011110501
1,120860011032003,12086001103
2,120830012043000,12083001204
3,120860135002000,12086013500
4,120090641241018,12009064124


### Load Census Demographics

In [9]:
# Load the census-tract demographics table and preview the first three rows.
# Presumably one row per tract, keyed by `tractid` -- TODO confirm uniqueness before merging.
demoDf = pd.read_csv("../data/census-tract/census-tract-demographics.csv")
demoDf.head(3)

Unnamed: 0,censusid,gisjoin,tractid,tractname,county,state,total_population,male_population,female_population,total_population_edu,high_school_grad,bachelors_degree,graduate_prof_degree,high_school_grad_rate,bachelors_degree_rate,graduate_prof_degree_rate,bachelors_plus_degree_rate,total_population_employ,labor_force_total,civilian_labor_force,employed_labor_force,unemployed_labor_force,armed_forces_labor_force,not_labor_force_total,labor_force_rate,civilian_labor_rate,employed_labor_rate,unemployed_labor_rate,armed_forces_labor_rate,not_labor_force_rate,total_population_income,income_less_10k,income_10k_15k,income_15k_20k,income_20k_25k,income_25k_30k,income_30k_35k,income_35k_40k,income_40k_45k,income_45k_50k,income_50k_60k,income_60k_75k,income_75k_100k,income_100k_125k,income_125k_150k,income_150k_200k,income_200k_more,median_earnings_total,median_earnings_male,median_earnings_male_fulltime,median_earnings_male_other,median_earnings_female,median_earnings_female_fulltime,median_earnings_female_other,total_households_poverty,below_poverty,below_poverty_family,below_poverty_family_married,below_poverty_family_other,below_poverty_family_other_male_no_spouse,below_poverty_family_other_female_no_spouse,below_poverty_nonfamily,below_poverty_nonfamily_male,below_poverty_nonfamily_female,above_poverty,above_poverty_family,above_poverty_family_married,above_poverty_family_other,above_poverty_family_other_male_no_spouse,above_poverty_family_other_female_no_spouse,above_poverty_nonfamily,above_poverty_nonfamily_male,above_poverty_nonfamily_female,below_poverty_rate,below_poverty_family_rate,below_poverty_family_married_rate,below_poverty_family_other_rate,below_poverty_family_other_male_no_spouse_rate,below_poverty_family_other_female_no_spouse_rate,below_poverty_nonfamily_rate,below_poverty_nonfamily_male_rate,below_poverty_nonfamily_female_rate,above_poverty_rate,above_poverty_family_rate,above_poverty_family_married_rate,above_poverty_family_other_rate,above_poverty_fam
ily_other_male_no_spouse_rate,above_poverty_family_other_female_no_spouse_rate,above_poverty_nonfamily_rate,above_poverty_nonfamily_male_rate,above_poverty_nonfamily_female_rate,total_population_assist,with_assistance,without_assistance,with_assistance_rate,without_assistance_rate,total_housing_units,total_housing_units_rate,occupied_housing_units,occupied_housing_units_rate,vacant_housing_units,vacant_housing_units_rate,homeowner_vacancy_rate,homeowner_vacancy_rate_rate,rental_vacancy_rate,rental_vacancy_rate_rate,built_total_housing_units,built_total_housing_units_rate,built_2014_or_later,built_2014_or_later_rate,built_2010_to_2013,built_2010_to_2013_rate,built_2000_to_2009,built_2000_to_2009_rate,built_1990_to_1999,built_1990_to_1999_rate,built_1980_to_1989,built_1980_to_1989_rate,built_1970_to_1979,built_1970_to_1979_rate,built_1960_to_1969,built_1960_to_1969_rate,built_1950_to_1959,built_1950_to_1959_rate,built_1940_to_1949,built_1940_to_1949_rate,built_1939_or_earlier,built_1939_or_earlier_rate,tenure_occupied_housing_units,tenure_occupied_housing_units_rate,owner_occupied,owner_occupied_rate,renter_occupied,renter_occupied_rate,avg_size_owner_unit,avg_size_owner_unit_rate,avg_size_renter_unit,avg_size_of_renter__unit_rate,moved_occupied_housing_unit,moved_occupied_housing_unit_rate,moved_in_2015_or_later,moved_in_2015_or_later_rate,moved_in_2010_to_2014,moved_in_2010_to_2014_rate,moved_in_2000_to_2009,moved_in_2000_to_2009_rate,moved_in_1990_to_1999,moved_in_1990_to_1999_rate,moved_in_1980_to_1989,moved_in_1980_to_1989_rate,moved_in_1979_and_earlier,moved_in_1979_and_earlier_rate,lacking_complete_plumbing_facilities,lacking_complete_plumbing_facilities_rate,lacking_complete_kitchen_facilities,lacking_complete_kitchen_facilities_rate,no_telephone_service_available,no_telephone_service_available_rate,value_less_50k,value_less_50k_rate,value_50k_100k,value_50k_100k_rate,value_100k_150k,value_100k_150k_rate,value_150k_200k,value_150k_200k_rate,value_200k_300k,val
ue_200k_300k_rate,value_300k_500k,value_300k_500k_rate,value_500k_1M,value_500k_1M_rate,value_1M_more,value_1M_more_rate,median_value,median_value_rate,mortgage,mortgage_rate,mortgage_less_20_percent,mortgage_less_20_percent_rate,mortgage_20_25_percent,mortgage_20_25_percent_rate,mortgage_25_30_percent,mortgage_25_30_percent_rate,mortgage_30_35_percent,mortgage_30_35_percent_rate,mortgage_35_percent_plus,mortgage_35_percent_plus_rate,mortgage_not_computed,mortgage_not_computed_rate,without_mortgage,without_mortgage_rate,without_mortgage_less_10_percent,without_mortgage_less_10_percent_rate,without_mortgage_10_15_percent,without_mortgage_10_15_percent_rate,without_mortgage_15_20_percent,without_mortgage_15_20_percent_rate,without_mortgage_20_25_percent,without_mortgage_20_25_percent_rate,without_mortgage_25_30_percent,without_mortgage_25_30_percent_rate,without_mortgage_30_35_percent,without_mortgage_30_35_percent_rate,without_mortgage_35_percent_plus,without_mortgage_35_percent_plus_rate,without_mortgage_not_computed,without_mortgage_not_computed_rate,occupied_units_paying_rent,occupied_units_paying_rent_rate,rent_less_15_percent,rent_less_15_percent_rate,rent_15_20_percent,rent_15_20_percent_rate,rent_20_25_percent,rent_20_25_percent_rate,rent_25_30_percent,rent_25_30_percent_rate,rent_30_35_percent,rent_30_35_percent_rate,rent_35_percent_plus,rent_35_percent_plus_rate,rent_not_computed,rent_not_computed_rate,built_1959_or_earlier_rate,built_1960_to_1989_rate,built_1990_or_later_rate,built_1979_or_earlier_rate,built_1980_or_later_rate,total_population_nativity,total_native,native_in_state,native_out_state,native_outside_us,native_outside_us_puerto,native_outside_us_islands,native_outside_us_abroad,total_foreign,foreign_citizen,foreign_non_citizen,total_native_rate,native_in_state_rate,native_out_state_rate,native_outside_us_rate,native_outside_us_puerto_rate,native_outside_us_islands_rate,native_outside_us_abroad_rate,total_foreign_rate,foreign_citizen_rate,foreign
_non_citizen_rate
0,1400000US12001000200,G1200010000200,12001000200,"Census Tract 2, Alachua County, Florida",Alachua County,Florida,6834,3096,3738,1941,564,256,479,0.290572,0.131891,0.24678,0.378671,6681,3151,3147,2923,224,4,3530,0.471636,0.471037,0.437509,0.033528,0.000599,0.528364,345,28,49,30,0,0,31,0,0,0,71,49,14,21,30,22,0,6752.0,12345.0,25520.0,6125.0,4405.0,27139.0,3134.0,2364,1328,91,22,69,0,69,1237,622,615,1036,254,52,202,80,122,782,431,351,0.56176,0.038494,0.009306,0.029188,0.0,0.029188,0.523266,0.263113,0.260152,0.43824,0.107445,0.021997,0.085448,0.033841,0.051607,0.330795,0.182318,0.148477,2364,196,2168,0.08291,0.91709,3374,3374,2364,70.1,1010,29.9,10.1,,25.7,,3374,3374,94,2.8,273,8.1,931,27.6,378,11.2,307,9.1,216,6.4,335,9.9,312,9.2,196,5.8,332,9.8,2364,2364,417,0.176396,1947,0.823604,2.62,,2.4,,2364,2364,602,25.5,1320,55.8,129,5.5,178,7.5,70,3.0,65,2.7,0,0.0,29,1.2,93.0,3.9,34,8.2,173,41.5,62,14.9,91,21.8,41,9.8,16,3.8,0,0.0,0,0.0,106300.0,,221,221,36,16.3,26,11.8,0,0.0,0,0.0,159,71.9,0,,191,191,120,62.8,14,7.3,19,9.9,21,11.0,0,0.0,0,0.0,17,8.9,5,,1617,1617,50,3.1,56,3.5,152,9.4,101,6.2,107,6.6,1151,71.2,330,,0.248963,0.254298,0.49674,0.41227,0.58773,6834,6322,4479,1754,89,0,0,89,512,128,384,0.92508,0.655399,0.256658,0.013023,0.0,0.0,0.013023,0.07492,0.01873,0.05619
1,1400000US12001000301,G1200010000301,12001000301,"Census Tract 3.01, Alachua County, Florida",Alachua County,Florida,3849,1806,2043,2443,467,570,440,0.191158,0.23332,0.180106,0.413426,3499,2397,2397,2161,236,0,1102,0.685053,0.685053,0.617605,0.067448,0.0,0.314947,592,55,71,12,63,5,10,53,39,66,55,24,86,0,14,23,16,20481.0,22639.0,36750.0,11402.0,18427.0,34900.0,9127.0,1751,608,151,94,57,0,57,457,244,213,1143,441,314,127,22,105,702,251,451,0.34723,0.086236,0.053684,0.032553,0.0,0.032553,0.260994,0.139349,0.121645,0.65277,0.251856,0.179326,0.07253,0.012564,0.059966,0.400914,0.143347,0.257567,1751,487,1264,0.278127,0.721873,2014,2014,1751,86.9,263,13.1,0.0,,11.0,,2014,2014,0,0.0,93,4.6,156,7.7,321,15.9,175,8.7,437,21.7,313,15.5,257,12.8,140,7.0,122,6.1,1751,1751,360,0.205597,1391,0.794403,2.33,,2.14,,1751,1751,459,26.2,747,42.7,351,20.0,48,2.7,52,3.0,94,5.4,0,0.0,5,0.3,72.0,4.1,25,6.9,100,27.8,69,19.2,80,22.2,75,20.8,11,3.1,0,0.0,0,0.0,143500.0,,150,150,50,33.3,55,36.7,7,4.7,0,0.0,38,25.3,0,,170,170,80,47.1,50,29.4,15,8.8,25,14.7,0,0.0,0,0.0,0,0.0,40,,1332,1332,118,8.9,59,4.4,191,14.3,184,13.8,93,7.0,687,51.6,59,,0.257696,0.459285,0.283019,0.630089,0.369911,3849,3718,1812,1750,156,75,0,81,131,74,57,0.965965,0.470772,0.454664,0.04053,0.019486,0.0,0.021044,0.034035,0.019226,0.014809
2,1400000US12001000302,G1200010000302,12001000302,"Census Tract 3.02, Alachua County, Florida",Alachua County,Florida,2374,1151,1223,1700,464,246,194,0.272941,0.144706,0.114118,0.258824,2047,1241,1241,1064,177,0,806,0.606253,0.606253,0.519785,0.086468,0.0,0.393747,433,8,22,12,106,24,29,19,0,13,34,57,22,41,23,23,0,25108.0,31607.0,45063.0,11434.0,23733.0,28911.0,14018.0,1101,269,53,40,13,0,13,216,117,99,832,380,215,165,35,130,452,243,209,0.244323,0.048138,0.036331,0.011807,0.0,0.011807,0.196185,0.106267,0.089918,0.755677,0.345141,0.195277,0.149864,0.031789,0.118074,0.410536,0.220708,0.189827,1101,284,817,0.257947,0.742053,1489,1489,1101,73.9,388,26.1,9.6,,22.2,,1489,1489,0,0.0,0,0.0,64,4.3,69,4.6,114,7.7,383,25.7,220,14.8,357,24.0,172,11.6,110,7.4,1101,1101,469,0.425976,632,0.574024,2.14,,2.12,,1101,1101,230,20.9,404,36.7,224,20.3,94,8.5,48,4.4,101,9.2,0,0.0,33,3.0,54.0,4.9,69,14.7,153,32.6,163,34.8,36,7.7,19,4.1,6,1.3,23,4.9,0,0.0,103100.0,,296,296,87,29.4,44,14.9,15,5.1,48,16.2,102,34.5,0,,173,173,58,33.5,24,13.9,25,14.5,39,22.5,7,4.0,20,11.6,0,0.0,0,,565,565,59,10.4,63,11.2,43,7.6,41,7.3,49,8.7,310,54.9,67,,0.429147,0.481531,0.089322,0.834117,0.165883,2374,2221,1372,828,21,7,0,14,153,112,41,0.935552,0.577928,0.348778,0.008846,0.002949,0.0,0.005897,0.064448,0.047178,0.01727


In [10]:
# List the column names available in the demographics table
# (pandas abbreviates the middle of a long Index when printing it)
print(demoDf.columns)

Index(['censusid', 'gisjoin', 'tractid', 'tractname', 'county', 'state',
       'total_population', 'male_population', 'female_population',
       'total_population_edu',
       ...
       'total_native_rate', 'native_in_state_rate', 'native_out_state_rate',
       'native_outside_us_rate', 'native_outside_us_puerto_rate',
       'native_outside_us_islands_rate', 'native_outside_us_abroad_rate',
       'total_foreign_rate', 'foreign_citizen_rate',
       'foreign_non_citizen_rate'],
      dtype='object', length=250)


In [11]:
# Cast tractid to string so it can be joined against the string tract IDs on the FEMA side
demoDf = demoDf.astype({'tractid': 'str'})
print(f"tractid datatype: {demoDf['tractid'].dtype}")

tractid datatype: object


In [13]:
# Keep only the tract identifiers and the five demographic measures of interest
demoDf = demoDf.loc[:, [
    # identifiers / labels
    'censusid', 'tractid', 'tractname', 'county', 'state',
    # demographic measures
    'below_poverty_rate',
    'median_earnings_total',
    'unemployed_labor_rate',
    'built_1979_or_earlier_rate',
    'owner_occupied_rate',
]]

demoDf.head(3)

Unnamed: 0,censusid,tractid,tractname,county,state,below_poverty_rate,median_earnings_total,unemployed_labor_rate,built_1979_or_earlier_rate,owner_occupied_rate
0,1400000US12001000200,12001000200,"Census Tract 2, Alachua County, Florida",Alachua County,Florida,0.56176,6752.0,0.033528,0.41227,0.176396
1,1400000US12001000301,12001000301,"Census Tract 3.01, Alachua County, Florida",Alachua County,Florida,0.34723,20481.0,0.067448,0.630089,0.205597
2,1400000US12001000302,12001000302,"Census Tract 3.02, Alachua County, Florida",Alachua County,Florida,0.244323,25108.0,0.086468,0.834117,0.425976


### Combine Demographics with FEMA data

In [14]:
# Attach tract-level demographics to every FEMA record.
# Inner join: FEMA rows whose tract ID has no match in demoDf are dropped.
df = femaDf.merge(demoDf, how='inner', left_on='censusTractId', right_on='tractid')

In [15]:
# Check how many rows we have in the merged dataframe.
# The difference is the number of FEMA records lost in the merge, i.e. those
# whose censusTractId found no matching tractid in the demographics table.
print(f"There are {len(df)} records in the merged dataset.")
print(f"This is a {len(femaDf) - len(df)} difference b/w the original dataset and the merged.")

There are 4734980 records in the merged dataset.
This is a 59177 difference b/w the original dataset and the merged.


In [16]:
# Count the distinct census tracts overall, then per disaster state (PR, TX, FL, NC)
print(f"There are {len(df['tractid'].unique())} tracts represented in the dataset.")
for state_abbr in ('PR', 'TX', 'FL', 'NC'):
    state_tracts = df.loc[df['damagedStateAbbreviation'] == state_abbr, 'censusid'].unique()
    print(f"Of these, {len(state_tracts)} are from {state_abbr}.")

There are 8806 tracts represented in the dataset.
Of these, 896 are from PR.
Of these, 2461 are from TX.
Of these, 4158 are from FL.
Of these, 1294 are from NC.


### Feature Engineering

In [17]:
# Dollar-valued columns; a missing amount is treated as $0
amountCols = [
    'foundationDamageAmount', 'roofDamageAmount', 'rentalAssistanceAmount',
    'replacementAmount', 'repairAmount', 'rpfvl', 'ppfvl',
]
df[amountCols] = df[amountCols].fillna(0)

# haAmount = rental assistance + replacement + repair, summed per record
df['haAmount'] = (
    df['rentalAssistanceAmount']
    .add(df['replacementAmount'])
    .add(df['repairAmount'])
)

# "Mainland" here means every record whose state abbreviation is not PR
print(f"Total ${df['haAmount'].sum():,.2f} in HA payouts")
print(f"Total ${df.loc[df['damagedStateAbbreviation'] != 'PR', 'haAmount'].sum():,.2f} in mainland HA payouts")

Total $2,672,533,413.51 in HA payouts
Total $2,023,524,135.44 in mainland HA payouts


In [18]:
# Convert boolean flag columns to integers (True -> 1, False -> 0)
# so they can be used directly as numeric features
bool_cols = ['specialNeeds', 'homeOwnersInsurance', 'floodInsurance', 'inspected', 'destroyed', 
             'habitabilityRepairsRequired', 'floodDamage', 'foundationDamage', 'roofDamage', 
             'tsaEligible', 'rentalAssistanceEligible', 'repairAssistanceEligible', 
             'replacementAssistanceEligible', 'sbaEligible', 'primaryResidence']

# Cast all listed columns in one assignment
df[bool_cols] = df[bool_cols].astype(int)

In [19]:
# Fill the na's with zero's in waterLevel.
# The result is assigned back rather than calling fillna(..., inplace=True) on
# the column selection. That chained in-place form is deprecated in pandas 2.x
# and, with Copy-on-Write enabled, leaves df unchanged. The deprecation warning
# would not be visible here because warnings are suppressed at the top of the notebook.
df['waterLevel'] = df['waterLevel'].fillna(0)

In [20]:
# One-hot encode the categorical variables: build one indicator frame per
# column, then swap the originals for the indicators in a single concat
cat_cols = ['ownRent', 'residenceType']
_indicator_frames = [pd.get_dummies(df[name], prefix=name) for name in cat_cols]
df = pd.concat([df.drop(columns=cat_cols), *_indicator_frames], axis=1)

### Create Train and Test - With Outliers

In [21]:
# Train (FL/TX) data: every record whose state is neither PR nor NC
trainDf = df.loc[~df['damagedStateAbbreviation'].isin(['PR', 'NC'])]

# Persist the training split
trainDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-FL-TX.csv",
    index=False,
    encoding='utf-8',
)
print('Train (FL/TX) rows:', len(trainDf))

Train (FL/TX) rows: 3535319


In [22]:
# Test (PR) data: Puerto Rico records only
testDf = df[df['damagedStateAbbreviation'] == 'PR']

# Persist the PR test split
testDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-PR.csv",
    index=False,
    encoding='utf-8',
)
print('Test (PR) rows:', len(testDf))

Test (PR) rows: 1067075


In [23]:
# Test (NC) data: North Carolina records only
testNCDf = df[df['damagedStateAbbreviation'] == 'NC']

# Persist the NC test split
testNCDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-NC.csv",
    index=False,
    encoding='utf-8',
)
print('Test (NC) rows:', len(testNCDf))

Test (NC) rows: 132586


### Treat Outliers

In [24]:
# Drop records that carry a positive HA amount but were never inspected.
# The row labels are collected once and reused for both the count and the drop.
uninspected = df.index[(df['haAmount'] > 0) & (df['inspected'] == 0)]
print(f"Expected amount of rows after dropped: {len(df) - len(uninspected)}")
df.drop(index=uninspected, inplace=True)
print(f"Amount after dropped: {len(df)}")

Expected amount of rows after dropped: 4362096
Amount after dropped: 4362096


In [25]:
# Drop records whose HA amount exceeds 33,300.
# NOTE(review): 33300 is presumably the programme's maximum award for the 2017
# disasters -- confirm the same cap applies to DR-4393 (NC).
maxLimit_ha = df.index[df['haAmount'] > 33300]
print(f"Expected amount of rows after dropped: {len(df) - len(maxLimit_ha)}")
df.drop(index=maxLimit_ha, inplace=True)
print(f"Amount after dropped: {len(df)}")

Expected amount of rows after dropped: 4361867
Amount after dropped: 4361867


In [26]:
# Drop records reporting a household composition above 15
# (threshold chosen by the analyst; treated as an outlier cut-off)
maxLimit_house = df.index[df['householdComposition'] > 15]
print(f"Expected amount of rows after dropped: {len(df) - len(maxLimit_house)}")
df.drop(index=maxLimit_house, inplace=True)
print(f"Amount after dropped: {len(df)}")

Expected amount of rows after dropped: 4361803
Amount after dropped: 4361803


In [27]:
# Drop records reporting a water level above 120
# (units are not stated in this notebook -- presumably inches; confirm)
maxLimit_water = df.index[df['waterLevel'] > 120]
print(f"Expected amount of rows after dropped: {len(df) - len(maxLimit_water)}")
df.drop(index=maxLimit_water, inplace=True)
print(f"Amount after dropped: {len(df)}")

Expected amount of rows after dropped: 4361366
Amount after dropped: 4361366


### Create Train and Test - Outliers Removed

In [28]:
# Create train without outliers: every remaining record whose state is neither PR nor NC
trainDf = df.loc[~df['damagedStateAbbreviation'].isin(['PR', 'NC'])]

# Persist the cleaned training split
trainDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-FL-TX-clean.csv",
    index=False,
    encoding='utf-8',
)
print('Train (FL/TX) rows:', len(trainDf))

Train (FL/TX) rows: 3187285


In [27]:
# Create PR test set without outliers
testDf = df[df['damagedStateAbbreviation'] == 'PR']

# Persist the cleaned PR test split
testDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-PR-clean.csv",
    index=False,
    encoding='utf-8',
)
print('Test (PR) rows:', len(testDf))

Test (PR) rows: 1041745


In [30]:
# Create NC test set without outliers
testNCDf = df[df['damagedStateAbbreviation'] == 'NC']

# Persist the cleaned NC test split
testNCDf.to_csv(
    "../data/open-fema/FEMA-Large-Demographics-NC-clean.csv",
    index=False,
    encoding='utf-8',
)
print('Test (NC) rows:', len(testNCDf))

Test (NC) rows: 132336
