In [1]:
# importing the dependencies
import pandas as pd
import numpy
import json
from json import loads, dumps

##Cancer Rates Data##

In [2]:
# Making the dataframe to inspect the data
df_cancer = pd.read_csv("Source_Data/Countytable.csv")
df_cancer.head()

Unnamed: 0,Cancer header,County,County population,Note,Rate,Cancer type,Sex
0,All cancer types combined,Mower,39807,,410.7,All cancer types combined,Everyone
1,All cancer types combined,Todd,24494,,414.3,All cancer types combined,Everyone
2,All cancer types combined,Fillmore,20949,,416.5,All cancer types combined,Everyone
3,All cancer types combined,Lac qui Parle,6719,,422.4,All cancer types combined,Everyone
4,All cancer types combined,Koochiching,12515,,422.7,All cancer types combined,Everyone


In [3]:
# getting rid of some columns and renaming some columns
df_cancer.drop(["Cancer header", "Note","Cancer type","Sex"], axis = 1,inplace = True)
df_cancer.head()

Unnamed: 0,County,County population,Rate
0,Mower,39807,410.7
1,Todd,24494,414.3
2,Fillmore,20949,416.5
3,Lac qui Parle,6719,422.4
4,Koochiching,12515,422.7


In [4]:
df_cancer = df_cancer.rename(columns = {"Rate": "cancer_rate_per_100k","County population": "county_population","County":"county"})
df_cancer.head()

Unnamed: 0,county,county_population,cancer_rate_per_100k
0,Mower,39807,410.7
1,Todd,24494,414.3
2,Fillmore,20949,416.5
3,Lac qui Parle,6719,422.4
4,Koochiching,12515,422.7


In [5]:
# sort by county name 
df_cancer.sort_values(by = "county",inplace = True)
df_cancer.head()

Unnamed: 0,county,county_population,cancer_rate_per_100k
26,Aitkin,15834,449.5
64,Anoka,350253,486.5
69,Becker,34011,491.9
70,Beltrami,46403,492.3
7,Benton,40129,428.1


In [6]:
# setting the index
df_cancer.reset_index(drop= True, inplace = True)
df_cancer.head()

Unnamed: 0,county,county_population,cancer_rate_per_100k
0,Aitkin,15834,449.5
1,Anoka,350253,486.5
2,Becker,34011,491.9
3,Beltrami,46403,492.3
4,Benton,40129,428.1


In [7]:
# checking for null values
df_cancer.isnull().sum()

county                  0
county_population       0
cancer_rate_per_100k    0
dtype: int64

In [8]:
# checking the duplicated values
df_cancer.duplicated()

0     False
1     False
2     False
3     False
4     False
      ...  
83    False
84    False
85    False
86    False
87    False
Length: 88, dtype: bool

In [9]:
#checking the data types
df_cancer.dtypes

county                   object
county_population        object
cancer_rate_per_100k    float64
dtype: object

In [10]:
# change the datatype of county population to integer
df_cancer['county_population'] = df_cancer['county_population'].str.replace(',', '').astype(float)
#df_cancer[['County population']].apply(pd.to_numeric) 
#df_cancer.dtypes
df_cancer.head()

Unnamed: 0,county,county_population,cancer_rate_per_100k
0,Aitkin,15834.0,449.5
1,Anoka,350253.0,486.5
2,Becker,34011.0,491.9
3,Beltrami,46403.0,492.3
4,Benton,40129.0,428.1


In [11]:
df_cancer.dtypes

county                   object
county_population       float64
cancer_rate_per_100k    float64
dtype: object

## Health Outcomes Data ##

In [12]:
#reading the healthoutcome excel file 
df_healthoutcome = pd.read_excel("Source_Data/HealthOutcomeAndFactors.xlsx",header = 1)
df_healthoutcome.head()

Unnamed: 0,FIPS,State,County,Z-Score,Rank,Z-Score.1,Rank.1
0,,,,,,,
1,27001.0,Minnesota,Aitkin,0.43145,69.0,0.761015,82.0
2,27003.0,Minnesota,Anoka,-0.426424,23.0,-0.413081,17.0
3,27005.0,Minnesota,Becker,0.472333,71.0,0.169356,63.0
4,27007.0,Minnesota,Beltrami,1.717782,86.0,0.763052,83.0


In [13]:
#Renaming the columns
df_healthoutcome.rename( columns = {"Z-Score":"hlt_outcome_z", 
                                    "Rank":"hlt_outcome_rank","Z-Score.1":"hlt_factor_z",
                                    "Rank.1":"hlt_factor_rank","County": "county"}, inplace = True)
df_healthoutcome.head()

Unnamed: 0,FIPS,State,county,hlt_outcome_z,hlt_outcome_rank,hlt_factor_z,hlt_factor_rank
0,,,,,,,
1,27001.0,Minnesota,Aitkin,0.43145,69.0,0.761015,82.0
2,27003.0,Minnesota,Anoka,-0.426424,23.0,-0.413081,17.0
3,27005.0,Minnesota,Becker,0.472333,71.0,0.169356,63.0
4,27007.0,Minnesota,Beltrami,1.717782,86.0,0.763052,83.0


In [14]:
df_healthoutcome.drop(index = 0,inplace = True)
df_healthoutcome.head()

Unnamed: 0,FIPS,State,county,hlt_outcome_z,hlt_outcome_rank,hlt_factor_z,hlt_factor_rank
1,27001.0,Minnesota,Aitkin,0.43145,69.0,0.761015,82.0
2,27003.0,Minnesota,Anoka,-0.426424,23.0,-0.413081,17.0
3,27005.0,Minnesota,Becker,0.472333,71.0,0.169356,63.0
4,27007.0,Minnesota,Beltrami,1.717782,86.0,0.763052,83.0
5,27009.0,Minnesota,Benton,-0.022054,47.0,0.114975,59.0


In [15]:
# resetting the index
df_healthoutcome.reset_index( drop = True, inplace = True)
df_healthoutcome.head()

Unnamed: 0,FIPS,State,county,hlt_outcome_z,hlt_outcome_rank,hlt_factor_z,hlt_factor_rank
0,27001.0,Minnesota,Aitkin,0.43145,69.0,0.761015,82.0
1,27003.0,Minnesota,Anoka,-0.426424,23.0,-0.413081,17.0
2,27005.0,Minnesota,Becker,0.472333,71.0,0.169356,63.0
3,27007.0,Minnesota,Beltrami,1.717782,86.0,0.763052,83.0
4,27009.0,Minnesota,Benton,-0.022054,47.0,0.114975,59.0


In [16]:
# dropping column 
df_healthoutcome.drop( columns = ["State"], inplace = True)
df_healthoutcome.head()

Unnamed: 0,FIPS,county,hlt_outcome_z,hlt_outcome_rank,hlt_factor_z,hlt_factor_rank
0,27001.0,Aitkin,0.43145,69.0,0.761015,82.0
1,27003.0,Anoka,-0.426424,23.0,-0.413081,17.0
2,27005.0,Becker,0.472333,71.0,0.169356,63.0
3,27007.0,Beltrami,1.717782,86.0,0.763052,83.0
4,27009.0,Benton,-0.022054,47.0,0.114975,59.0


## Asthma Data ##

In [17]:
#reading the asthma data
df_asthma = pd.read_csv("Source_Data/MN-asthma-county.csv")
df_asthma.head()

Unnamed: 0,__Outcome,__Year,_County,"Age-adjusted rate per 10,000",Note,Count of cases
0,Hospitalizations,2018-2020,Winona,0.5,Unstable rate due to small population,9
1,Hospitalizations,2018-2020,Todd,0.6,Unstable rate due to small population,7
2,Hospitalizations,2018-2020,Wright,1.1,,45
3,Hospitalizations,2018-2020,Wabasha,1.1,Unstable rate due to small population,7
4,Hospitalizations,2018-2020,Waseca,1.1,Unstable rate due to small population,7


In [18]:
# Getting the column names
df_asthma.columns

Index(['__Outcome', '__Year', '_County', 'Age-adjusted rate per 10,000',
       'Note', 'Count of cases'],
      dtype='object')

In [19]:
 # dropping some columns
df_asthma.drop(['__Outcome', '__Year','Note'], axis = 1,inplace = True)
df_asthma.head()

Unnamed: 0,_County,"Age-adjusted rate per 10,000",Count of cases
0,Winona,0.5,9
1,Todd,0.6,7
2,Wright,1.1,45
3,Wabasha,1.1,7
4,Waseca,1.1,7


In [20]:
# Renaming the columns
df_asthma.rename(columns = {"_County": "county","Age-adjusted rate per 10,000":"asthma_rate_per_100k","Count of cases":
                           "count_of_cases"}, inplace = True)
df_asthma.head()

Unnamed: 0,county,asthma_rate_per_100k,count_of_cases
0,Winona,0.5,9
1,Todd,0.6,7
2,Wright,1.1,45
3,Wabasha,1.1,7
4,Waseca,1.1,7


In [21]:
# sorting data by county names
df_asthma.sort_values(by = "county", inplace = True)
df_asthma.head()

Unnamed: 0,county,asthma_rate_per_100k,count_of_cases
22,Aitkin,2.0,9
42,Anoka,2.6,262
46,Becker,2.7,26
59,Beltrami,3.7,55
52,Benton,3.2,37


In [22]:
# Resetting the index
df_asthma.reset_index(drop= True, inplace = True)
df_asthma

Unnamed: 0,county,asthma_rate_per_100k,count_of_cases
0,Aitkin,2,9
1,Anoka,2.6,262
2,Becker,2.7,26
3,Beltrami,3.7,55
4,Benton,3.2,37
...,...,...,...
83,Watonwan,*,*
84,Wilkin,*,*
85,Winona,0.5,9
86,Wright,1.1,45


In [23]:
# checking the length of dataframe
len(df_asthma)

88

In [24]:
#Checking for null values
df_asthma.isnull().sum()

county                  0
asthma_rate_per_100k    0
count_of_cases          0
dtype: int64

In [25]:
# checking for data types 
df_asthma.dtypes

county                  object
asthma_rate_per_100k    object
count_of_cases          object
dtype: object

In [26]:
df_asthma['asthma_rate_per_100k'] = pd.to_numeric(df_asthma['asthma_rate_per_100k']
                                                                           ,errors='coerce')
df_asthma.dtypes

county                   object
asthma_rate_per_100k    float64
count_of_cases           object
dtype: object

In [27]:
df_asthma['count_of_cases'] = pd.to_numeric(df_asthma['count_of_cases'], errors='coerce')
                                                                           
df_asthma.dtypes

county                   object
asthma_rate_per_100k    float64
count_of_cases          float64
dtype: object

In [28]:
df_asthma.head()

Unnamed: 0,county,asthma_rate_per_100k,count_of_cases
0,Aitkin,2.0,9.0
1,Anoka,2.6,262.0
2,Becker,2.7,26.0
3,Beltrami,3.7,55.0
4,Benton,3.2,37.0


## EPA FRS Sites Data ##

In [29]:
# Read in CSV file to create EPA FRS dataframe
df_epa_frs = pd.read_csv("Source_Data/STATE_SINGLE_MN.csv")

  df_epa_frs = pd.read_csv("Source_Data/STATE_SINGLE_MN.csv")


In [30]:
# inspect dataframe 
df_epa_frs.head()

Unnamed: 0,FRS_FACILITY_DETAIL_REPORT_URL,REGISTRY_ID,PRIMARY_NAME,LOCATION_ADDRESS,SUPPLEMENTAL_LOCATION,CITY_NAME,COUNTY_NAME,FIPS_CODE,STATE_CODE,STATE_NAME,...,SIC_CODES,SIC_CODE_DESCRIPTIONS,LATITUDE83,LONGITUDE83,CONVEYOR,COLLECT_DESC,ACCURACY_VALUE,REF_POINT_DESC,HDATUM_DESC,SOURCE_DESC
0,https://ofmpub.epa.gov/frs_public2/fii_query_d...,110055452380,SHERWIN-WILLIAMS 3296,10690 BALTIMORE ST NE,,10690 BALTIMORE ST NE,ANOKA,27003,MN,MINNESOTA,...,5231.0,"PAINT, GLASS, AND WALLPAPER STORES",45.16277,-93.23254,FRS-GEOCODE,ADDRESS MATCHING-HOUSE NUMBER,30.0,CENTER OF A FACILITY OR STATION,NAD83,
1,https://ofmpub.epa.gov/frs_public2/fii_query_d...,110055527835,PARKER AUTO MALL,6771 MAIN ST,,6771 MAIN ST,CHISAGO,27025,MN,MINNESOTA,...,,,45.51152,-92.97108,FRS-GEOCODE,ADDRESS MATCHING-HOUSE NUMBER,30.0,CENTER OF A FACILITY OR STATION,NAD83,
2,https://ofmpub.epa.gov/frs_public2/fii_query_d...,110069113113,150TH STREET - AASTAD TOWNSHIP,SEE LOCATION DESCRIPTION,,AASTAD TOWNSHIP,OTTER TAIL,27111,MN,MINNESOTA,...,,,,,,,,,NAD83,
3,https://ofmpub.epa.gov/frs_public2/fii_query_d...,110068363425,AASTAD TOWNSHIP DUMP SITE,SEE LOCATION DESCRIPTION,,AASTAD TOWNSHIP,OTTER TAIL,27111,MN,MINNESOTA,...,,,,,,,,,NAD83,
4,https://ofmpub.epa.gov/frs_public2/fii_query_d...,110068290904,GARY T LIEN FARM,12701 220TH AVE,,AASTAD TOWNSHIP,OTTER TAIL,27111,MN,MINNESOTA,...,,,46.14703,-96.02937,FRS-GEOCODE,ADDRESS MATCHING-HOUSE NUMBER,30.0,CENTER OF A FACILITY OR STATION,NAD83,


In [31]:
# inspect columns
df_epa_frs.columns

Index(['FRS_FACILITY_DETAIL_REPORT_URL', 'REGISTRY_ID', 'PRIMARY_NAME',
       'LOCATION_ADDRESS', 'SUPPLEMENTAL_LOCATION', 'CITY_NAME', 'COUNTY_NAME',
       'FIPS_CODE', 'STATE_CODE', 'STATE_NAME', 'COUNTRY_NAME', 'POSTAL_CODE',
       'FEDERAL_FACILITY_CODE', 'FEDERAL_AGENCY_NAME', 'TRIBAL_LAND_CODE',
       'TRIBAL_LAND_NAME', 'CONGRESSIONAL_DIST_NUM', 'CENSUS_BLOCK_CODE',
       'HUC_CODE', 'EPA_REGION_CODE', 'SITE_TYPE_NAME', 'LOCATION_DESCRIPTION',
       'CREATE_DATE', 'UPDATE_DATE', 'US_MEXICO_BORDER_IND', 'PGM_SYS_ACRNMS',
       'INTEREST_TYPES', 'NAICS_CODES', 'NAICS_CODE_DESCRIPTIONS', 'SIC_CODES',
       'SIC_CODE_DESCRIPTIONS', 'LATITUDE83', 'LONGITUDE83', 'CONVEYOR',
       'COLLECT_DESC', 'ACCURACY_VALUE', 'REF_POINT_DESC', 'HDATUM_DESC',
       'SOURCE_DESC'],
      dtype='object')

In [32]:
# create new dataframe by dropping unneccessary columns
df_epa_frs_clean = df_epa_frs[['REGISTRY_ID', 'PRIMARY_NAME', 'LOCATION_ADDRESS', 'COUNTY_NAME','SITE_TYPE_NAME', 'INTEREST_TYPES', 'LATITUDE83', 'LONGITUDE83']]

# rename columns for readability
df_epa_frs_clean.rename(columns = {'LATITUDE83': 'LATITUDE', 'LONGITUDE83': 'LONGITUDE', 'PRIMARY_NAME': 'SITE PRIMARY NAME', 'LOCATION_ADDRESS': 'SITE_ADDRESS'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_epa_frs_clean.rename(columns = {'LATITUDE83': 'LATITUDE', 'LONGITUDE83': 'LONGITUDE', 'PRIMARY_NAME': 'SITE PRIMARY NAME', 'LOCATION_ADDRESS': 'SITE_ADDRESS'}, inplace = True)


In [33]:
#drop rows with null values in coordinate, interest types, location address, or site types columns
df_epa_frs_clean = df_epa_frs_clean.dropna(axis=0, subset=['LATITUDE'])
df_epa_frs_clean = df_epa_frs_clean.dropna(axis=0, subset=['LONGITUDE'])

In [34]:
# drop rows where interest type is "state master" only
df_epa_frs_clean = df_epa_frs_clean[df_epa_frs_clean.INTEREST_TYPES != "STATE MASTER"]

In [35]:
# create list of search terms for interest types to try and narrow down to relevant sites
search_term_list = ['RELEASE ASSESSMENT','AIR EMISSION INVENTORY', 'AIR MAJOR', 'AIR MINOR', 'AIR SYNTHETIC MINOR', 'ELECTRIC GENERATOR', 'ELECTRIC POWER GENERAT', 'ETHANOL FACILITY', 'GASOLINE', 'GREENHOUSE GAS', 'ANIMAL', 'LIVESTOCK', 'TRI REPORTER', 'TSCA SUBMITTER', 'EPCRA', 'FRP', 'SPCC', 'MINERAL EXPLORATION', 'CATHODE RAY', 'CESQG', 'CORRECTIVE ACTION', 'HAZARDOUS WASTE', 'INCINERATOR', 'INFECTIOUS WASTE', 'LQG', 'SQG', 'TSD', 'UIC', 'USED OIL', 'COMPLIANCE', 'ENFORCEMENT', 'PESTICIDE', 'RAD NPL', 'RAD WIPP', 'RAD NESHAPS', 'RADIOACTIVE', 'CLEANUP', 'REMEDIATION', 'REMEDIAL', 'BROWNFIELDS', 'SEDIMENTS', 'SUPERFUND', 'E-WASTE', 'INDUSTRIAL SITES', 'MATERIAL RECYCLING', 'MINE', 'MINING', 'OIL CONTROL', 'REFUSE DISPOSAL', 'SCRAP TIRE MANAGEMENT', 'SEWAGE SLUDGE', 'SOLID WASTE', 'LEAKING STORAGE TANK','UNDERGROUND STORAGE TANK', 'BIOSOLIDS', 'NPDES MAJOR', 'NPDES-MINOR', 'NPDES NON-MAJOR' 'NPDES UNPERMITTED', 'OIL AND GAS', 'MULTI-PERMITTED SITE']

# filter dataframe by search term list within the Interest_Types field
final_epa_frs_clean = df_epa_frs_clean[df_epa_frs_clean['INTEREST_TYPES'].str.contains('|'.join(search_term_list), na=False)]

In [36]:
# modified list of search terms for a smaller, more precise dataset focused on pollution
modified_search_term_list = ['TRI REPORTER', 'BROWNFIELDS PROPERTY', 'PESTICIDE PRODUCER', 'LQG', 'AIR MAJOR', 'TSCA SUBMITTER', 'HAZARDOUS AIR POLLUTANT MAJOR', 'GREENHOUSE GAS REPORTER', 'SUPERFUND', 'GASOLINE AND DIESEL PRODUCERS', 'ICIS-NPDES MAJOR', 'LEAKING UNDERGROUND STORAGE TANK', 'OIL BASED', 'GAS BASED', 'TSD', 'COAL BASED']

# filter dataframe by search term list within the Interest_Types field
modified_final_epa_frs_clean = df_epa_frs_clean[df_epa_frs_clean['INTEREST_TYPES'].str.contains('|'.join(modified_search_term_list), na=False)]

# filter out certain strings
modified_final_epa_frs_clean[modified_final_epa_frs_clean["INTEREST_TYPES"].str.contains('VSQG')==False]
modified_final_epa_frs_clean[modified_final_epa_frs_clean["INTEREST_TYPES"].str.contains('SQG')==False]

Unnamed: 0,REGISTRY_ID,SITE PRIMARY NAME,SITE_ADDRESS,COUNTY_NAME,SITE_TYPE_NAME,INTEREST_TYPES,LATITUDE,LONGITUDE
191,110040710905,13869 FORMER GAS STATION,402 W THORPE AVE,NORMAN,STATIONARY,LEAKING UNDERGROUND STORAGE TANK - ARRA,47.296260,-96.520978
203,110017862954,ADA FEED & SEED INC,117 W MAIN ST SOUT,NORMAN,STATIONARY,"COMPLIANCE ACTIVITY, PESTICIDE PRODUCER",47.296295,-96.515827
282,110071164025,"SIMPLOT AB RETAIL, INC.",999 W MAIN ST,NORMAN COUNTY,STATIONARY,PESTICIDE PRODUCER,47.310540,-96.519700
293,110038254509,"TRIANGLE AG, LLC",202 W MAIN,NORMAN,STATIONARY,PESTICIDE PRODUCER,47.297522,-96.516176
327,110008059028,LAND O'LAKES FARMLAND FEED,13 COMMERCE ST,MOWER,STATIONARY,TRI REPORTER,43.564190,-92.713809
...,...,...,...,...,...,...,...,...
211069,110070698829,ST. OLAF STANDBY GENERATORS,,RICE,,ELECTRIC POWER GENERATOR (OIL BASED),44.461111,-93.180833
213002,110070699470,UNITED HEALTH CARE,,HENNEPIN,,ELECTRIC POWER GENERATOR (OIL BASED),44.984444,-93.360833
213037,110070699471,UNIV MINNESOTA CHP PLANT,,HENNEPIN,,ELECTRIC POWER GENERATOR (GAS BASED),44.978611,-93.240833
214228,110070699426,WEST FAIRBAULT,,RICE,,ELECTRIC POWER GENERATOR (GAS BASED),44.269722,-93.290556


In [44]:
# inspect final dataframe
final_epa_frs_clean.tail(10)

Unnamed: 0,REGISTRY_ID,SITE PRIMARY NAME,SITE_ADDRESS,COUNTY_NAME,SITE_TYPE_NAME,INTEREST_TYPES,LATITUDE,LONGITUDE
214725,110070699431,WINONA COUNTY WIND LLC,,WINONA,,ELECTRIC POWER GENERATOR (WIND BASED),44.1125,-91.924722
214733,110070699432,WINONA SOLAR,,WINONA,,ELECTRIC POWER GENERATOR (SOLAR BASED),44.124097,-91.900997
214804,110070934927,WOLLAN GARDEN SOLAR,,POPE,,ELECTRIC POWER GENERATOR (SOLAR BASED),45.637121,-95.530903
214826,110070934928,WOODBURY SOLAR,,WASHINGTON,,ELECTRIC POWER GENERATOR (SOLAR BASED),44.881944,-92.9675
214916,110070699433,WRIGHT CUDDYER CSG,,WRIGHT,,ELECTRIC POWER GENERATOR (SOLAR BASED),45.1831,-93.7341
214920,110070698748,WRIGHT KIRBY 1-5 CSG,,WRIGHT,,ELECTRIC POWER GENERATOR (SOLAR BASED),45.259,-93.791
214925,110070698738,"WRIGHTSUN CSG, LLC",,WRIGHT,,ELECTRIC POWER GENERATOR (SOLAR BASED),45.21643,-93.718376
214949,110070698739,"WYOMING 2 CSG, LLC",,CHISAGO,,ELECTRIC POWER GENERATOR (SOLAR BASED),45.35,-92.95
215080,110070698740,ZUMBRO COMMUNITY SOLAR GARDEN,,GOODHUE,,ELECTRIC POWER GENERATOR (SOLAR BASED),44.319478,-92.670339
215083,110070934929,ZUMBRO SOLAR GARDEN,,DODGE,,ELECTRIC POWER GENERATOR (SOLAR BASED),44.04603,-92.75999


In [38]:
modified_final_epa_frs_clean.tail(10)

Unnamed: 0,REGISTRY_ID,SITE PRIMARY NAME,SITE_ADDRESS,COUNTY_NAME,SITE_TYPE_NAME,INTEREST_TYPES,LATITUDE,LONGITUDE
205458,110070698885,MINNESOTA VALLEY,,CHIPPEWA,,ELECTRIC POWER GENERATOR (GAS BASED),44.802778,-95.526944
207094,110070699138,OWATONNA ENERGY STATION,,STEELE,,ELECTRIC POWER GENERATOR (GAS BASED),44.085225,-93.262714
207915,110070699148,POET BIOREFINING LAKE CRYSTAL,,BLUE EARTH,,ELECTRIC POWER GENERATOR (GAS BASED),44.093333,-94.276667
209981,110070699372,SHAKOPEE ENERGY PARK,,SCOTT,,ELECTRIC POWER GENERATOR (GAS BASED),44.783611,-93.481111
210501,110070698825,SOUTHEAST STEAM PLANT,,HENNEPIN,,ELECTRIC POWER GENERATOR (GAS BASED),44.980833,-93.249722
211069,110070698829,ST. OLAF STANDBY GENERATORS,,RICE,,ELECTRIC POWER GENERATOR (OIL BASED),44.461111,-93.180833
213002,110070699470,UNITED HEALTH CARE,,HENNEPIN,,ELECTRIC POWER GENERATOR (OIL BASED),44.984444,-93.360833
213037,110070699471,UNIV MINNESOTA CHP PLANT,,HENNEPIN,,ELECTRIC POWER GENERATOR (GAS BASED),44.978611,-93.240833
214228,110070699426,WEST FAIRBAULT,,RICE,,ELECTRIC POWER GENERATOR (GAS BASED),44.269722,-93.290556
214310,110070699429,WESTSIDE ENERGY STATION,,OLMSTED,,ELECTRIC POWER GENERATOR (GAS BASED),44.039023,-92.551866


## JSONIFY dataframes for ingestion into DB ##

In [39]:
# saving the data in an output file as json format
df_cancer.to_json('Cleaned_Data/cancer(2015-2019).json', orient='records')

In [40]:
# Saving the file in json format
df_healthoutcome.to_json('Cleaned_Data/healthoutcome(2023).json', orient='records')

In [41]:
# saving the file in json form
df_asthma.to_json('Cleaned_Data/asthma(2018-2020).json', orient='records')

In [42]:
# convert to json
final_epa_frs_clean.to_json('Cleaned_Data/epa_frs_data.json', orient='records')
modified_final_epa_frs_clean.to_json('Cleaned_Data/modified_final_epa_frs_clean.json', orient='records')