In [1]:
## https://hydrofunctions.readthedocs.io/en/master/introduction.html
import hydrofunctions as hf
import pandas as pd

In [2]:
## https://help.waterdata.usgs.gov/code/parameter_cd_query?fmt=rdb&inline=true&group_cd=%
## 00400	Physical	pH, water, unfiltered, field, standard units	Agree					Total		 	pH	std units
## 63680	Physical	Turbidity, water, unfiltered, monochrome near infra-red LED light, 780-900 nm, detection angle 90 +-2.5 degrees, formazin nephelometric units (FNU)	USGS use and no use by EPA					Total		 	Turbidity	FNU
## 00010	Physical	Temperature, water, degrees Celsius	Agree							 	Temperature, water	deg C

In [3]:
## dates: requested period as "YYYY-MM-DD" strings; get_water_quality_data hands them to hf.NWIS
start_date = "2000-01-01"
end_date = "2022-12-31"

## USGS parameter codes to request: 00400 = pH (std units), 63680 = turbidity (FNU), 00010 = water temperature (deg C)
parameters = ["00400","63680","00010"]

In [4]:
def _mean_across_columns(state_df, parameter_code):
  """Row-wise mean of every value column belonging to one parameter code.

  A column is selected when its name contains ``':' + parameter_code`` and
  does not end in ``'_qualifiers'``. ``DataFrame.mean`` skips NaN by default,
  so a row is NaN only when all selected columns are NaN for that row.
  """
  value_columns = [col for col in state_df.columns
                   if ':' + parameter_code in col and not col.endswith('_qualifiers')]
  return state_df[value_columns].mean(axis=1)


def get_water_quality_data(state,start_date,end_date, parameters):
  """Fetch NWIS data for one state and reduce it to monthly feature means.

  Parameters
  ----------
  state
    Passed to ``hf.NWIS`` as ``stateCd`` (the notebook uses two-letter
    codes such as ``'CA'``).
  start_date, end_date
    Passed unchanged to ``hf.NWIS``.
  parameters
    Passed to ``hf.NWIS`` as ``parameterCd``. The returned columns are
    always built from codes 63680 (turbidity), 00010 (temperature) and
    00400 (pH), whatever is requested here.

  Returns
  -------
  pandas.DataFrame
    Indexed by month-end ``datetimeUTC`` with the columns
    ``'turbidity value'``, ``'temperature value'`` and ``'pH value'``.
    Each cell is the mean over the month of the per-row mean taken across
    all matching value columns.
  """
  # Keep the request object under its own name instead of rebinding `state`.
  nwis = hf.NWIS(stateCd = state, start_date=start_date, end_date=end_date, parameterCd=parameters)
  state_df = nwis.df().reset_index()

  state_water_quality = pd.DataFrame({
    'datetimeUTC': pd.to_datetime(state_df['datetimeUTC']),
    'turbidity value': _mean_across_columns(state_df, '63680'),
    'temperature value': _mean_across_columns(state_df, '00010'),
    'pH value': _mean_across_columns(state_df, '00400'),
  }).set_index('datetimeUTC')

  # An explicit MonthEnd offset bins exactly like the 'M' alias but is not
  # affected by the alias deprecation (pandas >= 2.2 expects 'ME'), so the
  # function behaves the same on old and new pandas.
  return state_water_quality.resample(pd.offsets.MonthEnd()).mean()


In [5]:
# check if any feature columns contain null values
def features_contain_nulls(state_water_quality_monthly):
  """Return a one-row DataFrame counting the null entries of each feature column.

  Reads the 'pH value', 'turbidity value' and 'temperature value' columns of
  ``state_water_quality_monthly`` and reports their null counts under
  'pH nulls', 'turbidity nulls' and 'temperature nulls' respectively.
  """
  # (output label, source column), in the order the report columns appear
  report_layout = (
    ('pH nulls', 'pH value'),
    ('turbidity nulls', 'turbidity value'),
    ('temperature nulls', 'temperature value'),
  )
  null_counts = {
    label: [state_water_quality_monthly[column].isnull().sum()]
    for label, column in report_layout
  }
  return pd.DataFrame(null_counts)

# Get Water Quality Feature Data Frames

## California

In [6]:
## California: monthly mean turbidity, temperature and pH for stateCd 'CA'

california_water_quality_monthly = get_water_quality_data('CA',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
california_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=CA&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 355 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,9.178916,8.317204
2000-02-29 00:00:00+00:00,,9.914265,8.116379
2000-03-31 00:00:00+00:00,,11.156489,8.110941
2000-04-30 00:00:00+00:00,,13.570469,7.831667
2000-05-31 00:00:00+00:00,,15.317995,7.815430
...,...,...,...
2022-08-31 00:00:00+00:00,3.378961,19.537764,7.898180
2022-09-30 00:00:00+00:00,3.206449,18.180644,7.854580
2022-10-31 00:00:00+00:00,2.251239,15.170036,7.837428
2022-11-30 00:00:00+00:00,2.489977,9.512101,7.927999


In [7]:
# count, per feature, the California months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(california_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,0,23,0


# Two state groupings; a state may belong to both (the notebook does not say what is produced/consumed).
producers = {
  "Texas", "North Dakota", "Wyoming",
  "Pennsylvania", "Oklahoma", "West Virginia",
}
consumers = {
  "Texas", "California", "New York",
  "Florida", "Ohio", "Pennsylvania",
}
# Every state that appears in either group, de-duplicated (set order is arbitrary).
relevstates = [*(producers | consumers)]

## Texas

In [8]:
## Texas: monthly mean turbidity, temperature and pH for stateCd 'TX'

texas_water_quality_monthly = get_water_quality_data('TX',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
texas_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=TX&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be set to a frequency of 0 minutes
ERROR:hydrofunctions.exceptions:It is not possible to determine the frequency for one of the datasets in this request. This dataset will be 

Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,12.445836,7.569588
2000-02-29 00:00:00+00:00,,15.157896,7.588297
2000-03-31 00:00:00+00:00,,18.204863,7.687039
2000-04-30 00:00:00+00:00,,20.714020,7.673864
2000-05-31 00:00:00+00:00,,25.044272,7.569981
...,...,...,...
2022-08-31 00:00:00+00:00,47.417719,28.551445,7.975502
2022-09-30 00:00:00+00:00,29.167917,27.057506,7.965684
2022-10-31 00:00:00+00:00,23.380178,22.548581,8.050167
2022-11-30 00:00:00+00:00,41.448533,17.693974,7.819498


In [9]:
# count, per feature, the Texas months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(texas_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,0,62,0


## North Dakota

In [10]:
## North Dakota: monthly mean turbidity, temperature and pH for stateCd 'ND'

north_dakota_water_quality_monthly = get_water_quality_data('ND',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
north_dakota_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=ND&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 22 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,0.783656,7.641935
2000-02-29 00:00:00+00:00,,1.332989,7.647321
2000-03-31 00:00:00+00:00,,3.457043,8.077500
2000-04-30 00:00:00+00:00,,8.685681,8.320833
2000-05-31 00:00:00+00:00,,15.998694,8.372581
...,...,...,...
2022-08-31 00:00:00+00:00,35.211470,22.939558,8.296989
2022-09-30 00:00:00+00:00,27.414074,17.880304,8.402667
2022-10-31 00:00:00+00:00,12.487455,9.400869,8.480072
2022-11-30 00:00:00+00:00,8.439630,1.902397,8.557778


In [11]:
# count, per feature, the North Dakota months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(north_dakota_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,1,45,0


## Wyoming

In [12]:
## Wyoming: monthly mean turbidity, temperature and pH for stateCd 'WY'

wyoming_water_quality_monthly = get_water_quality_data('WY',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
wyoming_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=WY&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 15 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,0.045161,
2000-02-29 00:00:00+00:00,,0.405747,
2000-03-31 00:00:00+00:00,,3.229032,
2000-04-30 00:00:00+00:00,,9.367778,
2000-05-31 00:00:00+00:00,,11.838710,
...,...,...,...
2022-08-31 00:00:00+00:00,,18.675138,8.061290
2022-09-30 00:00:00+00:00,,15.548521,7.995833
2022-10-31 00:00:00+00:00,,10.417843,8.093000
2022-11-30 00:00:00+00:00,,5.557778,


In [13]:
# count, per feature, the Wyoming months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(wyoming_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,96,179,4


## Pennsylvania

In [14]:
## Pennsylvania: monthly mean turbidity, temperature and pH for stateCd 'PA'

pennsylvania_water_quality_monthly = get_water_quality_data('PA',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
pennsylvania_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=PA&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 66 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,2.368902,6.270027
2000-02-29 00:00:00+00:00,,2.267108,6.217853
2000-03-31 00:00:00+00:00,,6.821660,6.352823
2000-04-30 00:00:00+00:00,,10.050706,6.622318
2000-05-31 00:00:00+00:00,,15.322927,6.770357
...,...,...,...
2022-08-31 00:00:00+00:00,10.905007,22.691034,7.707585
2022-09-30 00:00:00+00:00,10.608230,19.345940,7.730803
2022-10-31 00:00:00+00:00,9.736892,12.494275,7.755555
2022-11-30 00:00:00+00:00,7.744278,8.596683,7.667910


In [15]:
# count, per feature, the Pennsylvania months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(pennsylvania_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,0,51,0


## West Virginia

In [16]:
## West Virginia: monthly mean turbidity, temperature and pH for stateCd 'WV'

west_virginia_water_quality_monthly = get_water_quality_data('WV',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
west_virginia_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=WV&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 12 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,2.491935,
2000-02-29 00:00:00+00:00,,3.750000,
2000-03-31 00:00:00+00:00,,8.145161,
2000-04-30 00:00:00+00:00,,11.125000,
2000-05-31 00:00:00+00:00,,15.491935,
...,...,...,...
2022-08-31 00:00:00+00:00,12.104433,22.322228,7.796911
2022-09-30 00:00:00+00:00,5.601111,19.965941,7.846245
2022-10-31 00:00:00+00:00,4.156052,12.276018,7.825817
2022-11-30 00:00:00+00:00,7.157004,8.784340,7.726117


In [17]:
# count, per feature, the West Virginia months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(west_virginia_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,59,75,0


## Oklahoma

In [18]:
## Oklahoma: monthly mean turbidity, temperature and pH for stateCd 'OK'

oklahoma_water_quality_monthly = get_water_quality_data('OK',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
oklahoma_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=OK&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 8 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,5.682437,7.820645
2000-02-29 00:00:00+00:00,,8.703448,7.863448
2000-03-31 00:00:00+00:00,,12.275986,7.634839
2000-04-30 00:00:00+00:00,,16.698148,7.812667
2000-05-31 00:00:00+00:00,,20.693369,7.461505
...,...,...,...
2022-08-31 00:00:00+00:00,11.257348,26.514215,8.149977
2022-09-30 00:00:00+00:00,3.490500,24.276610,7.953333
2022-10-31 00:00:00+00:00,2.588513,18.497588,7.997034
2022-11-30 00:00:00+00:00,4.916944,12.625269,7.898194


In [19]:
# count, per feature, the Oklahoma months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(oklahoma_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,0,56,0


## Florida

In [20]:
## Florida: monthly mean turbidity, temperature and pH for stateCd 'FL'

florida_water_quality_monthly = get_water_quality_data('FL',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
florida_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=FL&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 25 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,19.011127,
2000-02-29 00:00:00+00:00,,19.756983,
2000-03-31 00:00:00+00:00,,23.263321,
2000-04-30 00:00:00+00:00,,24.153959,
2000-05-31 00:00:00+00:00,,27.407253,
...,...,...,...
2022-08-31 00:00:00+00:00,14.765233,28.547353,7.319975
2022-09-30 00:00:00+00:00,14.407815,27.196034,7.242145
2022-10-31 00:00:00+00:00,15.860287,24.061891,7.392603
2022-11-30 00:00:00+00:00,17.803148,22.843775,7.469025


In [21]:
# count, per feature, the Florida months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(florida_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,43,29,0


## New York

In [22]:
## New York: monthly mean turbidity, temperature and pH for stateCd 'NY'

new_york_water_quality_monthly = get_water_quality_data('NY',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
new_york_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=NY&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 21 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,1.201868,
2000-02-29 00:00:00+00:00,,0.846331,
2000-03-31 00:00:00+00:00,,4.431051,
2000-04-30 00:00:00+00:00,,7.774637,
2000-05-31 00:00:00+00:00,,13.506535,
...,...,...,...
2022-08-31 00:00:00+00:00,10.813529,20.875112,7.756573
2022-09-30 00:00:00+00:00,22.361714,17.867870,7.803483
2022-10-31 00:00:00+00:00,7.928440,12.220596,7.914533
2022-11-30 00:00:00+00:00,16.023567,8.295794,7.903684


In [23]:
# count, per feature, the New York months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(new_york_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,99,57,0


## Ohio

In [24]:
## Ohio: monthly mean turbidity, temperature and pH for stateCd 'OH'

ohio_water_quality_monthly = get_water_quality_data('OH',start_date,end_date, parameters)
# bare name as the last expression so the notebook renders the frame
ohio_water_quality_monthly

Requesting data from https://waterservices.usgs.gov/nwis/dv/?...Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&stateCd=OH&parameterCd=00400%2C63680%2C00010&startDT=2000-01-01&endDT=2022-12-31


ERROR:hydrofunctions.exceptions:One or more datasets in this request is going to be 'upsampled' to 1 days 00:00:00 because the data were collected at a lower frequency of 2 days 00:00:00
  if not DF.index.is_monotonic:


Unnamed: 0_level_0,turbidity value,temperature value,pH value
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31 00:00:00+00:00,,3.340598,7.318536
2000-02-29 00:00:00+00:00,,4.119287,7.383687
2000-03-31 00:00:00+00:00,,9.965557,7.008531
2000-04-30 00:00:00+00:00,,12.320933,7.169826
2000-05-31 00:00:00+00:00,,19.024503,7.275332
...,...,...,...
2022-08-31 00:00:00+00:00,23.081973,22.957689,7.938600
2022-09-30 00:00:00+00:00,16.769210,19.880930,7.910273
2022-10-31 00:00:00+00:00,9.469147,12.216695,7.994524
2022-11-30 00:00:00+00:00,12.764152,7.969582,7.983438


In [25]:
# count, per feature, the Ohio months whose mean is null (NaN); shown as a one-row table
features_contain_nulls(ohio_water_quality_monthly)

Unnamed: 0,pH nulls,turbidity nulls,temperature nulls
0,0,129,0


# Create Data Frame for Each Feature

In [26]:
## Ohio
# Split the Ohio monthly frame into one Series per feature.
ohio_turbidity_monthly, ohio_ph_monthly, ohio_temperature_monthly = (
  ohio_water_quality_monthly[feature]
  for feature in ('turbidity value', 'pH value', 'temperature value')
)

In [27]:
# Monthly frames for every state; the order must stay aligned with state_names.
state_list = [
  california_water_quality_monthly,
  texas_water_quality_monthly,
  north_dakota_water_quality_monthly,
  wyoming_water_quality_monthly,
  pennsylvania_water_quality_monthly,
  west_virginia_water_quality_monthly,
  oklahoma_water_quality_monthly,
  florida_water_quality_monthly,
  new_york_water_quality_monthly,
  ohio_water_quality_monthly,
]

In [28]:
# Column labels for the per-feature tables; the order must stay aligned with state_list.
state_names = [
  "California",
  "Texas",
  "North Dakota",
  "Wyoming",
  "Pennsylvania",
  "West Virginia",
  "Oklahoma",
  "Florida",
  "New York",
  "Ohio",
]

In [29]:
## Turbidity

# one monthly turbidity Series per state, in state_list order
state_turbidity_dfs = [monthly['turbidity value'] for monthly in state_list]

# place the Series side by side (aligned on their datetime index) and label each column with its state
turbidity_monthly_df = pd.concat(state_turbidity_dfs, axis=1).set_axis(state_names, axis=1)

turbidity_monthly_df

Unnamed: 0_level_0,California,Texas,North Dakota,Wyoming,Pennsylvania,West Virginia,Oklahoma,Florida,New York,Ohio
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-31 00:00:00+00:00,,,,,,,,,,
2000-02-29 00:00:00+00:00,,,,,,,,,,
2000-03-31 00:00:00+00:00,,,,,,,,,,
2000-04-30 00:00:00+00:00,,,,,,,,,,
2000-05-31 00:00:00+00:00,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2022-08-31 00:00:00+00:00,3.378961,47.417719,35.211470,,10.905007,12.104433,11.257348,14.765233,10.813529,23.081973
2022-09-30 00:00:00+00:00,3.206449,29.167917,27.414074,,10.608230,5.601111,3.490500,14.407815,22.361714,16.769210
2022-10-31 00:00:00+00:00,2.251239,23.380178,12.487455,,9.736892,4.156052,2.588513,15.860287,7.928440,9.469147
2022-11-30 00:00:00+00:00,2.489977,41.448533,8.439630,,7.744278,7.157004,4.916944,17.803148,16.023567,12.764152


In [30]:
## pH

# one monthly pH Series per state, in state_list order
state_pH_dfs = [monthly['pH value'] for monthly in state_list]

# place the Series side by side (aligned on their datetime index) and label each column with its state
pH_monthly_df = pd.concat(state_pH_dfs, axis=1).set_axis(state_names, axis=1)

pH_monthly_df

Unnamed: 0_level_0,California,Texas,North Dakota,Wyoming,Pennsylvania,West Virginia,Oklahoma,Florida,New York,Ohio
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-31 00:00:00+00:00,8.317204,7.569588,7.641935,,6.270027,,7.820645,,,7.318536
2000-02-29 00:00:00+00:00,8.116379,7.588297,7.647321,,6.217853,,7.863448,,,7.383687
2000-03-31 00:00:00+00:00,8.110941,7.687039,8.077500,,6.352823,,7.634839,,,7.008531
2000-04-30 00:00:00+00:00,7.831667,7.673864,8.320833,,6.622318,,7.812667,,,7.169826
2000-05-31 00:00:00+00:00,7.815430,7.569981,8.372581,,6.770357,,7.461505,,,7.275332
...,...,...,...,...,...,...,...,...,...,...
2022-08-31 00:00:00+00:00,7.898180,7.975502,8.296989,8.061290,7.707585,7.796911,8.149977,7.319975,7.756573,7.938600
2022-09-30 00:00:00+00:00,7.854580,7.965684,8.402667,7.995833,7.730803,7.846245,7.953333,7.242145,7.803483,7.910273
2022-10-31 00:00:00+00:00,7.837428,8.050167,8.480072,8.093000,7.755555,7.825817,7.997034,7.392603,7.914533,7.994524
2022-11-30 00:00:00+00:00,7.927999,7.819498,8.557778,,7.667910,7.726117,7.898194,7.469025,7.903684,7.983438


In [31]:
## Temperature

# one monthly temperature Series per state, in state_list order
state_temperature_dfs = [monthly['temperature value'] for monthly in state_list]

# place the Series side by side (aligned on their datetime index) and label each column with its state
temperature_monthly_df = pd.concat(state_temperature_dfs, axis=1).set_axis(state_names, axis=1)

temperature_monthly_df

Unnamed: 0_level_0,California,Texas,North Dakota,Wyoming,Pennsylvania,West Virginia,Oklahoma,Florida,New York,Ohio
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-31 00:00:00+00:00,9.178916,12.445836,0.783656,0.045161,2.368902,2.491935,5.682437,19.011127,1.201868,3.340598
2000-02-29 00:00:00+00:00,9.914265,15.157896,1.332989,0.405747,2.267108,3.750000,8.703448,19.756983,0.846331,4.119287
2000-03-31 00:00:00+00:00,11.156489,18.204863,3.457043,3.229032,6.821660,8.145161,12.275986,23.263321,4.431051,9.965557
2000-04-30 00:00:00+00:00,13.570469,20.714020,8.685681,9.367778,10.050706,11.125000,16.698148,24.153959,7.774637,12.320933
2000-05-31 00:00:00+00:00,15.317995,25.044272,15.998694,11.838710,15.322927,15.491935,20.693369,27.407253,13.506535,19.024503
...,...,...,...,...,...,...,...,...,...,...
2022-08-31 00:00:00+00:00,19.537764,28.551445,22.939558,18.675138,22.691034,22.322228,26.514215,28.547353,20.875112,22.957689
2022-09-30 00:00:00+00:00,18.180644,27.057506,17.880304,15.548521,19.345940,19.965941,24.276610,27.196034,17.867870,19.880930
2022-10-31 00:00:00+00:00,15.170036,22.548581,9.400869,10.417843,12.494275,12.276018,18.497588,24.061891,12.220596,12.216695
2022-11-30 00:00:00+00:00,9.512101,17.693974,1.902397,5.557778,8.596683,8.784340,12.625269,22.843775,8.295794,7.969582


# Convert them to csvs

In [32]:
# write the monthly turbidity table (one column per state) to a CSV file; the relative path resolves against the working directory
turbidity_monthly_df.to_csv('turbidity_monthly.csv')

In [33]:
# write the monthly temperature table (one column per state) to a CSV file; the relative path resolves against the working directory
temperature_monthly_df.to_csv('temperature_monthly.csv')

In [34]:
# write the monthly pH table (one column per state) to a CSV file; the relative path resolves against the working directory
pH_monthly_df.to_csv('pH_monthly.csv')