In [None]:
import pickle

# Load the pickled water-quality bundle from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# run this against a file that comes from a trusted source.
with open('WQ_Data_Nov2021.p', 'rb') as file:
    data = pickle.load(file)
# print(data)

# Two entries of the bundle are read here: 'metadataWQ' and 'StackFrame'.
metadata = data['metadataWQ']
print(metadata)
# The variable used to be spelled `measurments`; it now uses the same spelling
# as every other cell that works with this table.
measurements = data['StackFrame']
print(measurements)

In [None]:
# Scratch check: render a float with zero decimal places (evaluates to '10000509')
"{:.0f}".format(10000509.0)

In [None]:
import pandas as pd

# `hydro` is otherwise only assigned in a later cell of this notebook, so on a
# fresh kernel this cell failed with NameError. Read it here from the same file
# that the later cell reads.
hydro = pd.read_json("Ready/meta_hydro.json")

# Source columns to keep, in output order.
source_columns = ["sID", "nz_segment", "nz_reach", "npID", "RawValue", "Flow", "myDate",
                  "Year", "Month", "BiMonth", "Qtr", "Censored", "CenType", "QSource"]

# Source column name -> output column name.
column_name_mapping = {
    'npID': 'contaminant',
    "RawValue": "value",
    "Flow": "flow",
    "myDate": "date",
    "Year": "year",
    "Month": "month",
    "BiMonth": "bi_month",
    "Qtr": "quarter",
    "Censored": "censor_measured",
    "CenType": "censor_type",
    "QSource": "q_source"
}

# Columns stored as floats; every other column is stored as text.
columns_to_exclude = ['value', 'flow']

# Build the table as one chain so that every step returns a new object.
# Previously the frame was sliced and then renamed / assigned in place, which
# operates on a slice (SettingWithCopyWarning) and also added an `nz_segment`
# column to data['StackFrame'] itself.
measurements = (
    data['StackFrame']
    .assign(nz_segment=lambda frame: frame["nzsegment"].astype(int))
    # Outer join: segments present on only one side are kept.
    .merge(hydro[['nz_reach', "nz_segment"]], on="nz_segment", how='outer')
    .loc[:, source_columns]
    .rename(columns=column_name_mapping)
)

# Cast year to int before the text conversion below (presumably so the text
# form carries no decimal part).
# NOTE(review): this raises if the outer merge produced rows with a missing
# year -- confirm every hydro segment has at least one measurement.
measurements["year"] = measurements["year"].astype("int")

# Floats for the excluded columns, strings for everything else.
measurements = measurements.astype(
    {column: (float if column in columns_to_exclude else str)
     for column in measurements.columns}
)
measurements.to_json("Ready/measurements.json")
print(measurements)

In [None]:
import pandas as pd
import json

# Load the three metadata tables
meta_geog = pd.read_json("Ready/meta_geog.json")
meta_topo = pd.read_json("Ready/meta_topo.json")
meta_hydro = pd.read_json("Ready/meta_hydro.json")

# Outer-join them pairwise on nz_segment: geog with topo, then the result with hydro
meta_merged = (
    meta_geog
    .merge(meta_topo, on='nz_segment', how='outer')
    .merge(meta_hydro, on='nz_segment', how='outer')
)

# Keep the first occurrence of any repeated column label, discard `nz_reach`,
# then promote `nz_reach_x` to take the `nz_reach` name.
# NOTE(review): if the merges also produce an `nz_reach_y` column it is left in
# the output -- confirm that is intended.
first_occurrence = ~meta_merged.columns.duplicated()
meta_merged = (
    meta_merged.loc[:, first_occurrence]
    .drop(columns="nz_reach")
    .rename(columns={'nz_reach_x': 'nz_reach'})
)

# Write the result as a list of JSON records
meta_merged.to_json("Ready/metadat_simple_df.json", orient="records", indent=4)

# metadata = {}
# metadata["meta_geog"] = meta_geog
# metadata["meta_topo"] = meta_topo
# metadata["meta_hydro"] = meta_hydro

# # Save the dictionary of DataFrames as a pickled file
# with open("Ready/metadat_complex_dict.p", 'wb') as pickle_file:
#     pickle.dump(metadata, pickle_file)

# # Load the pickled dictionary from the file
# with open('Ready/metadat_complex_dict.p', 'rb') as pickle_file:
#     loaded_data_frames = pickle.load(pickle_file)

In [None]:
import pandas as pd
import numpy as np
import pickle

# Read the RS-index lookup (`rs_data`) from its pickle file
with open('IAI/RS_Indices.pkl', 'rb') as rs_file:
    rs_data = pickle.load(rs_file)

# Flatten the rs_data entry for one row into "<index>_median" / "<index>_stdv" values
def extract_rs_indices(row):
    """Return the flattened `rs_data` statistics for one row.

    `row` is indexed by 'nz_segment' (converted with int() before the lookup)
    and 'date'. The entry found at rs_data[segment][date] maps an index name to
    a dict of statistics; each index contributes two keys to the result,
    "<lowercased name>_median" and "<lowercased name>_stdv", with NaN standing
    in for a statistic that is absent. An empty dict is returned when the
    segment or the date has no entry.
    """
    segment_id = int(row['nz_segment'])
    sample_date = row['date']
    per_index = rs_data.get(segment_id, {}).get(sample_date, {})

    return {
        f"{name.lower()}_{stat}": stats.get(stat, np.nan)
        for name, stats in per_index.items()
        for stat in ('median', 'stdv')
    }

# Run the extractor over every row; result_type='expand' spreads each returned dict into columns
extracted_rs = measurements.apply(extract_rs_indices, axis=1, result_type='expand')

# Put the new columns beside the originals, keeping only the first of any repeated column label
combined = pd.concat([measurements, extracted_rs], axis=1)
is_first_occurrence = ~combined.columns.duplicated()
measurements_with_rs = combined.loc[:, is_first_occurrence]
measurements_with_rs.to_json("Ready/measurements_with_rs.json")
print(measurements_with_rs)

In [None]:
import pandas as pd
import numpy as np
import pickle

# Load the climate lookup that extract_climate_indices reads. This load had been
# commented out, which left `climate_data` undefined on a fresh kernel and made
# the apply() call further down fail with NameError.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('ClimateData.pkl', 'rb') as file:
    climate_data = pickle.load(file)

# Look up the climate_data entry for one row and normalise its key names
def extract_climate_indices(row):
    """Return the `climate_data` values for one row with normalised keys.

    `row` is indexed by 'nz_segment' and 'date'; both are used as-is for the
    lookup climate_data[segment][date]. Every key of the entry found is
    lower-cased and has '.' replaced by '_'; the values are passed through
    unchanged. An empty dict is returned when the segment or the date has no
    entry.
    """
    segment_key = row['nz_segment']
    sample_date = row['date']
    readings = climate_data.get(segment_key, {}).get(sample_date, {})

    return {
        name.lower().replace('.', '_'): reading
        for name, reading in readings.items()
    }

# Run extract_climate_indices on every row of measurements_with_rs;
# result_type='expand' turns each returned dict into columns of `extracted_climate`.
extracted_climate = measurements_with_rs.apply(extract_climate_indices, axis=1, result_type='expand')

# NOTE(review): the join/save step below is disabled, so within this cell
# `extracted_climate` is computed but not combined with measurements_with_rs.
# # Join the extracted values to the original DataFrame
# measurements_with_climate = pd.concat([measurements_with_rs, extracted_climate], axis=1)
# measurements_with_climate = measurements_with_climate.loc[:, ~measurements_with_climate.columns.duplicated()]
# measurements_with_climate.to_json("Ready/measurements_with_rs&climate_simple.json")
# print(measurements_with_climate)

In [None]:
# meta = pd.read_json("Ready/metadat_df.json")

# meta['nz_segment'] = meta['nz_segment'].astype(str)
# measurements_with_climate['nz_segment'] = measurements_with_climate['nz_segment'].astype(str)

# # Merge DataFrames based on "nz_segment"
# all_data_nov2019_df = pd.merge(measurements_with_climate, meta, on='nz_segment', how='outer',
#                                suffixes=('_left', '_right'))

# all_data_nov2019_df = all_data_nov2019_df.loc[:, ~all_data_nov2019_df.columns.duplicated()]
# all_data_nov2019_df.drop(['s_id', 'nz_reach_right'], axis=1, inplace=True)
# all_data_nov2019_df.rename(columns={'sID': 's_id', 'nz_reach_left': 'nz_reach'}, inplace=True)
# all_data_nov2019_df2 = all_data_nov2019_df
# all_data_nov2019_df.drop(['geometry_wkt'], axis=1, inplace=True)
# all_data_nov2019_df.to_json("Ready/all_data_nov2019_df.json")

# Save the merged DataFrame to a pickle file
# with open('Ready/all_data_nov2021_df.p', 'wb') as pickle_file:
#     pickle.dump(all_data_nov2019_df2, pickle_file)

In [None]:
import pandas as pd
import json
import pickle
# Read the combined DataFrame back from its pickle file
with open('Ready/Complex_Structures/all_data_nov2021_df.p', 'rb') as pickle_in:
    all_data_nov2021_df = pickle.load(pickle_in)

# Show the column labels as the cell output
all_data_nov2021_df.columns

In [None]:
# Columns copied verbatim into each section of a record, in output order.
GEOGRAPHIC_FIELDS = [
    "lat", "long", "altitude", "island", "region", "district", "council_site_id",
    "agency", "climate", "geology", "landcover", "landcover_lawa", "swq_landuse",
    "src_of_flw",
]
TOPOGRAPHIC_FIELDS = [
    f"{measure}_{stat}"
    for measure in ("elevation", "aspect", "slope", "tri")
    for stat in ("min", "median", "max", "stdev")
]
HYDROLOGIC_FIELDS = [
    "from_node", "to_node", "length_m", "strahler_order", "dis_to_sea_m",
    "catchment_area_m", "sea_drain_catchment", "catchment",
]
# Each name here has a "<name>_median" and a "<name>_stdv" column.
RS_INDEX_NAMES = [
    "mndwi", "ndvi", "nirv", "nsmi", "bsi", "evi",
    "savi", "ndmi", "nbr", "ci", "lai", "fapar",
]
CLIMATE_FIELDS = ["air_temp", "earth_temp", "rain", "soil_moist"]


def _row_to_record(row):
    """Build the nested record for one row of all_data_nov2021_df.

    The result has three metadata sections ("geographic", "topographic",
    "hydrologic") followed by the measurement fields, with the per-index
    median/stdv pairs grouped under "rs_indices" and the climate columns under
    "climate_indices". Keys and key order match the hand-written literals this
    helper replaces. Raises KeyError if `row` lacks any referenced column.
    """
    return {
        "geographic": {field: row[field] for field in GEOGRAPHIC_FIELDS},
        "topographic": {field: row[field] for field in TOPOGRAPHIC_FIELDS},
        "hydrologic": {field: row[field] for field in HYDROLOGIC_FIELDS},
        "date": row['date'],
        "contaminants": row['contaminant'],
        "value": row['value'],
        "censor_measured": row['censor_measured'],
        "censor_type": row['censor_type'],
        "flow": row['flow'],
        "rs_indices": {
            name: {"median": row[f"{name}_median"], "stdv": row[f"{name}_stdv"]}
            for name in RS_INDEX_NAMES
        },
        "climate_indices": {field: row[field] for field in CLIMATE_FIELDS},
    }


nested_dict = {}

# The loop no longer assigns to `metadata` or `measurements`: doing so replaced
# the notebook-level objects of the same names with the last row's dicts.
for _, row in all_data_nov2021_df.iterrows():
    # One entry per (s_id, nz_segment, nz_reach) combination.
    # NOTE(review): rows that share this key overwrite one another, so only the
    # last row seen for each combination is kept -- confirm that is intended.
    site_key = (row['s_id'], row['nz_segment'], row['nz_reach'])
    nested_dict[site_key] = _row_to_record(row)
    

# Persist the records as a list. The target has a ".p" extension and the cell
# that follows reads it back with pickle.load, so it must be a real pickle:
# the earlier json.dump wrote JSON text that pickle.load cannot read (and
# json.dump also rejects values such as NumPy integers).
nested_dict_list = list(nested_dict.values())
with open('Ready/Complex_Structures/all_data_nov2021_df_2.p', "wb") as pickle_file:
    pickle.dump(nested_dict_list, pickle_file)

In [None]:
import pickle

# Location of the pickle file to read
pickle_file_path = 'Ready/Complex_Structures/all_data_nov2021_df_2.p'

# Deserialize its contents into `loaded_data`
with open(pickle_file_path, 'rb') as pickle_in:
    loaded_data = pickle.load(pickle_in)

# Show what was loaded
print(loaded_data)

In [None]:
# Rows whose region is exactly "Southland"
is_southland = all_data_nov2021_df["region"] == "Southland"

# Columns left out of the export
dropped_columns = ["s_id", "nz_segment", "year", "month", "bi_month", "quarter",
                   "censor_measured", "censor_type", "q_source", "from_node", "to_node",
                   "catchment"]

temp = all_data_nov2021_df[is_southland].drop(columns=dropped_columns)
temp.to_csv("Southland.csv")

In [None]:
# Read Ready/meta_hydro.json into `hydro` and print it
hydro = pd.read_json("Ready/meta_hydro.json")
print(hydro)

# Disabled clean-up steps, kept for reference:
# hydro = hydro.merge(topo[['nz_reach','nz_segment']], on='nz_reach', how='outer')
# hydro = hydro.drop_duplicates(subset='nz_segment')
# print(hydro)

# # Reset the index to remove the "Unnamed: 0" column
# hydro.reset_index(drop=True, inplace=True)

# # Create a new DataFrame with the desired column order
# new_df = hydro.copy()

# # Drop the original 'nz_segment' column from the new DataFrame
# new_df.drop(columns=['nz_segment'], inplace=True)

# # Reinsert the 'nz_segment' column at the first position
# new_df.insert(0, 'nz_segment', hydro['nz_segment'])

# hydro = new_df
# # print(hydro)

# Write the DataFrame to CSV (the row index is written too, since index=False is not passed)
hydro.to_csv("Ready/meta_hydro.csv")

In [None]:
# Read Ready/meta_topo.json into `topo` and print it
topo = pd.read_json("Ready/meta_topo.json")
print(topo)

# Disabled clean-up steps, kept for reference:
# topo = topo.drop_duplicates(subset='nz_segment')
# # topo.drop(columns=['Unnamed: 0'], inplace=True)

# # Reset the index to remove the "Unnamed: 0" column
# topo.reset_index(drop=True, inplace=True)

# Write the DataFrame to CSV (the row index is written too, since index=False is not passed)
topo.to_csv("Ready/meta_topo.csv")

In [None]:
# Read Ready/meta_geog.json into `geo` and print it
geo = pd.read_json("Ready/meta_geog.json")
print(geo)

# Disabled clean-up steps, kept for reference:
# geo = geo.drop_duplicates(subset='nz_segment')
# # geo.drop(columns=['Unnamed: 0'], inplace=True)
# print(geo)
# # Reset the index to remove the "Unnamed: 0" column
# geo.reset_index(drop=True, inplace=True)

# Write the DataFrame to CSV (the row index is written too, since index=False is not passed)
geo.to_csv("Ready/meta_geog.csv")

In [None]:
# Join geo with hydro, then the result with topo, on nz_segment.
# No `how` is given, so both joins use the default (inner) join.
merged_df = geo.merge(hydro, on='nz_segment').merge(topo, on='nz_segment')
merged_df

In [None]:
# Source column -> output column, listed in the order the columns are selected
column_name_mapping = {
    'nzsegment': 'nz_segment',
    "Elev_min": "elevation_min",
    "Elev_media": "elevation_median",
    "Elev_max": "elevation_max",
    "Elev_stdev": "elevation_stdev",
    "Aspe_min": "aspect_min",
    "Aspe_media": "aspect_median",
    "Aspe_max": "aspect_max",
    "Aspe_stdev": "aspect_stdev",
    "Slop_min": "slope_min",
    "Slop_media": "slope_median",
    "Slop_max": "slope_max",
    "Slop_stdev": "slope_stdev",
    "TRI_min": "tri_min",
    "TRI_median": "tri_median",
    "TRI_max": "tri_max",
    "TRI_stdev": "tri_stdev",
}

# Take exactly the mapped source columns from `sites`
sites_topo = sites[list(column_name_mapping)]

# Outer-join against the segment ids of meta_geographic, so ids found on either side are kept
sites_topo = meta_geographic[["nzsegment"]].merge(sites_topo, on='nzsegment', how='outer')

# Switch to the output column names, then save and print
sites_topo = sites_topo.rename(columns=column_name_mapping)
sites_topo.to_json("Ready/meta_topographic.json")
print(sites_topo)

## Reconstructing saved files

In [112]:
import pandas as pd

# Columns of the long-format table that feed the pivot
long_columns = ['nz_segment', 'nz_reach', 'contaminant', 'value', 'flow', 'date']

# Read the long table, spread `value` into one column per contaminant (taking the
# first value where a segment/reach/date/flow combination repeats), then turn the
# grouping keys back into ordinary columns.
# NOTE(review): rows with a missing value in one of the index columns may be left
# out by the pivot's grouping -- confirm against the source row count.
measurements = (
    pd.read_json("measurements_with_rs&climate_simplified.json")[long_columns]
    .pivot_table(values='value', index=['nz_segment', 'nz_reach', 'date', 'flow'],
                 columns='contaminant', aggfunc='first')
    .reset_index()
)

# Show the wide table
print(measurements)

contaminant  nz_segment  nz_reach       date      flow    ECOLI    TN     TP
0               1002765   1002580 2019-01-17  0.551681    460.0  0.10  0.019
1               1002765   1002580 2019-02-12  0.200425    370.0  0.15  0.012
2               1002765   1002580 2019-04-03  1.520738    290.0  0.16  0.014
3               1002765   1002580 2019-05-01  0.226481    250.0  0.11  0.010
4               1002765   1002580 2019-06-05  2.597617    290.0  0.15  0.020
...                 ...       ...        ...       ...      ...   ...    ...
20607          15320438  15062815 2020-08-10  1.485043     60.0  1.84  0.086
20608          15320438  15062815 2020-09-07  2.074016    170.0  1.85  0.077
20609          15320438  15062815 2020-10-12  3.392775    190.0  2.10  0.106
20610          15320438  15062815 2020-11-16  0.787419  14000.0  1.49  0.116
20611          15320438  15062815 2020-12-07  0.623957    270.0  1.04  0.040

[20612 rows x 7 columns]


In [72]:
# Read metadata_simplified.json into `meta`; the bare name below displays it as the cell output
meta = pd.read_json("metadata_simplified.json")
meta

Unnamed: 0,s_id,nz_segment,nz_reach,lat,long,altitude,island,region,district,council_site_id,...,tri_max,tri_stdev,from_node,to_node,length_m,strahler_order,dis_to_sea_m,catchment_area_m,sea_drain_catchment,catchment
0,LAWA-102854,1002765,1002580,-35.0105,173.6018,19.0,North_Island,North Auckland,Far North District,304641,...,41.775066,4.446160,2942,2872,779.11688,6,9348.154,156565856.0,Oruaiti River,oruru
1,NRC-00024,1003120,1002906,-35.0395,173.4907,20.0,North_Island,North Auckland,Far North District,108979,...,36.807804,5.065007,3363,3219,1416.39610,5,3769.188,91675824.0,Peria River,oruru oruru river
2,NRC-00017,1003549,1003666,-35.1080,173.2273,8.0,North_Island,North Auckland,Far North District,100370,...,44.956730,5.494675,4465,4036,4040.43723,6,14578.300,314201760.0,Awanui River,awanui awanui river
3,NRC-00020,1004253,1003935,-35.0981,173.7733,20.0,North_Island,North Auckland,Far North District,102674,...,40.960320,5.335415,4365,4312,512.13203,5,3785.879,98159376.0,Kaeo River,kaeo kaeo river
4,NRC-00016,1004707,1004333,-35.1215,173.2754,20.0,North_Island,North Auckland,Far North District,100363,...,44.956730,5.377339,4762,4668,1748.52814,5,24134.320,219430848.0,Awanui River,awanui awanui river
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083,ES-00054,15319462,15061999,-46.5421,168.4339,10.0,South_Island,Southland,Invercargill City,mokotua stream at awarua,...,1.925784,0.249268,63307,63260,429.41125,3,10148.740,5497200.0,Duck Creek,mokotua stream
1084,ES-00058,15319333,15062000,-46.5407,168.5692,,South_Island,Southland,Southland District,moffat creek at moffat road,...,4.437410,0.362373,63267,63281,217.27922,3,6967.493,14676300.0,Waituna Creek,moffat creek
1086,ES-00034,15319599,15062197,-46.5742,169.1467,19.0,South_Island,Southland,Southland District,waikawa river at progress valley,...,37.135399,5.107625,63464,63559,809.11688,5,5952.275,179421360.0,Waikawa River East Branch,waikawa river
1087,ES-00033,15320424,15062800,-46.6297,169.0232,20.0,South_Island,Southland,Southland District,waikopikopiko stream at haldane curiobay,...,33.114792,3.719500,64126,64186,1286.10173,4,2275.219,42525900.0,Waikopikopiko Stream,waikopikopiko stream


In [113]:
import pickle
# Read the climate lookup from its pickle file
with open('Prep_ClimateData.pkl', 'rb') as pickle_in:
    climated_df = pickle.load(pickle_in)

# Work on a copy so `measurements` itself is left untouched
measurements_clim = measurements.copy()

for row_label, record in measurements_clim.iterrows():
    # The lookup is addressed by the segment id as text and the date as YYYY-MM-DD
    segment_key = str(record['nz_segment'])
    date_key = record['date'].strftime('%Y-%m-%d')

    # Nothing to add when this segment/date pair has no entry
    if segment_key not in climated_df or date_key not in climated_df[segment_key]:
        continue

    # One column per climate measure, named after its key in the lookup
    for measure_name, measure_value in climated_df[segment_key][date_key].items():
        measurements_clim.at[row_label, measure_name] = measure_value

measurements_clim

contaminant,nz_segment,nz_reach,date,flow,ECOLI,TN,TP,air.temp,air.temp_mean_dist,earth.temp,earth.temp_mean_dist,rain,rain_mean_dist,soil.moist,soil.moist_mean_dist
0,1002765,1002580,2019-01-17,0.551681,460.0,0.10,0.019,19.8,29.3,22.203921,34.4,0.0,13.6,23.872306,34.4
1,1002765,1002580,2019-02-12,0.200425,370.0,0.15,0.012,20.4,29.3,23.914407,34.4,0.0,13.6,17.873328,34.4
2,1002765,1002580,2019-04-03,1.520738,290.0,0.16,0.014,19.8,29.3,20.768420,34.4,26.5,13.6,20.711849,34.4
3,1002765,1002580,2019-05-01,0.226481,250.0,0.11,0.010,16.7,29.3,15.603614,34.4,2.4,13.6,18.590393,34.4
4,1002765,1002580,2019-06-05,2.597617,290.0,0.15,0.020,12.5,29.3,12.380624,34.4,0.0,13.6,25.864415,34.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20607,15320438,15062815,2020-08-10,1.485043,60.0,1.84,0.086,8.5,38.6,6.036103,,3.3,17.0,41.568000,47.6
20608,15320438,15062815,2020-09-07,2.074016,170.0,1.85,0.077,10.3,38.6,7.630895,,2.6,17.0,44.248000,47.6
20609,15320438,15062815,2020-10-12,3.392775,190.0,2.10,0.106,12.5,38.6,13.278959,,0.0,17.0,47.240000,47.6
20610,15320438,15062815,2020-11-16,0.787419,14000.0,1.49,0.116,16.0,38.6,17.564888,,2.1,17.0,34.508000,47.6


In [114]:
import pickle

# Read the RS-index lookup from its pickle file
with open('Prep_RS_Indices.pkl', 'rb') as pickle_in:
    rs_df = pickle.load(pickle_in)

# Work on a copy so `measurements_clim` is left untouched
measurements_clim_rs = measurements_clim.copy()

for row_label, record in measurements_clim_rs.iterrows():
    # The lookup is addressed by the segment id as stored in the row and the date as YYYY-MM-DD
    segment_key = record['nz_segment']
    date_key = record['date'].strftime('%Y-%m-%d')

    # Nothing to add when this segment/date pair has no entry
    if segment_key not in rs_df or date_key not in rs_df[segment_key]:
        continue

    # Each index holds a dict of statistics; write one "<index>_<statistic>" column per value
    for index_name, stats in rs_df[segment_key][date_key].items():
        for stat_name, stat_value in stats.items():
            measurements_clim_rs.loc[row_label, f"{index_name}_{stat_name}"] = stat_value

# Show the enriched table as the cell output
measurements_clim_rs


contaminant,nz_segment,nz_reach,date,flow,ECOLI,TN,TP,air.temp,air.temp_mean_dist,earth.temp,...,NDMI_median,NDMI_stdv,NBR_median,NBR_stdv,CI_median,CI_stdv,LAI_median,LAI_stdv,FAPAR_median,FAPAR_stdv
0,1002765,1002580,2019-01-17,0.551681,460.0,0.10,0.019,19.8,29.3,22.203921,...,0.316412,0.127776,0.597667,0.119834,-0.324248,0.153055,2.992797e+164,,0.980531,0.086687
1,1002765,1002580,2019-02-12,0.200425,370.0,0.15,0.012,20.4,29.3,23.914407,...,0.230465,0.182764,0.503866,0.186789,-0.242260,0.203011,7.132584e+225,,0.902357,0.137356
2,1002765,1002580,2019-04-03,1.520738,290.0,0.16,0.014,19.8,29.3,20.768420,...,0.246043,0.176235,0.527351,0.175345,-0.336011,0.174758,1.531129e+131,3672762790856391074414540044615508336798276734...,0.972681,0.107242
3,1002765,1002580,2019-05-01,0.226481,250.0,0.11,0.010,16.7,29.3,15.603614,...,0.292957,0.156057,0.582008,0.148397,-0.317782,0.158033,1.007294e+149,,0.996107,0.092724
4,1002765,1002580,2019-06-05,2.597617,290.0,0.15,0.020,12.5,29.3,12.380624,...,0.300763,0.164494,0.589825,0.149043,-0.385018,0.179870,7.891292e+232,,1.011733,0.100439
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20607,15320438,15062815,2020-08-10,1.485043,60.0,1.84,0.086,8.5,38.6,6.036103,...,0.472641,0.166823,0.691443,0.126833,-0.407372,0.230933,8.871964e+277,,0.996133,0.289445
20608,15320438,15062815,2020-09-07,2.074016,170.0,1.85,0.077,10.3,38.6,7.630895,...,0.480498,0.110887,0.699181,0.100272,-0.332029,0.172317,3.729390e+148,,0.980578,0.210131
20609,15320438,15062815,2020-10-12,3.392775,190.0,2.10,0.106,12.5,38.6,13.278959,...,0.363301,0.142528,0.628936,0.144826,-0.407342,0.205656,3.585518e+140,4866519625660916560785387147657436471442363560...,1.027393,0.125299
20610,15320438,15062815,2020-11-16,0.787419,14000.0,1.49,0.116,16.0,38.6,17.564888,...,0.433592,0.141724,0.699280,0.161502,-0.523547,0.211678,1.470907e+137,8586126231205457085272164939240466198476128036...,1.058668,0.126686


In [130]:
# Left-join `meta` onto every measurement row by nz_segment; metadata columns whose
# names clash with measurement columns receive a "_meta" suffix. Then save as JSON.
merged_df = measurements_clim_rs.merge(meta, on='nz_segment', how='left', suffixes=('', '_meta'))
merged_df.to_json("modeling_df.json")