In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, random_split
import scipy.stats as stats
import pickle
import statsmodels.api as sm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json

test_df = pd.read_csv('test_processed.csv')
test_df['issue_date'] = pd.to_datetime(test_df['issue_date'])
test_df['month'] = test_df['issue_date'].dt.month
test_df['year'] = test_df['issue_date'].dt.year
cols = ['usgs_rw','mnf_rw','mjo_rw','nino_rw','oni_rw','pdo_rw','pna_rw','soi_rw']
test_mnf = test_df[['site_id','year','month','issue_date','mnf_scaled']]
def string_to_float_list(s):
    items = s.strip('[]').split(',')
    return [float(item.strip()) for item in items if item]
for col in cols:
    test_df[col] = test_df[col].apply(string_to_float_list)

In [3]:
test_df

Unnamed: 0,site_id,issue_date,volume_10,volume_50,volume_90,usgs_rw,mnf_scaled,mnf_rw,mjo_rw,nino_rw,oni_rw,pdo_rw,pna_rw,soi_rw,month,year
0,hungry_horse_reservoir_inflow,2005-01-01,0.0,0.0,0.0,"[0.07350602135878209, 0.07407407407407408, 0.0...",0.050192,"[0.037790486675308835, 0.03420793947373866, 0....","[1.68, 1.03, 0.28, -1.14, -2.39, -1.6, -0.31, ...","[0.28, 0.12, -0.78, -0.02, -0.97, -0.94, -0.75...","[0.35, 0.37, 0.31, 0.23, 0.17, 0.17, 0.28, 0.4...","[-0.55, -0.21, -0.15, -0.0, 0.61, -0.11, 0.04,...","[0.41, 1.06, 0.3, 0.51, -1.76, -0.37, 0.09, 1....","[-1.3, 1.2, 0.4, -0.9, 1.0, -0.8, -0.5, -0.3, ...",1,2005
1,hungry_horse_reservoir_inflow,2005-01-08,0.0,0.0,0.0,"[0.07577823221995002, 0.076346284935242, 0.075...",0.050192,"[0.037790486675308835, 0.03420793947373866, 0....","[0.28, -1.14, -2.39, -1.6, -0.31, -0.28, -0.19...","[0.28, 0.12, -0.78, -0.02, -0.97, -0.94, -0.75...","[0.35, 0.37, 0.31, 0.23, 0.17, 0.17, 0.28, 0.4...","[-0.55, -0.21, -0.15, -0.0, 0.61, -0.11, 0.04,...","[0.41, 1.06, 0.3, 0.51, -1.76, -0.37, 0.09, 1....","[-1.3, 1.2, 0.4, -0.9, 1.0, -0.8, -0.5, -0.3, ...",1,2005
2,hungry_horse_reservoir_inflow,2005-01-15,0.0,0.0,0.0,"[0.14224039990911158, 0.14280845262440356, 0.1...",0.050192,"[0.037790486675308835, 0.03420793947373866, 0....","[-1.14, -2.39, -1.6, -0.31, -0.28, -0.19, -0.9...","[0.28, 0.12, -0.78, -0.02, -0.97, -0.94, -0.75...","[0.35, 0.37, 0.31, 0.23, 0.17, 0.17, 0.28, 0.4...","[-0.55, -0.21, -0.15, -0.0, 0.61, -0.11, 0.04,...","[0.41, 1.06, 0.3, 0.51, -1.76, -0.37, 0.09, 1....","[-1.3, 1.2, 0.4, -0.9, 1.0, -0.8, -0.5, -0.3, ...",1,2005
3,hungry_horse_reservoir_inflow,2005-01-22,0.0,0.0,0.0,"[0.07464212678936606, 0.07521017950465804, 0.0...",0.050192,"[0.037790486675308835, 0.03420793947373866, 0....","[-2.39, -1.6, -0.31, -0.28, -0.19, -0.9, -0.93...","[0.28, 0.12, -0.78, -0.02, -0.97, -0.94, -0.75...","[0.35, 0.37, 0.31, 0.23, 0.17, 0.17, 0.28, 0.4...","[-0.55, -0.21, -0.15, -0.0, 0.61, -0.11, 0.04,...","[0.41, 1.06, 0.3, 0.51, -1.76, -0.37, 0.09, 1....","[-1.3, 1.2, 0.4, -0.9, 1.0, -0.8, -0.5, -0.3, ...",1,2005
4,hungry_horse_reservoir_inflow,2005-02-01,0.0,0.0,0.0,"[0.0786184957964099, 0.07691433765053397, 0.07...",0.029020,"[0.03420793947373866, 0.03973546692170584, 0.0...","[-0.31, -0.28, -0.19, -0.9, -0.93, -0.27, 0.72...","[0.12, -0.78, -0.02, -0.97, -0.94, -0.75, -0.6...","[0.37, 0.31, 0.23, 0.17, 0.17, 0.28, 0.47, 0.6...","[-0.21, -0.15, -0.0, 0.61, -0.11, 0.04, 0.24, ...","[1.06, 0.3, 0.51, -1.76, -0.37, 0.09, 1.55, -0...","[1.2, 0.4, -0.9, 1.0, -0.8, -0.5, -0.3, -0.3, ...",2,2005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7235,owyhee_r_bl_owyhee_dam,2023-06-22,0.0,0.0,0.0,"[0.01703534197813374, 0.01703534197813374, 0.0...",0.076305,"[0.10317582841704802, 0.2993648343165904, 0.17...","[-1.02, 0.2, 0.35, 1.29, 1.72, 0.99, -1.11, -1...","[-1.41, -1.16, -0.57, -1.12, -1.81, -1.36, -0....","[-0.99, -0.85, -0.81, -0.91, -1.01, -0.99, -0....","[-1.34, -2.62, -2.38, -2.29, -1.8, -2.41, -2.2...","[-0.31, 2.54, 0.79, 0.21, 0.17, -0.73, -0.66, ...","[1.7, 0.8, 1.0, 1.6, 1.7, 0.3, 2.1, 1.4, 1.4, ...",6,2023
7236,owyhee_r_bl_owyhee_dam,2023-07-01,0.0,0.0,0.0,"[0.01686583608780405, 0.016950589032968894, 0....",,"[0.2993648343165904, 0.1790174780834758, 0.076...","[0.2, 0.35, 1.29, 1.72, 0.99, -1.11, -1.21, -0...","[-1.16, -0.57, -1.12, -1.81, -1.36, -0.3, -0.2...","[-0.85, -0.81, -0.91, -1.01, -0.99, -0.92, -0....","[-2.62, -2.38, -2.29, -1.8, -2.41, -2.21, -1.2...","[2.54, 0.79, 0.21, 0.17, -0.73, -0.66, 0.21, -...","[0.8, 1.0, 1.6, 1.7, 0.3, 2.1, 1.4, 1.4, 0.2, ...",7,2023
7237,owyhee_r_bl_owyhee_dam,2023-07-08,0.0,0.0,0.0,"[0.014153741842529028, 0.014153741842529028, 0...",,"[0.2993648343165904, 0.1790174780834758, 0.076...","[1.29, 1.72, 0.99, -1.11, -1.21, -0.28, 0.31, ...","[-1.16, -0.57, -1.12, -1.81, -1.36, -0.3, -0.2...","[-0.85, -0.81, -0.91, -1.01, -0.99, -0.92, -0....","[-2.62, -2.38, -2.29, -1.8, -2.41, -2.21, -1.2...","[2.54, 0.79, 0.21, 0.17, -0.73, -0.66, 0.21, -...","[0.8, 1.0, 1.6, 1.7, 0.3, 2.1, 1.4, 1.4, 0.2, ...",7,2023
7238,owyhee_r_bl_owyhee_dam,2023-07-15,0.0,0.0,0.0,"[0.014323247732858717, 0.014408000678023562, 0...",,"[0.2993648343165904, 0.1790174780834758, 0.076...","[1.72, 0.99, -1.11, -1.21, -0.28, 0.31, 0.61, ...","[-1.16, -0.57, -1.12, -1.81, -1.36, -0.3, -0.2...","[-0.85, -0.81, -0.91, -1.01, -0.99, -0.92, -0....","[-2.62, -2.38, -2.29, -1.8, -2.41, -2.21, -1.2...","[2.54, 0.79, 0.21, 0.17, -0.73, -0.66, 0.21, -...","[0.8, 1.0, 1.6, 1.7, 0.3, 2.1, 1.4, 1.4, 0.2, ...",7,2023


In [4]:
class MultiLSTM(nn.Module):
    def __init__(self, months, dropout_rate = 0.2):
        super(MultiLSTM, self).__init__()
        # LSTM layers for each input
        self.lstm_usgs = nn.LSTM(30, 32, 2, batch_first = True)
        self.lstm_mnf = nn.LSTM(3, 32, batch_first = True)
        self.lstm_mjo = nn.LSTM(18, 32, 2, batch_first = True)
        self.lstm_nino = nn.LSTM(12, 32, batch_first = True)
        self.lstm_oni = nn.LSTM(12, 32, batch_first = True)
        self.lstm_pdo = nn.LSTM(12, 32, batch_first = True)
        self.lstm_pna = nn.LSTM(12, 32, batch_first = True)
        self.lstm_soi = nn.LSTM(12, 32, batch_first = True)
        self.month = nn.Embedding(months+1, 32)

        # Total concat size
        concat_size = 32*9

        self.dropout = nn.Dropout(dropout_rate)
        
        # Final dense layers after concatenation
        self.fcc_1 = nn.Linear(concat_size, 128)
        self.fcc_2 = nn.Linear(128, 128)
        self.fcc_3 = nn.Linear(128, 64)
        self.fcc_4 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 3)

    def forward(self, usgs, mnf, mjo, nino, oni, pdo, pna, soi, month):
        # Process each LSTM Input
        usgs_out, _ = self.lstm_usgs(usgs)
        mnf_out, _ = self.lstm_mnf(mnf)
        mjo_out, _ = self.lstm_mjo(mjo)
        nino_out, _ = self.lstm_nino(nino)
        oni_out, _ = self.lstm_oni(oni)
        pdo_out, _ = self.lstm_pdo(pdo)
        pna_out, _ = self.lstm_pna(pna)
        soi_out, _ = self.lstm_soi(soi)
        month_out = self.month(month)
        # Combine all outputs
        combined = torch.cat([usgs_out, mnf_out, mjo_out, nino_out, oni_out, pdo_out, pna_out, soi_out, month_out], dim=1)
        combined = self.dropout(combined)
        # Final dense layers and output
        # Pass through fully connected layers
        # x = torch.relu(self.fcc_1(combined))
        x = self.fcc_1(combined)
        x = self.dropout(x)
        x = torch.relu(self.fcc_2(x))
        x = self.dropout(x)
        x = torch.relu(self.fcc_3(x))
        x = self.dropout(x)
        x = torch.relu(self.fcc_4(x))

        output = self.output_layer(x)
        return output
    def enable_dropout(self):
        """enable montecarlo dropout"""
        for module in self.modules():
            if isinstance(module, nn.Dropout):
                module.train()

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        """
        Args:
            patience (int): How many epochs to wait after last time validation loss improved.
                            Default: 5
            min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                                Default: 0
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0


In [5]:
mar_model = MultiLSTM(3)
mar_model.load_state_dict(torch.load('mar_model_quant.pth'))
apr_model = MultiLSTM(4)
apr_model.load_state_dict(torch.load('apr_model_quant.pth'))
may_model = MultiLSTM(5)
may_model.load_state_dict(torch.load('may_model_quant.pth'))
jun_model = MultiLSTM(6)
jun_model.load_state_dict(torch.load('jun_model_quant.pth'))
jul_model = MultiLSTM(7)
jul_model.load_state_dict(torch.load('jul_model_quant.pth'))

<All keys matched successfully>

In [6]:
def test_lstm(model, df, month, col_name):
    filtered_df = df[df['month']<=month].reset_index(drop=True)
    usgs_tensor = torch.tensor(filtered_df['usgs_rw'].tolist()).float()
    mnf_tensor = torch.tensor(filtered_df['mnf_rw'].tolist()).float()
    mjo_tensor = torch.tensor(filtered_df['mjo_rw'].tolist()).float()
    nino_tensor = torch.tensor(filtered_df['nino_rw'].tolist()).float()
    oni_tensor = torch.tensor(filtered_df['oni_rw'].tolist()).float()
    pdo_tensor = torch.tensor(filtered_df['pdo_rw'].tolist()).float()
    pna_tensor = torch.tensor(filtered_df['pna_rw'].tolist()).float()
    soi_tensor = torch.tensor(filtered_df['soi_rw'].tolist()).float()
    month_tensor = torch.tensor(filtered_df['month'].tolist(), dtype = torch.float32).long()
    dataset = TensorDataset(usgs_tensor, mnf_tensor, mjo_tensor, nino_tensor, oni_tensor, pdo_tensor, pna_tensor, soi_tensor, month_tensor)

    data_loader = DataLoader(dataset, batch_size=64, shuffle=False)


    with torch.no_grad():
        all_outputs = []
        for batch in data_loader:
            usgs, mnf, mjo, nino, oni, pdo, pna, soi, month = batch
            outputs = model(usgs, mnf, mjo, nino, oni, pdo, pna, soi, month)
            all_outputs+=outputs.cpu()
    dat = [t.tolist() for t in all_outputs]
    transposed_lists = list(zip(*dat))
    new_data = pd.DataFrame(transposed_lists).T.rename(columns = {0:'volume_10_'+col_name,1:'volume_50_'+col_name,2:'volume_90_'+col_name})
    result_df = pd.concat([filtered_df,new_data], axis = 1)
    return result_df[['site_id','issue_date','volume_10_'+col_name,'volume_50_'+col_name,'volume_90_'+col_name]]

In [7]:
mar_results = test_lstm(mar_model, test_df, 3, 'mar')
apr_results = test_lstm(apr_model, test_df, 4, 'apr')
may_results = test_lstm(may_model, test_df, 5, 'may')
jun_results = test_lstm(jun_model, test_df, 6, 'jun')
jul_results = test_lstm(jul_model, test_df, 7, 'jul')

In [8]:
test_nested_dict = {}
for ind, row in test_mnf.iterrows():
    site_id = row['site_id']
    year = row['year']
    month = row['month']
    volume = row['mnf_scaled']
    

    if site_id not in test_nested_dict:
        test_nested_dict[site_id] = {}
    if year not in test_nested_dict[site_id]:
        test_nested_dict[site_id][year] = {}

    test_nested_dict[site_id][year][month] = volume

In [9]:
with open('mnf_scalers.pkl', 'rb') as file:
    mnf_scalers = pickle.load(file)
with open('sjm_max_model.pkl', 'rb') as file:
    sjm_model = pickle.load(file)
with open('mry_max_model.pkl', 'rb') as file:
    mry_model = pickle.load(file)
with open('arf_max_model.pkl', 'rb') as file:
    arf_model = pickle.load(file)
lr_models = {'merced_river_yosemite_at_pohono_bridge':mry_model,'american_river_folsom_lake':arf_model,'san_joaquin_river_millerton_reservoir':sjm_model}
missing_sites = ['merced_river_yosemite_at_pohono_bridge','american_river_folsom_lake','san_joaquin_river_millerton_reservoir']
metadata = pd.read_csv('./data/metadata.csv')
metadata_startend = metadata[['site_id','season_start_month','season_end_month']]
submission_df = test_df[['site_id','issue_date']]
submission_df['month'] = submission_df['issue_date'].dt.month
submission_df['year'] = submission_df['issue_date'].dt.year
submission_df = pd.merge(submission_df,metadata_startend, on = 'site_id', how = 'left')
submission_df = pd.merge(submission_df,mar_results, on = ['site_id','issue_date'], how = 'left')
submission_df = pd.merge(submission_df,apr_results, on = ['site_id','issue_date'], how = 'left')
submission_df = pd.merge(submission_df,may_results, on = ['site_id','issue_date'], how = 'left')
submission_df = pd.merge(submission_df,jun_results, on = ['site_id','issue_date'], how = 'left')
submission_df = pd.merge(submission_df,jul_results, on = ['site_id','issue_date'], how = 'left')
vols_10 = []
vols_50 = []
vols_90 = []
for ind, row in submission_df.iterrows():
    mar_10 = row['volume_10_mar']
    apr_10 = row['volume_10_apr']
    may_10 = row['volume_10_may']
    jun_10 = row['volume_10_jun']
    jul_10 = row['volume_10_jul']
    mar_50 = row['volume_50_mar']
    apr_50 = row['volume_50_apr']
    may_50 = row['volume_50_may']
    jun_50 = row['volume_50_jun']
    jul_50 = row['volume_50_jul']
    mar_90 = row['volume_90_mar']
    apr_90 = row['volume_90_apr']
    may_90 = row['volume_90_may']
    jun_90 = row['volume_90_jun']
    jul_90 = row['volume_90_jul']
    site_id = row['site_id']
    season_start = row['season_start_month']
    season_end = row['season_end_month']
    month = row['month']
    year = row['year']
    if site_id not in missing_sites:
        if season_start == 4 and season_end == 7:
            if month<=season_start:
                total_vol_10 = apr_10 + may_10 + jun_10 + jul_10
                total_vol_50 = apr_50 + may_50 + jun_50 + jul_50
                total_vol_90 = apr_90 + may_90 + jun_90 + jul_90
            elif month > season_start:
                if month == 5:
                    total_vol_10 = test_nested_dict[site_id][year][4] + may_10 + jun_10 + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][4] + may_50 + jun_50 + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][4] + may_90 + jun_90 + jul_90
                elif month == 6:
                    total_vol_10 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_10 + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_50 + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_90 + jul_90
                elif month == 7:
                    total_vol_10 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_90
        elif season_start == 3 and season_end == 7:
            if month<=season_start:
                total_vol_10 = mar_10 + apr_10 + may_10 + jun_10 + jul_10
                total_vol_50 = mar_50 + apr_50 + may_50 + jun_50 + jul_50
                total_vol_90 = mar_90 + apr_90 + may_90 + jun_90 + jul_90
            elif month > season_start:
                if month == 4:
                    total_vol_10 = test_nested_dict[site_id][year][3] + apr_10 + may_10 + jun_10 + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][3] + apr_50 + may_50 + jun_50 + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][3] + apr_90 + may_90 + jun_90 + jul_90
                if month == 5:
                    total_vol_10 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + may_10 + jun_10 + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + may_50 + jun_50 + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + may_90 + jun_90 + jul_90
                elif month == 6:
                    total_vol_10 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_10 + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_50 + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_90 + jul_90
                elif month == 7:
                    total_vol_10 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_10
                    total_vol_50 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_50
                    total_vol_90 = test_nested_dict[site_id][year][3] + test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + test_nested_dict[site_id][year][6] + jul_90
        elif season_start == 4 and season_end == 6:
            if month<=season_start:
                total_vol_10 = apr_10 + may_10 + jun_10 
                total_vol_50 = apr_50 + may_50 + jun_50 
                total_vol_90 = apr_90 + may_90 + jun_90 
            elif month > season_start:
                if month == 5:
                    total_vol_10 = test_nested_dict[site_id][year][4] + may_10 + jun_10
                    total_vol_50 = test_nested_dict[site_id][year][4] + may_50 + jun_50
                    total_vol_90 = test_nested_dict[site_id][year][4] + may_90 + jun_90
                elif month == 6:
                    total_vol_10 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_10
                    total_vol_50 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_50
                    total_vol_90 = test_nested_dict[site_id][year][4] + test_nested_dict[site_id][year][5] + jun_90
    else:
        if month <= 4:
            vol_10 = np.array([apr_10,may_10,jun_10,jul_10])
            vol_50 = np.array([apr_50,may_50,jun_50,jul_50])
            vol_90 = np.array([apr_90,may_90,jun_90,jul_90])
        elif month == 5:
            vol_10 = np.array([test_nested_dict[site_id][year][4],may_10,jun_10,jul_10])
            vol_50 = np.array([test_nested_dict[site_id][year][4],may_50,jun_50,jul_50])
            vol_90 = np.array([test_nested_dict[site_id][year][4],may_90,jun_90,jul_90])
        elif month == 6:
            vol_10 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],jun_10,jul_10])
            vol_50 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],jun_50,jul_50])
            vol_90 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],jun_90,jul_90])
        elif month == 7:
            vol_10 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],test_nested_dict[site_id][year][6],jul_10])
            vol_50 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],test_nested_dict[site_id][year][6],jul_50])
            vol_90 = np.array([test_nested_dict[site_id][year][4],test_nested_dict[site_id][year][5],test_nested_dict[site_id][year][6],jul_90])
        lr_model = lr_models[site_id]
        vol_arr = np.array([vol_10,vol_50,vol_90])
        vol_arr = sm.add_constant(vol_arr,has_constant='add')
        pred = lr_model.predict(vol_arr)
        total_vol_10 = pred[0]
        total_vol_50 = pred[1]
        total_vol_90 = pred[2]
    vols_10.append(total_vol_10)
    vols_50.append(total_vol_50)
    vols_90.append(total_vol_90)
submission_df['volume_10_scaled'] = vols_10
submission_df['volume_50_scaled'] = vols_50
submission_df['volume_90_scaled'] = vols_90

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df['month'] = submission_df['issue_date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df['year'] = submission_df['issue_date'].dt.year


In [10]:
file_path = "volume_dict.txt"
with open(file_path, 'r') as file:
    volume_dict = json.load(file)
def unscale_res(df):
    volume_10 = []
    volume_50 = []
    volume_90 = []
    for ind,row in df.iterrows():
        site_id = row['site_id']
        max_vol = volume_dict[site_id]['max_vol']
        if site_id not in missing_sites:
            vol_10 = row['volume_10_scaled']*max_vol
            vol_50 = row['volume_50_scaled']*max_vol
            vol_90 = row['volume_90_scaled']*max_vol
        else:
            vol_10 = row['volume_10_scaled']
            vol_50 = row['volume_50_scaled']
            vol_90 = row['volume_90_scaled']
        volume_10.append(vol_10)
        volume_50.append(vol_50)
        volume_90.append(vol_90)
    df['volume_10'] = volume_10
    df['volume_50'] = volume_50
    df['volume_90'] = volume_90
    return df[['site_id','issue_date','volume_10','volume_50','volume_90']]
submission_df = unscale_res(submission_df)
submission_df

Unnamed: 0,site_id,issue_date,volume_10,volume_50,volume_90
0,hungry_horse_reservoir_inflow,2005-01-01,650.480604,1427.295827,2355.419229
1,hungry_horse_reservoir_inflow,2005-01-08,590.587012,1276.560025,2078.501559
2,hungry_horse_reservoir_inflow,2005-01-15,527.909369,1206.370132,2082.173074
3,hungry_horse_reservoir_inflow,2005-01-22,552.260020,1238.679388,2068.397895
4,hungry_horse_reservoir_inflow,2005-02-01,707.245539,1456.561031,2315.086874
...,...,...,...,...,...
7235,owyhee_r_bl_owyhee_dam,2023-06-22,475.092959,536.753494,630.173726
7236,owyhee_r_bl_owyhee_dam,2023-07-01,523.248192,533.561816,569.485933
7237,owyhee_r_bl_owyhee_dam,2023-07-08,524.179033,530.432576,586.053820
7238,owyhee_r_bl_owyhee_dam,2023-07-15,528.540547,551.117531,626.988358


In [11]:
submission_df.to_csv('./submissions/qLSTM_monthly_lrmax.csv', index = False)