# Create a CSV with 8 Playas

In [None]:
import glob
import json
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Seed NumPy's global RNG so the np.random.choice draws used later in this
# notebook to pick county CSVs are repeatable from run to run.
np.random.seed(62)

In [None]:
def prep_lc_frac_df(ids=None, lc_csv='../data/fraster_landcover_allyears_bigger.csv'):
    """Build a table of land-cover fractions indexed by (id, year).

    The land-cover CSV must have an ``id`` column; every other column is
    treated as one year of data.  The first four characters of the column
    name are parsed as the year, and each cell is a dict-like string with
    bare integer keys (e.g. ``{1:2500,5:2500}``).  Each key becomes an
    ``lcf<key>`` column holding ``value / 5000``.

    Parameters
    ----------
    ids : sequence, optional
        Row ids to keep.  ``None`` or an empty sequence keeps every row.
    lc_csv : str, optional
        Path of the land-cover CSV.  Defaults to the file this notebook
        has always used.

    Returns
    -------
    pandas.DataFrame
        One row per (id, year) with one ``lcf*`` column per key seen in the
        data; keys missing for a given row are filled with 0.
    """
    lc_df = pd.read_csv(lc_csv).set_index('id')
    if ids is not None and len(ids) > 0:
        lc_df = lc_df.loc[ids]

    yearly_frames = []
    for col in lc_df.columns:
        year = int(col[0:4])
        # Quote the bare integer keys so each cell parses as valid JSON.
        jsond = lc_df[col].str.replace(r'([0-9]+)(:)', r'"\1"\2', regex=True).apply(json.loads)
        # NOTE(review): 5000 is presumably the number of pixels counted per
        # playa -- confirm against how the land-cover CSV was produced.
        # A plain list is passed so the result always gets a fresh
        # positional index, whatever the pandas version does with a Series.
        temp_frac_df = pd.json_normalize(jsond.tolist()) / 5000
        temp_frac_df.columns = ['lcf{}'.format(lc) for lc in temp_frac_df.columns]
        temp_frac_df = temp_frac_df.assign(id=lc_df.index.to_numpy(), year=year)
        yearly_frames.append(temp_frac_df)

    if not yearly_frames:
        # No land-cover columns at all: return an empty, correctly indexed table.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=['id', 'year'])
        return pd.DataFrame(index=empty_index)

    # Concatenate once (DataFrame.append was removed in pandas 2.0); keys
    # that are absent for some year come out as NaN and are zero-filled.
    lc_frac = pd.concat(yearly_frames, ignore_index=True).fillna(0)

    return lc_frac.set_index(['id', 'year'])


def read_join_csv(inun_csv, drop_zeros=True):
    """Join inundation, weather and land-cover fractions for one CSV.

    Parameters
    ----------
    inun_csv : str
        Path to an inundation CSV with ``id``, ``year``, ``month`` and
        ``inundation`` columns.  The weather CSV path is derived from it by
        replacing ``inun_frac_`` with ``weather_``.
    drop_zeros : bool
        When true, drop every id whose maximum inundation is 0.

    Returns
    -------
    pandas.DataFrame or None
        The inner join of weather, inundation and land-cover fractions, or
        ``None`` when ``drop_zeros`` leaves no inundation rows.
    """
    key_cols = ['id', 'year', 'month']

    # Inundation observations, without rows whose value is missing
    inundation = pd.read_csv(inun_csv).set_index(key_cols)
    inundation = inundation.loc[inundation['inundation'].notna()]

    if drop_zeros:
        peak = inundation.groupby('id').agg({'inundation': 'max'})
        all_zero_ids = peak.loc[peak['inundation'] == 0].index
        inundation = inundation.drop(all_zero_ids)
        if inundation.shape[0] == 0:
            return None

    # Weather observations share the same (id, year, month) key
    weather = pd.read_csv(inun_csv.replace('inun_frac_', 'weather_')).set_index(key_cols)
    merged = weather.join(inundation, how='inner')

    # Land-cover fractions for just the ids that survived the join.
    # Building and joining this table is the slow part; the fractions
    # could be precomputed and the land-cover table split by county.
    surviving_ids = merged.index.get_level_values(0).unique()
    land_cover = prep_lc_frac_df(ids=surviving_ids)

    return merged.join(land_cover, how='inner')

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the seeded random draws made from this list pick the same files everywhere.
inun_csv_list = sorted(glob.glob('../data/state_county_csvs/counties/inun_frac*'))

In [None]:
# Number of distinct ids (first index level) to keep in the sampled table.
target_num_playas = 8

In [None]:
# Draw county CSVs at random until at least `target_num_playas` distinct ids
# have been collected, then keep the first `target_num_playas` of them.
# Each county is read at most once (re-reading it would duplicate its rows),
# and the loop stops once every CSV has been tried, so it cannot spin forever.
county_frames = []
playa_ids = {}      # used as an insertion-ordered set of ids, in order of first appearance
tried_csvs = set()
while len(playa_ids) < target_num_playas and len(tried_csvs) < len(inun_csv_list):
    rand_csv = np.random.choice(inun_csv_list)
    if rand_csv in tried_csvs:
        continue
    tried_csvs.add(rand_csv)

    county_df = read_join_csv(rand_csv, drop_zeros=True)
    # read_join_csv returns None when nothing is left after dropping zeros
    if county_df is None or county_df.empty:
        continue
    county_frames.append(county_df)
    playa_ids.update(dict.fromkeys(county_df.index.get_level_values(0).unique()))

if len(playa_ids) < target_num_playas:
    raise ValueError(
        'Only found {} usable playas in {} CSVs; need {}'.format(
            len(playa_ids), len(inun_csv_list), target_num_playas))

# Concatenate once; columns missing from some counties are zero-filled.
joined_df = pd.concat(county_frames).fillna(0)

joined_df = joined_df.loc[list(playa_ids)[:target_num_playas]]

In [None]:
# Draw one inundation line per id (first index level) on the current axes.
playa_id_values = joined_df.index.get_level_values(0).unique()
for playa_id in playa_id_values:
    inundation_series = joined_df.loc[playa_id, 'inundation']
    inundation_series.plot()

In [None]:
# Remove the 'area' column; every other column is kept.
joined_df = joined_df.drop('area', axis=1)

In [None]:
# Expected number of rows per id; used only as a sanity check below.
len_of_timeseries = 418

# Renumber ids as 0..k-1 in order of first appearance.  The codes come from
# the index itself rather than from assuming every id has exactly
# `len_of_timeseries` contiguous rows, so uneven series are still labelled
# correctly instead of raising or being silently mislabelled.
playa_level = joined_df.index.get_level_values(0)
rows_per_playa = pd.Series(playa_level).value_counts()
if (rows_per_playa != len_of_timeseries).any():
    warnings.warn(
        'Not every id has {} rows (min {}, max {})'.format(
            len_of_timeseries, rows_per_playa.min(), rows_per_playa.max()))

new_ids, _ = pd.factorize(playa_level)
joined_df = joined_df.assign(id=new_ids) # Put id at end for embedding

# Pop inundation to end
inun = joined_df.pop('inundation')
joined_df['inundation'] = inun


In [None]:
# Name the output after the number of ids kept, so changing
# `target_num_playas` does not overwrite a file labelled for a different count.
joined_df.to_csv('./prepped_{}.csv'.format(target_num_playas))