# Exploring Playa Inundation & Area Over Time

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

## Counting number of 0s per state, and saving csvs

In [None]:
# NOTE: this cell reads `state_list` and `inun_csv_basename`, which are only
# assigned in a later cell of this notebook -- run that cell first.
#
# For every state, count the playas whose maximum inundation is 0 ("zeros")
# and the remaining playas ("nonzeros").
state_rows = []
for state in state_list:
    state_csv_path = inun_csv_basename + state + '.csv'
    state_df = pd.read_csv(state_csv_path)
    # One-element Series (indexed by 'inundation'): number of ids whose max is 0.
    zeros = (state_df[['id', 'inundation']].groupby('id').max() == 0).sum()
    nonzeros = state_df['id'].unique().shape[0] - zeros
    state_rows.append(pd.DataFrame(
        {'state': state, 'zeros': zeros, 'nonzeros': nonzeros}))

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every iteration), so the per-state rows are concatenated once instead.
state_zeros_df = pd.concat(state_rows).set_index('state')
state_zeros_df = state_zeros_df.assign(
    total=state_zeros_df['zeros'] + state_zeros_df['nonzeros'])
state_zeros_df = state_zeros_df.assign(
    frac_zeros=state_zeros_df['zeros'] / state_zeros_df['total'])

## Get basic per-lake stats

In [None]:
# Shared path prefix of the per-state inundation CSV files.
inun_csv_basename = '../data/state_county_csvs/inun_frac_'

# State codes to process, one CSV per entry.
state_list = [
    'CO',
    'KS',
    'NE',
    'NM',
    'OK',
    'TX',
]

In [None]:
# Number of years and months in the dataset; both values are hardcoded
# rather than derived from the CSV files.
num_years = 35
num_months = 418

In [None]:
def months_nonzero(x):
    """Count the entries of ``x`` that are strictly greater than zero.

    Intended as a groupby aggregation function; ``x`` must support an
    element-wise ``> 0`` comparison (e.g. a pandas Series).
    """
    is_positive = x > 0
    return is_positive.sum()

def years_nonzero(x):
    """Return True if any value in ``x`` is strictly greater than zero.

    Intended as a groupby aggregation function, but works on any iterable
    of numbers. Unlike the builtin ``max(x) > 0``, the result does not
    depend on where NaN values sit in ``x`` (a leading NaN makes ``max``
    return NaN), and an empty ``x`` yields False instead of raising
    ``ValueError``.
    """
    # NaN > 0 is False, so missing values are simply ignored.
    return any(value > 0 for value in x)

In [None]:
def calc_per_playa_stats(state_df):
    """Summarise one state's records per playa ``id``.

    ``state_df`` must contain the columns ``id``, ``inundation``, ``area``
    and ``year``. The result is indexed by ``id`` and holds, for both
    ``inundation`` and ``area``, the max, min, sum, mean, median and
    ``months_nonzero`` count, plus a ``years_nonzero`` column giving the
    number of years in which ``years_nonzero`` flagged the playa's
    inundation.
    """
    stat_funcs = ['max', 'min', 'sum', 'mean', 'median', months_nonzero]
    playa_values = state_df[['id', 'inundation', 'area']]
    per_playa = playa_values.groupby('id').agg(stat_funcs)

    # Flag every (id, year) pair first, then add the flags up per id.
    yearly_flags = (
        state_df[['id', 'inundation', 'year']]
        .groupby(['id', 'year'])
        .agg({'inundation': years_nonzero})
    )
    years_per_playa = yearly_flags.groupby('id').sum()

    return per_playa.assign(years_nonzero=years_per_playa['inundation'])

In [None]:
# Compute the per-playa summary for each state and stack the results.
# The frames are collected and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
per_playa_frames = []
for state in state_list:
    state_csv_path = inun_csv_basename + state + '.csv'
    state_df = pd.read_csv(state_csv_path)
    state_per_playa = calc_per_playa_stats(state_df)
    per_playa_frames.append(state_per_playa)
per_playa_df = pd.concat(per_playa_frames)

In [None]:
# Number of playas whose maximum inundation is zero, then the number whose
# maximum is positive.
max_inundation = per_playa_df['inundation']['max']
never_inundated = (max_inundation == 0).sum()
ever_inundated = (max_inundation > 0).sum()
print(never_inundated)
print(ever_inundated)

In [None]:
# Histogram of each playa's maximum inundation value.
max_inundation = per_playa_df['inundation']['max']
max_inundation.hist()

In [None]:
# For playas that were inundated at some point, on avg how many months were they inundated?
# Rows: playas with a positive maximum; columns: every `months_nonzero` statistic.
ever_inundated = per_playa_df['inundation']['max'] > 0
is_months_nonzero = per_playa_df.columns.get_level_values(1) == 'months_nonzero'
per_playa_df.loc[ever_inundated, is_months_nonzero].mean()  # On average, 1/10 months

## Save per-playa stats as csv

In [None]:
# DataFrame.assign returns a new frame rather than modifying in place, so the
# result must be kept -- otherwise the `nonzero` flag never reaches the CSV.
per_playa_df = per_playa_df.assign(nonzero=per_playa_df['inundation']['max'] > 0)
per_playa_df.to_csv('../data/jrc-water_summary_atts.csv')

## Get basic per-year stats

In [None]:
def calc_per_year_stats(state_df, state):
    """Summarise one state's records per ``year``.

    ``state_df`` must contain the columns ``inundation``, ``area`` and
    ``year``. The result is indexed by ``year`` and holds, for both
    ``inundation`` and ``area``, the max, min, sum, mean, median and
    ``months_nonzero`` count, plus a ``state`` column set to ``state``
    on every row.
    """
    stat_funcs = ['max', 'min', 'sum', 'mean', 'median', months_nonzero]
    yearly = state_df[['inundation', 'area', 'year']].groupby('year').agg(stat_funcs)
    # Tag the rows so frames from different states can be told apart later.
    yearly['state'] = state
    return yearly

In [None]:
# Compute the per-year summary for each state and stack the results.
# The frames are collected and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
per_year_frames = []
for state in state_list:
    state_csv_path = inun_csv_basename + state + '.csv'
    state_df = pd.read_csv(state_csv_path)
    state_per_year = calc_per_year_stats(state_df, state)
    per_year_frames.append(state_per_year)
per_year_df = pd.concat(per_year_frames)
# Later cells group on the index by this name.
per_year_df.index.rename('year', inplace=True)

In [None]:
# Add the per-state yearly sums and nonzero counts together and plot them by year.
inundation_by_year = per_year_df['inundation'].groupby('year')
inundation_by_year[['sum', 'months_nonzero']].sum().plot()

In [None]:
# Add the per-state yearly area sums together and plot the total by year.
area_by_year = per_year_df['area'].groupby('year')
area_by_year[['sum']].sum().plot(title='Inundated Area (acres)')

## Basic per-month stats

In [None]:
def calc_per_month_stats(state_df, state):
    """Summarise one state's records per ``month``.

    ``state_df`` must contain the columns ``inundation``, ``area`` and
    ``month``. The result is indexed by ``month`` and holds the max, min,
    sum, mean and median of both ``inundation`` and ``area``, plus a
    ``state`` column set to ``state`` on every row.
    """
    stat_names = ['max', 'min', 'sum', 'mean', 'median']
    monthly = state_df[['inundation', 'area', 'month']].groupby(['month']).agg(stat_names)
    # Tag the rows so frames from different states can be told apart later.
    monthly['state'] = state
    return monthly

In [None]:
# Compute the per-month summary for each state and stack the results.
# The frames are collected and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
per_month_frames = []
for state in state_list:
    state_csv_path = inun_csv_basename + state + '.csv'
    state_df = pd.read_csv(state_csv_path)
    state_per_month = calc_per_month_stats(state_df, state)
    per_month_frames.append(state_per_month)
per_month_df = pd.concat(per_month_frames)
# Later cells group on the index by this name.
per_month_df.index.rename('month', inplace=True)

In [None]:
# Add the per-state monthly inundation sums together and plot the total by month.
inundation_by_month = per_month_df['inundation'].groupby('month')
inundation_by_month[['sum']].sum().plot(title="Sum of Inundation")

In [None]:
# Add the per-state monthly area sums together and plot the total by month.
area_by_month = per_month_df['area'].groupby('month')
area_by_month[['sum']].sum().plot(title="Monthly Inundated Area (Acres)")

In [None]:
# Reshape to one column per state (rows keep the existing month index) and
# plot each state's summed area.
# `index` is left at its default so the current index is used; newer pandas
# releases (2.0+) interpret an explicit index=None as a column label and fail.
monthly_area = per_month_df.assign(
    sum_area=per_month_df['area']['sum'])[['state', 'sum_area']]
monthly_area.pivot(columns='state', values='sum_area').plot(
    title='Monthly Inundated Area, by state')

## Get basic per-month/year stats

In [None]:
def calc_per_month_year_stats(state_df, state):
    """Summarise one state's records per ``(year, month)`` pair.

    ``state_df`` must contain the columns ``inundation``, ``area``,
    ``year`` and ``month``. The result is indexed by ``(year, month)`` and
    holds the max, min, sum, mean and median of both ``inundation`` and
    ``area``, plus a ``state`` column set to ``state`` on every row.
    """
    stat_names = ['max', 'min', 'sum', 'mean', 'median']
    values = state_df[['inundation', 'area', 'year', 'month']]
    by_year_month = values.groupby(['year', 'month']).agg(stat_names)
    # Tag the rows so frames from different states can be told apart later.
    by_year_month['state'] = state
    return by_year_month

In [None]:
# Compute the per-(year, month) summary for each state and stack the results.
# The frames are collected and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
per_month_year_frames = []
for state in state_list:
    state_csv_path = inun_csv_basename + state + '.csv'
    state_df = pd.read_csv(state_csv_path)
    state_per_month_year = calc_per_month_year_stats(state_df, state)
    per_month_year_frames.append(state_per_month_year)
per_month_year_df = pd.concat(per_month_year_frames)
# Later cells group on the index levels by these names.
per_month_year_df.index.rename(['year', 'month'], inplace=True)

In [None]:
# Add the per-state inundation sums together for every (year, month) and plot the series.
inundation_by_year_month = per_month_year_df['inundation'].groupby(['year', 'month'])
inundation_by_year_month[['sum']].sum().plot()

In [None]:
# Add the per-state area sums together for every (year, month) and plot the series.
area_by_year_month = per_month_year_df['area'].groupby(['year', 'month'])
area_by_year_month[['sum']].sum().plot()