# Explore correlations between temp, precip, acres, and inundation
## Including basic modeling

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy.stats import spearmanr

In [None]:
# Path prefixes for the per-state CSV files; read_state_csvs appends a state
# abbreviation and '.csv' to each prefix to build the full path.
weather_csv_basename = '../data/state_county_csvs/weather_'
inun_csv_basename = '../data/state_county_csvs/inun_frac_'
# State abbreviations (presumably the states that have CSVs available --
# confirm; the list is not referenced by the cells visible in this notebook).
state_list = ['CO', 'KS', 'NE', 'NM', 'OK','TX']

In [None]:
def read_state_csvs(state):
    """Load the inundation and weather CSVs for a single state.

    Each path is the corresponding module-level prefix
    (``inun_csv_basename`` / ``weather_csv_basename``) followed by ``state``
    and ``'.csv'``.

    Returns a tuple ``(inun_df, weather_df)`` holding the DataFrames exactly
    as ``pd.read_csv`` produced them; no columns are renamed or modified here.
    """
    # Weather is read first, then inundation (same order as before)
    weather_df = pd.read_csv(weather_csv_basename + state + '.csv')
    inun_df = pd.read_csv(inun_csv_basename + state + '.csv')

    return inun_df, weather_df


def read_join_state(state, drop_zeros=False):
    """Load one state's inundation and weather tables and join them.

    Both tables are indexed by ``('id', 'year', 'month')``. Missing
    ``inundation`` values are replaced with 0, and the weather columns are
    then joined onto the inundation rows (``DataFrame.join`` default, i.e.
    every inundation row is kept).

    If ``drop_zeros`` is true, every ``id`` whose maximum inundation is 0 is
    removed before the join.
    """
    inun_df, weather_df = read_state_csvs(state)

    key_cols = ['id', 'year', 'month']
    inun_df = inun_df.set_index(key_cols)
    weather_df = weather_df.set_index(key_cols)

    # Treat missing inundation readings as "not inundated"
    inun_df['inundation'] = inun_df['inundation'].fillna(0)

    if drop_zeros:
        # Peak inundation per id; ids whose peak is 0 never held water
        peak_by_id = inun_df.groupby('id')['inundation'].max()
        never_wet_ids = peak_by_id[peak_by_id == 0].index
        inun_df = inun_df.drop(never_wet_ids)

    return inun_df.join(weather_df)

# Read in DataFrames

In [None]:
# State whose CSVs are loaded for the rest of the notebook.
state = 'NM'
# All ids, including those whose inundation is always 0.
joined_df = read_join_state(state)
# Only ids with at least one nonzero inundation value; used by the cells below.
joined_nonzero = read_join_state(state, drop_zeros=True)

## Univariate autocorrelations

In [None]:
# Normalized autocorrelation of inundation over the first 50,000 rows, up to
# 50 lags in each direction.
# NOTE(review): the slice is positional, so it presumably concatenates the
# series of several ids back to back -- confirm that mixing them is intended.
plt.acorr(joined_nonzero.iloc[0:50000]['inundation'], usevlines=True, normed=True, maxlags=50, lw=2)

## Bivariate correlations: basic stats and plots

In [None]:
def spearmanr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation of x and y.

    Shaped as a two-argument callable so it can be passed as ``method=`` to
    ``DataFrame.corr``.
    """
    result = spearmanr(x, y)
    return result.pvalue

In [None]:
# Pairwise Spearman rank correlation coefficients between the four variables.
cor = joined_nonzero[['inundation','precip','temp', 'acres']].corr(method='spearman')
# Matching matrix of p-values, computed pairwise by spearmanr_pval.
# NOTE: with a callable method pandas sets the diagonal to 1, so the diagonal
# entries are placeholders rather than real p-values.
cor_pvals = joined_nonzero[['inundation','precip','temp', 'acres']].corr(method=spearmanr_pval)
print('Coefs\n',cor, '\n\n')
print('Pvals\n',cor_pvals)
# Heat map of the coefficient matrix.
plt.matshow(cor)
plt.colorbar()
plt.show()

In [None]:
# Grid of pairwise scatter plots for the same four variables.
pd.plotting.scatter_matrix(joined_nonzero[['inundation','precip','temp', 'acres']], figsize=(12, 12))
plt.show()

# Plot inundation trajectories

In [None]:
def normalize_var(x):
    """Min-max scale ``x`` to the range [0, 1].

    ``x`` must provide ``.min()`` / ``.max()`` and support element-wise
    arithmetic (e.g. a pandas Series or a NumPy array). For a pandas Series
    the extremes skip NaN, so missing values stay NaN in the result without
    affecting how the remaining values are scaled.

    If every value is identical (zero range), zeros are returned instead of
    dividing by zero.
    """
    # Use the container's own reductions: the builtin min()/max() compare
    # element by element, which gives order-dependent results when NaNs are
    # present, and the original evaluated min() twice.
    lo = x.min()
    span = x.max() - lo

    if span == 0:
        # Constant input: nothing to scale, avoid producing 0/0 = NaN
        return x - lo

    return (x - lo) / span

In [None]:
# Legend styling applied globally through matplotlib's rcParams.
params = {'legend.fontsize': 14,
          'legend.handlelength': 2}
plt.rcParams.update(params)
# Pick one random row and take its first index level (the id) to select
# a single trajectory.
rand_index = joined_nonzero.sample(1).index.get_level_values(0)[0]
traj = joined_nonzero.loc[rand_index, ['precip','inundation','temp']]
# Rescale precip to [0, 1] so it can share the y-axis (ylim 0..1) with
# inundation.
traj['precip'] = normalize_var(traj['precip'])
# NOTE(review): xlim=[200,418] is a hard-coded window on the x-axis;
# presumably row positions within the (year, month) series -- confirm.
ax = traj[['precip','inundation']].plot(
    figsize=(12,3),xlim=[200,418],ylim=[0,1], lw=2)
ax.set_xlabel('Year, Month',size=15)
ax.set_ylabel('Inundation Frac/\nRelative Precip Magnitude',size=15)
ax.tick_params(axis="x", labelsize=12)
ax.tick_params(axis="y", labelsize=12)
ax.set_title('Playa: {}'.format(rand_index),loc='left', size=16)
plt.show()

In [None]:
# Per-year summary for one random id: max normalized precip, mean normalized
# temp, and the fraction of rows in that year with nonzero inundation.
rand_index = joined_nonzero.sample(1).index.get_level_values(0)[0]
traj = joined_nonzero.loc[rand_index, ['precip','inundation','temp']]
# Put temp and precip on a common 0..1 scale so they fit on one plot.
traj['temp'] = normalize_var(traj['temp'])
traj['precip'] = normalize_var(traj['precip'])
# The mean of the boolean nonzero_inun column is the share of rows with
# inundation > 0 within each year.
traj.assign(nonzero_inun=traj['inundation']>0).groupby('year').agg(
    {'precip':'max','temp':'mean', 'nonzero_inun':'mean'}).plot()

# Basic linear and logistic regression modeling

In [None]:
def basic_lm(joined_nonzero, random_traj=True,model_type='linear'):
    """Fit a simple regression of inundation on weather and show diagnostics.

    Parameters
    ----------
    joined_nonzero : DataFrame
        Joined inundation/weather table whose first index level identifies
        the trajectory; must contain 'precip', 'temp' and 'inundation'
        columns (plus 'acres' when ``random_traj`` is false).
    random_traj : bool
        If true, fit on one randomly chosen trajectory using precip and temp.
        If false, fit on all rows using precip, temp and acres.
    model_type : {'linear', 'log'}
        'linear' fits ``sm.OLS`` on the inundation values; 'log' fits
        ``sm.Logit`` on the indicator ``inundation > 0``.

    Raises
    ------
    ValueError
        If ``model_type`` is neither 'linear' nor 'log'.

    Prints the model summary and shows scatter, true-vs-predicted and
    partial-regression plots (plus fitted-vs-residual for the linear model).
    Returns None.
    """
    # Validate before doing any work: previously an invalid value only printed
    # a message and then crashed on the unassigned estimator.
    if model_type not in ('linear', 'log'):
        raise ValueError(
            "model_type not 'linear' or 'log': {!r}".format(model_type))

    if random_traj:
        # First index level of one random row selects a single trajectory
        rand_index = joined_nonzero.sample(1).index.get_level_values(0)[0]
        traj = joined_nonzero.loc[rand_index, ['precip','inundation','temp']]
        predictors = ['precip', 'temp']
    else:
        traj = joined_nonzero
        predictors = ['precip', 'temp', 'acres']

    X = traj[predictors]
    y = traj['inundation']

    # NOTE(review): X is passed as-is, so the models are fit without an
    # intercept (statsmodels does not add a constant automatically) --
    # confirm whether sm.add_constant(X) is wanted.
    if model_type == 'linear':
        est = sm.OLS(y, X)
    else:
        # Logistic regression models whether any inundation occurred
        y = y > 0
        est = sm.Logit(y, X)

    est2 = est.fit()
    print(est2.summary())

    # Raw predictor-vs-response scatter plots
    plt.scatter(traj['temp'], traj['inundation'])
    plt.title('Temp vs Inundation')
    plt.show()

    plt.scatter(traj['precip'], traj['inundation'])
    plt.title('Precip vs Inundation')
    plt.show()

    if not random_traj:
        plt.scatter(traj['acres'], traj['inundation'])
        plt.title('Playa Area vs Inundation')
        plt.show()

    # Observed response against in-sample predictions
    plt.scatter(y, est2.predict(X))
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.show()

    fig = plt.figure(figsize=(12,5))
    fig = sm.graphics.plot_partregress_grid(est2, fig=fig)
    plt.show()

    # Residuals are only plotted for the linear model
    if model_type == 'linear':
        plt.scatter(est2.predict(X), est2.resid)
        plt.xlabel('Fitted')
        plt.ylabel('Resid')
        plt.show()


### Modeling a single random trajectory

In [None]:
basic_lm(joined_nonzero, random_traj=True, model_type='linear')

In [None]:
basic_lm(joined_nonzero, random_traj=True, model_type='log')

### Modeling all playas together

In [None]:
basic_lm(joined_nonzero, random_traj=False, model_type='linear')

In [None]:
basic_lm(joined_nonzero, random_traj=False, model_type='log')