# Explore correlations between temperature, precipitation, and inundation

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Per-state CSVs share one directory; the two-letter state code and '.csv'
# are appended to each basename when a file is read.
_csv_dir = '../data/state_county_csvs/'
weather_csv_basename = _csv_dir + 'weather_'
inun_csv_basename = _csv_dir + 'inun_frac_'

# Two-letter codes of the states for which CSVs may be loaded.
state_list = [
    'CO',
    'KS',
    'NE',
    'NM',
    'OK',
    'TX',
]

In [None]:
def read_state_csvs(state):
    """Load the weather and inundation CSVs for a single state.

    The file paths are built as ``<basename> + state + '.csv'`` from the
    module-level ``weather_csv_basename`` and ``inun_csv_basename``.

    The weather table has ``mean_2m_air_temperature`` renamed to ``temp``
    and ``total_precipitation`` renamed to ``precip``, and its ``acres``
    column is removed. The inundation table is returned exactly as read.

    Args:
        state: State code appended to the CSV basenames (e.g. ``'NM'``).

    Returns:
        Tuple ``(inun_df, weather_df)`` -- inundation first, weather second.
    """
    short_names = {
        'mean_2m_air_temperature': 'temp',
        'total_precipitation': 'precip',
    }

    # Weather: shorten the verbose column names and discard 'acres'.
    # NOTE(review): presumably 'acres' is dropped here so it does not collide
    # with an 'acres' column on the inundation side when joined -- confirm.
    weather_df = (
        pd.read_csv(weather_csv_basename + state + '.csv')
        .rename(columns=short_names)
        .drop('acres', axis=1)
    )

    # Inundation: no post-processing.
    inun_df = pd.read_csv(inun_csv_basename + state + '.csv')

    return inun_df, weather_df


def read_join_state(state, drop_zeros=False):
    """Read one state's inundation and weather tables and join them.

    Both tables are indexed by ``(id, year, month)`` and the weather columns
    are joined onto the inundation rows with ``DataFrame.join`` (its default
    left join, so only index keys present in the inundation table are kept).

    Args:
        state: State code passed through to ``read_state_csvs``.
        drop_zeros: When true, every ``id`` whose maximum ``inundation``
            value equals 0 is removed from the inundation table before the
            join.

    Returns:
        The joined DataFrame, indexed by ``(id, year, month)``.
    """
    key_cols = ['id', 'year', 'month']

    inun_df, weather_df = read_state_csvs(state)
    inun_df = inun_df.set_index(key_cols)
    weather_df = weather_df.set_index(key_cols)

    if drop_zeros:
        # Peak inundation per id; a peak of 0 means the id was never inundated.
        peak_by_id = inun_df.groupby('id')['inundation'].max()
        never_inundated = peak_by_id[peak_by_id == 0].index
        # Labels are matched against the first index level ('id').
        inun_df = inun_df.drop(never_inundated)

    return inun_df.join(weather_df)

# Read in the joined DataFrames

In [None]:
# State to analyse in the cells below.
state = 'NM'

# All ids for the state.
joined_df = read_join_state(state, drop_zeros=False)
# Same data, minus ids whose maximum inundation is 0.
joined_nonzero = read_join_state(state, drop_zeros=True)

## Univariate autocorrelations

In [None]:
# Autocorrelation of inundation over the first 50000 rows, lags -50..50.
# NOTE(review): rows are indexed by (id, year, month); if this slice spans
# more than one id, lags will cross id boundaries -- confirm that is intended.
inundation_head = joined_nonzero['inundation'].iloc[:50000]
plt.acorr(inundation_head, usevlines=True, normed=True, maxlags=50, lw=2)

## Bivariate correlations: basic stats and plots

In [None]:
# Pairwise correlations across all ids: print the table, then show it as a
# colour-coded matrix.
corr_cols = ['inundation', 'precip', 'temp', 'acres']
corr_all = joined_df[corr_cols].corr()
print(corr_all)
plt.matshow(corr_all)
plt.colorbar()
plt.show()

In [None]:
# Same correlation table and matrix plot, restricted to the data set that
# excludes ids whose maximum inundation is 0.
corr_cols = ['inundation', 'precip', 'temp', 'acres']
corr_nonzero = joined_nonzero[corr_cols].corr()
print(corr_nonzero)
plt.matshow(corr_nonzero)
plt.colorbar()
plt.show()

In [None]:
# Pairwise scatter plots of the four variables for the nonzero data set.
scatter_cols = ['inundation', 'precip', 'temp', 'acres']
pd.plotting.scatter_matrix(joined_nonzero[scatter_cols], figsize=(12, 12))
plt.show()