In [None]:
%load_ext autoreload
%autoreload 2
import sys
# Instead of creating a package with setup.py or building from a Docker/Singularity file,
# add the parent directory to sys.path so the sibling `src` code can be imported in this notebook.
# This keeps the notebook free of library code so it only holds visualizations, and makes that code easier to test.
# It also helps keep variable state clean, so cells run out of order do not leave a mysterious state behind.
sys.path.append("..")

In [None]:
from src import most_recent_mesonet_data
from src import most_recent_mesonet_time
from src import landtype_describe
from src.plotting_scripts import landtype
import os
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from scipy.stats import skew
import statistics 
import cartopy.crs as crs
import cartopy.feature as cfeature

In [None]:
def format_df(df):
    """Expand a (Value, Count) frequency table into one row per observation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Value' column and a 'Count' column. Each 'Count' is
        truncated to an integer and says how many times the matching 'Value'
        is repeated.

    Returns
    -------
    pandas.DataFrame
        Single-column frame ('Value') with a fresh 0..n-1 index holding every
        value repeated `Count` times, in the original row order.
    """
    # Work on positional arrays so the result does not depend on df's index
    # labels (the previous row loop passed iterrows() labels to .iloc, which
    # only works for a default RangeIndex).
    counts = df['Count'].astype(int).to_numpy()
    # A non-positive count contributes no rows, same as an empty np.arange.
    counts = np.clip(counts, 0, None)
    return pd.DataFrame({'Value': np.repeat(df['Value'].to_numpy(), counts)})

def stat_anal(state, buffer, directory, state_df, station_list, lonlist, latlist,
              data_root='/home/aevans/landtype/elevation/data'):
    """Summarise the elevation distribution around every station.

    For each file name in `directory`, reads
    `{data_root}/{state}/elev/{buffer}km/{file name}`, expands its
    Value/Count table with `format_df`, and computes summary statistics of the
    expanded elevation values.

    Files, `state_df` rows, `station_list`, `lonlist` and `latlist` are matched
    purely by position.
    NOTE(review): this assumes the file order (callers pass a sorted listing)
    is the same as the station order of `state_df` — confirm.

    Parameters
    ----------
    state : str
        State folder name inside `data_root` (the notebook uses 'NY' / 'OK').
    buffer : int
        Buffer size used in the folder name `{buffer}km`.
    directory : sequence of str
        File names to read, one per station.
    state_df : pandas.DataFrame
        Must contain an 'elev' column; row i is used for file i.
    station_list : sequence
        Station identifiers; element i is used for file i.
    lonlist, latlist : sequence
        Stored unchanged as the 'lon' and 'lat' columns; their length must
        equal the number of files.
    data_root : str, optional
        Root folder of the elevation CSV tree. Defaults to the location that
        was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns 'station', 'elev', 'std', 'variance',
        'skew', 'med_dist', 'lon', 'lat'.

    Raises
    ------
    statistics.StatisticsError
        If a file expands to fewer than two values (required by `stdev`).
    """
    columns = ['station', 'elev', 'std', 'variance', 'skew', 'med_dist']
    records = []
    for x, fname in enumerate(directory):
        counts_df = pd.read_csv(f'{data_root}/{state}/elev/{buffer}km/{fname}')
        values = format_df(counts_df)['Value']

        # NOTE(review): 'std' is the sample standard deviation (n - 1) while
        # 'variance' is the population variance (n), so std**2 != variance.
        # Kept as-is because the site thresholds were chosen on these values.
        std = statistics.stdev(values)
        variance = statistics.pvariance(values)
        my_skew = skew(values)

        elevation = state_df['elev'].iloc[x]
        # Median of (surrounding elevation - station elevation). quantile(0.5)
        # is the '50%' entry that describe() reports; the old `describe[5]`
        # relied on positional integer indexing of a string-labelled Series,
        # which pandas 2.x deprecates.
        median_diff = (values - elevation).quantile(0.5)
        # NOTE(review): this equals 2 * elevation - median(values), not the
        # plain station-to-median difference. Behaviour preserved; confirm the
        # intended definition of 'med_dist'.
        distance = elevation - median_diff

        records.append({
            'station': station_list[x],
            'elev': elevation,
            'std': std,
            'variance': variance,
            'skew': my_skew,
            'med_dist': distance,
        })

    final_df = pd.DataFrame(records, columns=columns)
    # Assigned after construction so a length mismatch still raises here.
    final_df['lon'] = lonlist
    final_df['lat'] = latlist
    return final_df

def percent_plot(df, variable):
    """Scatter one column against station and label each point.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'station' column and the column named by `variable`.
    variable : str
        Column plotted on the y axis; also used as the y-axis label.

    Draws on a new 20x10 figure and returns nothing.
    """
    figure, axis = plt.subplots(figsize=(20, 10))
    axis.scatter(df['station'], df[variable], s=50)
    # Write the station name next to its own marker.
    for _, row in df.iterrows():
        name = row['station']
        axis.annotate(name, (name, row[variable]), fontsize=15)
    axis.grid()
    axis.set_ylabel(variable, size=20)
    # Station names are already annotated, so hide the x tick marks and labels.
    axis.tick_params(labelbottom=False, bottom=False)

def plurality_plot(df, s):
    """Plot stations at their lon/lat, coloured and sized by one statistic.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'lon', 'lat' and the column named by `s`.
    s : str
        Column used for the marker colour ('jet' colormap) and marker size;
        it is also inserted into the plot title.

    Draws on a new figure and returns nothing.
    """
    fig, ax = plt.subplots()
    # Marker size is an area, so it must not be negative: size by magnitude
    # and let the colour carry the sign. Passing the raw column made points
    # with negative values (e.g. negative skew) disappear from the map.
    marker_sizes = df[s].abs()
    df.plot.scatter(
        x='lon',
        y='lat',
        c=s,
        s=marker_sizes,
        colormap='jet',
        figsize=(9, 6),
        ax=ax,
    )
    ax.set_title(f'Mesonet Site {s} by Elevation', size=16)
    ax.set_xlabel('Longitude', size=14)
    ax.set_ylabel('Latitude', size=14)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.grid()

def _sites_within_limits(elev_df, max_std, max_variance, max_med_dist, max_abs_skew):
    """Return the stations whose statistics are all strictly inside the limits."""
    keep = (
        (elev_df['std'] < max_std)
        & (elev_df['variance'] < max_variance)
        & (elev_df['med_dist'] < max_med_dist)
        & (elev_df['skew'] < max_abs_skew)
        & (elev_df['skew'] > -max_abs_skew)
    )
    # Select by position so the result does not depend on the index labels
    # (the previous row loop passed iterrows() labels to .iloc).
    return elev_df.loc[keep.to_numpy(), 'station'].tolist()


def good_sites_elev_ok(elev_df, max_std=100, max_variance=5000, max_med_dist=400,
                       max_abs_skew=1.0):
    """List stations that pass the stricter elevation thresholds.

    The notebook uses this variant for the Oklahoma stations.

    Parameters
    ----------
    elev_df : pandas.DataFrame
        Must contain 'station', 'std', 'variance', 'med_dist' and 'skew'.
    max_std, max_variance, max_med_dist : number, optional
        Exclusive upper bounds for the matching columns.
    max_abs_skew : number, optional
        'skew' must lie strictly between -max_abs_skew and +max_abs_skew.

    Returns
    -------
    list
        Station identifiers of the rows that pass every check, in row order.
        Rows with a missing statistic never pass.
    """
    return _sites_within_limits(elev_df, max_std, max_variance, max_med_dist, max_abs_skew)


def good_sites_elev(elev_df, max_std=150, max_variance=20000, max_med_dist=400,
                    max_abs_skew=1.0):
    """List stations that pass the looser elevation thresholds.

    The notebook uses this variant for the New York stations.

    Parameters
    ----------
    elev_df : pandas.DataFrame
        Must contain 'station', 'std', 'variance', 'med_dist' and 'skew'.
    max_std, max_variance, max_med_dist : number, optional
        Exclusive upper bounds for the matching columns.
    max_abs_skew : number, optional
        'skew' must lie strictly between -max_abs_skew and +max_abs_skew.

    Returns
    -------
    list
        Station identifiers of the rows that pass every check, in row order.
        Rows with a missing statistic never pass.
    """
    return _sites_within_limits(elev_df, max_std, max_variance, max_med_dist, max_abs_skew)

def good_sites_df(df, good_list):
    """Return station, lon and lat for the rows whose station is in `good_list`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'station', 'lon' and 'lat' columns.
    good_list : list
        Station identifiers to keep.

    Returns
    -------
    pandas.DataFrame
        Columns 'station', 'lon', 'lat' for the matching rows, in the row
        order of `df`, with a fresh 0..n-1 index.
    """
    # Boolean mask applied by position, so df may carry any index (the
    # previous row loop passed iterrows() labels to .iloc).
    keep = df['station'].isin(good_list).to_numpy()
    return df.loc[keep, ['station', 'lon', 'lat']].reset_index(drop=True)

def good_elevs_plot(good_elev_df):
    """Map the selected stations on a PlateCarree basemap with name labels.

    Parameters
    ----------
    good_elev_df : pandas.DataFrame
        Must contain 'station', 'lon' and 'lat' columns.

    Draws on a new 12x9 figure and returns nothing.
    """
    projPC = crs.PlateCarree()
    # Map extent: the stations' bounding box padded by one degree on each side.
    lat_north = good_elev_df['lat'].max() + 1
    lat_south = good_elev_df['lat'].min() - 1
    lon_east = good_elev_df['lon'].max() + 1
    lon_west = good_elev_df['lon'].min() - 1

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={'projection': projPC})
    ax.set_extent([lon_west, lon_east, lat_south, lat_north], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle='--')
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)

    gridliner = ax.gridlines(crs=projPC, draw_labels=True,
                             linewidth=2, color='black', alpha=0.5, linestyle='--')
    # The label switches live on the Gridliner, not on the axes: the old
    # `ax.xticklabels_top` / `ax.ylabels_right` assignments had no effect.
    # (`top_labels` / `right_labels` are the cartopy >= 0.18 names.)
    gridliner.top_labels = False
    gridliner.right_labels = False

    ax.scatter(x=good_elev_df['lon'],
               y=good_elev_df['lat'],
               c='r',
               s=40,
               )
    # Offset each label 0.1 degrees east so it does not sit on the marker.
    for _, row in good_elev_df.iterrows():
        ax.annotate(row['station'], (row['lon'] + 0.1, row['lat']), fontsize=10)
    ax.set_title('Good Mesonet Sites by Elevation', size=16)
    ax.set_xlabel('Longitude', size=14)
    ax.set_ylabel('Latitude', size=14)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.grid()

In [None]:
# Input locations on the author's machine (absolute paths; change them to run elsewhere).
# ny_mesonet_data_path is passed to the `src` mesonet helpers that build ny_df.
# ok_mesonet_data_path is a CSV file that is read with pandas to build ok_df.
ny_mesonet_data_path = '/home/aevans/nysm/archive/nysm/netcdf/proc'
ok_mesonet_data_path = '/home/aevans/landtype/geoinfo.csv'

In [None]:
# Create the New York mesonet dataframe with the helpers from `src`.
# The second call receives the first result plus the data path and replaces ny_df
# (presumably narrowing it to the most recent observation time — confirm in src).
ny_df = most_recent_mesonet_data.current_time_mesonet_df(ny_mesonet_data_path)
ny_df = most_recent_mesonet_time.most_recent_time(ny_df, ny_mesonet_data_path)

In [None]:
# Create the Oklahoma mesonet dataframe by reading the geoinfo CSV.
# Later cells use its 'stid', 'elev', 'elon' and 'nlat' columns.
ok_df = pd.read_csv(ok_mesonet_data_path)

In [None]:
# Folders holding the raw, numbered elevation CSV exports (NY/OK, 30/50 km buffers).
# In this notebook they are referenced only by the commented-out one-off rename cells below.
# Plain string literals: the previous f-strings contained no placeholders.
path_ny_30 = '/home/aevans/landtype/elevation/data/CSVs_elevation_ny_30km/'
path_ny_50 = '/home/aevans/landtype/elevation/data/CSVs_elevation_ny_50km/'
path_ok_30 = '/home/aevans/landtype/elevation/data/CSVs_elevation_ok_30km/'
path_ok_50 = '/home/aevans/landtype/elevation/data/CSVs_elevation_ok_50km/'

In [None]:
# New York station identifiers, in ny_df row order.
station_list_ny = ny_df['station'].to_list()

In [None]:
# Station elevations as plain lists (not referenced again in the visible cells).
elevation_list_ny = ny_df['elev'].to_list()
elevation_list_ok = ok_df['elev'].to_list()

In [None]:
# Oklahoma station identifiers, in ok_df row order ('stid' column).
station_list_ok = ok_df['stid'].to_list()

In [None]:
# Station coordinates; the Oklahoma table names them 'elon' / 'nlat'.
ny_df_lons = ny_df['lon'].to_list()
ny_df_lats = ny_df['lat'].to_list()
ok_df_lons = ok_df['elon'].to_list()
ok_df_lats = ok_df['nlat'].to_list()

In [None]:
# x = 0
# for i in range(1,127): 
#     print(i)
#     df = pd.read_csv(f'{path_ny_30}/{i}_csv.csv')
#     df.to_csv(f'/home/aevans/landtype/elevation/data/NY/elev/30km/{station_list_ny[x]}_elev_30km.csv')
#     x+= 1

In [None]:
# x = 0
# for i in range(1,127): 
#     df = pd.read_csv(f'{path_ny_50}/{i}_csv.csv')
#     df.to_csv(f'/home/aevans/landtype/elevation/data/NY/elev/50km/{station_list_ny[x]}_elev_50km.csv')
#     x+= 1

In [None]:
# x = 0
# for i in range(1,145): 
#     df = pd.read_csv(f'{path_ok_30}/{i}_csv.csv')
#     df.to_csv(f'/home/aevans/landtype/elevation/data/OK/elev/30km/{station_list_ok[x]}_elev_30km.csv')
#     x+= 1

In [None]:
# x = 0
# for i in range(1,145): 
#     df = pd.read_csv(f'{path_ok_50}/{i}_csv.csv')
#     df.to_csv(f'/home/aevans/landtype/elevation/data/OK/elev/50km/{station_list_ok[x]}_elev_50km.csv')
#     x+= 1

In [None]:
directory = os.listdir(f'/home/aevans/landtype/elevation/data/NY/elev/50km/')
sorted_direct= sorted(directory)

In [None]:
directory_ok = os.listdir(f'/home/aevans/landtype/elevation/data/OK/elev/50km/')
sorted_direct_ok = sorted(directory_ok)

# New York

## 50 km

In [None]:
# Elevation statistics for every New York station (50 km buffer).
# Files, ny_df rows and the station/lon/lat lists are matched by position.
elev_df = stat_anal('NY', 50, sorted_direct, ny_df, station_list_ny, ny_df_lons, ny_df_lats)

In [None]:
# Station maps, one per statistic (colour and marker size follow the named column).
plurality_plot(elev_df, 'std')

In [None]:
plurality_plot(elev_df, 'variance')

In [None]:
plurality_plot(elev_df, 'skew')

In [None]:
plurality_plot(elev_df, 'med_dist')

In [None]:
# Stations that pass the thresholds in good_sites_elev.
good_list = good_sites_elev(elev_df)

In [None]:
# Station / lon / lat of the passing stations.
good_df = good_sites_df(elev_df, good_list)

In [None]:
# Map of the passing stations.
good_elevs_plot(good_df)

## 30 km

In [None]:
# Per-station 30 km elevation CSVs for New York.
directory_30 = os.listdir('/home/aevans/landtype/elevation/data/NY/elev/30km/')
# Keep only CSV files: stat_anal pairs file i with station i by position, so a
# stray entry (hidden file, checkpoint folder) would shift every later station.
sorted_direct_30 = sorted(name for name in directory_30 if name.endswith('.csv'))

In [None]:
# Elevation statistics for every New York station (30 km buffer).
# Files, ny_df rows and the station/lon/lat lists are matched by position.
ny30_df = stat_anal('NY', 30, sorted_direct_30, ny_df, station_list_ny, ny_df_lons, ny_df_lats)

In [None]:
# Save the 30 km statistics table (the dataframe index is written as the first column).
ny30_df.to_csv('/home/aevans/landtype/elevation/data/NY/elev/ny30_df.csv')

In [None]:
# Station maps, one per statistic (colour and marker size follow the named column).
plurality_plot(ny30_df, 'std')

In [None]:
plurality_plot(ny30_df, 'variance')

In [None]:
plurality_plot(ny30_df, 'skew')

In [None]:
plurality_plot(ny30_df, 'med_dist')

In [None]:
# Stations that pass the thresholds in good_sites_elev.
good_list_30 = good_sites_elev(ny30_df)

In [None]:
# Station / lon / lat of the passing stations.
good_df30 = good_sites_df(ny30_df, good_list_30)

In [None]:
# Map of the passing stations.
good_elevs_plot(good_df30)

In [None]:
# Per-station 30 km elevation CSVs for Oklahoma.
directory_ok_30 = os.listdir('/home/aevans/landtype/elevation/data/OK/elev/30km/')
# Keep only CSV files: stat_anal pairs file i with station i by position, so a
# stray entry (hidden file, checkpoint folder) would shift every later station.
sorted_direct_ok_30 = sorted(name for name in directory_ok_30 if name.endswith('.csv'))

In [None]:
# Elevation statistics for every Oklahoma station (30 km buffer), saved to CSV.
# Files, ok_df rows and the station/lon/lat lists are matched by position.
ok30_df = stat_anal('OK', 30, sorted_direct_ok_30, ok_df, station_list_ok, ok_df_lons, ok_df_lats)
ok30_df.to_csv('/home/aevans/landtype/elevation/data/OK/elev/ok30_df.csv')

In [None]:
# Station maps, one per statistic (colour and marker size follow the named column).
plurality_plot(ok30_df, 'std')

In [None]:
plurality_plot(ok30_df, 'variance')

In [None]:
plurality_plot(ok30_df, 'skew')

In [None]:
plurality_plot(ok30_df, 'med_dist')

In [None]:
# Stations that pass the thresholds in good_sites_elev_ok (the Oklahoma variant).
good_list_ok = good_sites_elev_ok(ok30_df)

In [None]:
# Station / lon / lat of the passing stations.
good_df_ok = good_sites_df(ok30_df, good_list_ok)

In [None]:
# Map of the passing stations.
good_elevs_plot(good_df_ok)