In [None]:
%load_ext autoreload
%autoreload 2
import sys

# instead of creating a package using setup.py or building from a docker/singularity file,
# import the sister directory of src code to be called on in notebook.
# This keeps the notebook free from code to only hold visualizations and is easier to test
# It also helps keep the state of variables clean such that cells aren't run out of order with a mysterious state
sys.path.append("..")

In [None]:
from pynhd import NLDI
import pygeohydro as gh
from pygeohydro import plot
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import os
import cfgrib
from datetime import datetime, timedelta, date
import cartopy.crs as crs
import cartopy.feature as cfeature
from scipy.interpolate import griddata
import rasterio
import matplotlib as mpl
import dask.dataframe as dd
from matplotlib import colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib import colors as mcolors
from array import array
from shapely.geometry import Polygon
import json
from shapely.geometry import Polygon, mapping
import py3dep as pp
import statistics
from matplotlib import cm
import matplotlib.patches as mpatches
from scipy.stats import skew
from src.plotting_scripts import landtype

- NLCD 2019 land cover (CONUS) data: https://www.mrlc.gov/data/nlcd-2019-land-cover-conus 
- NLCD class legend and descriptions: https://www.mrlc.gov/data/legends/national-land-cover-database-class-legend-and-description 

## Create a DF for Mesonet

In [None]:
# Locate the most recent daily file in the local Mesonet archive.
# this is my file path
mesonet_data_path = "/home/aevans/nysm/archive/nysm/netcdf/proc"

# os.listdir() returns names in arbitrary order, so each listing is sorted
# before its last entry is taken as the "most recent" one.
# Assumes the names sort chronologically (zero-padded year / month / YYYYMMDD.nc).

# find most recent year
dir_Year = sorted(os.listdir(mesonet_data_path))
data_point_Year = dir_Year[-1]
print(dir_Year)

# find most recent month
dir_Month = sorted(os.listdir(f"{mesonet_data_path}/{data_point_Year}"))
data_point_Month = dir_Month[-1]

# files of the most recent year/month
# (not named 'dir', which would shadow the builtin)
dir1 = sorted(os.listdir(f"{mesonet_data_path}/{data_point_Year}/{data_point_Month}"))

# this will give me the most recent data point
data_point = dir1[-1]

# the file name starts with YYYYMMDD: split it into its parts
new_year = data_point[0:4]
new_month = data_point[4:6]
new_day = data_point[6:8]

In [None]:
# most recent year
dir_Year = os.listdir(f"{mesonet_data_path}")
sort_dir_Year = sorted(dir_Year)
data_point_Year = sort_dir_Year[-1]

In [None]:
# find most recent month
dir_Month = os.listdir(f"{mesonet_data_path}/{data_point_Year}")
sort_dir_Month = sorted(dir_Month)
data_point_Month = sort_dir_Month[-1]

In [None]:
# this is your directory for most recent year and month
most_recent = os.listdir(f"{mesonet_data_path}/{data_point_Year}/{data_point_Month}")

In [None]:
# most recent datapoint
sort_most_recent = sorted(most_recent)
data_point = sort_most_recent[-1]

In [None]:
# this will return the year of the most recent data point
new_year = data_point[0:4]

In [None]:
# this will return the month of the most recent datapoint
new_month = data_point[4:6]

In [None]:
# this will return the day of the most recent datapoint
new_day = data_point[6:8]

In [None]:
# create Mesonet DataFrame

# date parts of the most recent file
year = new_year
month = new_month
day = new_day

# file name
file = f"{year}{month}{day}.nc"

# Open the dataset in a context manager so the NetCDF file handle is released
# once the data has been copied into the DataFrame.
with xr.open_dataset(f"{mesonet_data_path}/{year}/{month}/{file}") as mesonet_ds:
    mesonet_df = mesonet_ds.to_dataframe().reset_index()

In [None]:
mesonet_df

In [None]:
# Rows that carry an air-temperature reading.
current_time_df = mesonet_df.dropna(subset=["tair"])
if current_time_df.empty:
    raise ValueError("mesonet_df has no rows with a non-null 'tair' value")

# Timestamp of the last such row.
last_value = current_time_df["time_5M"].iat[-1]
hour = last_value.hour
minute = last_value.minute
second = last_value.second

# time of day, zero-padded as HH:MM:SS
time = f"{hour:02d}:{minute:02d}:{second:02d}"

# Every row recorded at that timestamp. The comparison uses the timestamp
# itself rather than a string rebuilt from its parts, and mesonet_df is left
# untouched so this cell gives the same result when it is re-run.
# .copy() makes the result an independent frame that can safely receive new columns.
mesonet_single_datetime_df = mesonet_df.loc[
    mesonet_df["time_5M"] == last_value
].copy()
mesonet_single_datetime_df

## Create coordinate lists to feed into the NLCD lookup

In [None]:
# Add float-typed "longitude" / "latitude" columns. assign() builds a new
# frame, so nothing is written into a slice of another DataFrame
# (which would trigger pandas' SettingWithCopyWarning).
mesonet_single_datetime_df = mesonet_single_datetime_df.assign(
    longitude=mesonet_single_datetime_df["lon"].astype(float),
    latitude=mesonet_single_datetime_df["lat"].astype(float),
)

In [None]:
longitude_list = mesonet_single_datetime_df["longitude"].to_list()
latitude_list = mesonet_single_datetime_df["latitude"].to_list()

In [None]:
# Pair the coordinates position by position into (lon, lat) tuples.
mesonet_lon_lat_list = list(zip(longitude_list, latitude_list))

In [None]:
mesonet_lon_lat_list

In [None]:
# Two-column table of the station coordinates.
lon_lat_df = pd.DataFrame({"lon": longitude_list, "lat": latitude_list})

In [None]:
# lon_lat_df.to_csv('nysm_coords.csv')

In [None]:
lulc = gh.nlcd_bycoords(mesonet_lon_lat_list).set_crs(epsg=4326)
lulc

In [None]:
# # analysis on data
lulc["cover_2019"].plot.hist(bins=80, rwidth=0.9, color="red")

In [None]:
lulc[["cover_2019"]].describe()

In [None]:
lulc["cover_2019"].mode()

In [None]:
lulc["cover_2019"].value_counts()

In [None]:
lulc["lon"] = longitude_list
lulc["lat"] = latitude_list

In [None]:
def scatterPlot(df, ax, lonW, lonE, latS, latN, fig, x="lon", y="lat", c="color"):
    """Scatter the rows of ``df`` on a cartopy map axis and decorate the map.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns named by ``x`` and ``y`` (and ``c`` when it
        is a column name).
    ax : cartopy GeoAxes
        Axis to draw on.
    lonW, lonE, latS, latN : float
        Map extent, handed to ``ax.set_extent`` as ``[lonW, lonE, latS, latN]``
        in PlateCarree coordinates.
    fig : matplotlib Figure
        Not used; accepted so existing calls keep working.
    x, y : str
        Names of the columns plotted on the horizontal / vertical axis.
    c : str
        Passed to ``DataFrame.plot.scatter`` as ``c``.
    """
    plate_carree = crs.PlateCarree()

    df.plot.scatter(x=x, y=y, c=c, ax=ax, zorder=5)

    ax.set_extent([lonW, lonE, latS, latN], crs=plate_carree)
    for feature, style in (
        (cfeature.LAND, {}),
        (cfeature.COASTLINE, {}),
        (cfeature.BORDERS, {"linestyle": "--"}),
        (cfeature.LAKES, {"alpha": 0.5}),
        (cfeature.STATES, {}),
    ):
        ax.add_feature(feature, **style)

    grid = ax.gridlines(
        crs=plate_carree,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility belongs to the Gridliner returned by gridlines(),
    # not to the axes: switch off the top and right tick labels there.
    grid.top_labels = False
    grid.right_labels = False

    # # to annotate map
    # for k, v in df.iterrows():
    #     ax.annotate(v.nlcd,xy=(v.lon,v.lat),
    #                 xytext=(10,-5), textcoords='offset points',
    #                 family='sans-serif', fontsize=10, color='darkslategrey')

In [None]:
# create colormap
# One row per land-cover class: (class code, plotting colour, description).
# The three parallel sequences used for plotting are derived from this table,
# so position i of `colors`, `legend` and `leg_str` always describes one class.
nlcd_class_table = [
    (0, "black", "No Data"),
    (11, "blue", "Open Water"),
    (12, "white", "Perennial Ice/Snow"),
    (21, "coral", "Developed, Open Space"),
    (22, "pink", "Developed, Low Intensity"),
    (23, "red", "Developed, Medium Intensity"),
    (24, "magenta", "Developed High Intensity"),
    (31, "gray", "Barren Land (Rock/Sand/Clay)"),
    (41, "lime", "Deciduous Forest"),
    (42, "forestgreen", "Evergreen Forest"),
    (43, "green", "Mixed Forest"),
    (45, "olive", "Forest/Shrub"),
    (51, "brown", "Dwarf Scrub"),
    (52, "slategray", "Shrub/Scrub"),
    (71, "darkorchid", "Grassland/Herbaceous"),
    (72, "plum", "Sedge/Herbaceous"),
    (73, "indigo", "Lichens"),
    (74, "purple", "Moss"),
    (81, "yellow", "Pasture/Hay"),
    (82, "gold", "Cultivated Crops"),
    (90, "orange", "Woody Wetlands"),
    (95, "cyan", "Emergent Herbaceous Wetlands"),
]

# NOTE: from here on `colors` is this list of colour names; it rebinds the
# name that the import cell gave to the matplotlib.colors module.
colors = [colour for _, colour, _ in nlcd_class_table]

# class codes, in the same order as `colors`
legend = np.array([code for code, _, _ in nlcd_class_table])

# human-readable class names, in the same order as `colors`
leg_str = [description for _, _, description in nlcd_class_table]

In [None]:
# Class code -> plotting colour, pairing `legend` and `colors` by position.
# The length check replaces a hard-coded entry count, so the tables can grow
# or shrink without touching this cell.
assert len(legend) == len(colors), "legend and colors need one entry per class"
colordict = dict(zip(legend, colors))

In [None]:
# Class code -> description, pairing `legend` and `leg_str` by position.
# The length check replaces a hard-coded entry count, so the tables can grow
# or shrink without touching this cell.
assert len(legend) == len(leg_str), "legend and leg_str need one entry per class"
descripdict = dict(zip(legend, leg_str))

In [None]:
lulc["color"] = lulc["cover_2019"].map(colordict)
lulc["nlcd"] = lulc["cover_2019"].map(descripdict)
lulc

In [None]:
new_cmap = ListedColormap(colors)

In [None]:
# ListedColormap expects a sequence of colours, not a mapping: pass the
# colour names of colordict in their insertion order.
cmap = ListedColormap(list(colordict.values()))

In [None]:
landtype.landtype(lulc)

In [None]:
lulc["nlcd"].value_counts()

# Oklahoma

In [None]:
# create dataframe for Oklahoma
# The path has to be defined before it is read, otherwise this cell raises
# NameError on a fresh kernel.
# NOTE(review): value restored from the previously commented-out assignment;
# confirm it still points at the right CSV.
Oklahoma_mesonet_filepath = "/home/aevans/landtype/NY_cartopy/csv_city/geoinfo.csv"
oklahoma_mesonet_df = pd.read_csv(Oklahoma_mesonet_filepath)
oklahoma_mesonet_df

In [None]:
len(oklahoma_mesonet_df)

In [None]:
ok_longitude_list = oklahoma_mesonet_df["elon"].to_list()
ok_latitude_list = oklahoma_mesonet_df["nlat"].to_list()

In [None]:
# (lon, lat) tuple per station, pairing the two coordinate lists directly
# instead of indexing them through an unrelated column.
ok_mesonet_lon_lat_list = list(zip(ok_longitude_list, ok_latitude_list))

In [None]:
ok_mesonet_lon_lat_list

In [None]:
# Two-column table of the Oklahoma station coordinates.
ok_lon_lat_df = pd.DataFrame({"lon": ok_longitude_list, "lat": ok_latitude_list})

In [None]:
# ok_lon_lat_df.to_csv('ok_coords.csv')

In [None]:
lulc2 = gh.nlcd_bycoords(ok_mesonet_lon_lat_list)
lulc2

In [None]:
# # analysis on data
lulc2["cover_2019"].plot.hist(bins=80, rwidth=0.9, color="blue")

In [None]:
lulc2[["cover_2019"]].describe()

In [None]:
lulc2["cover_2019"].mode()

In [None]:
lulc2["cover_2019"].value_counts()

In [None]:
lulc2["lon"] = ok_longitude_list
lulc2["lat"] = ok_latitude_list

In [None]:
lulc2["color"] = lulc2["cover_2019"].map(colordict)
lulc2["nlcd"] = lulc2["cover_2019"].map(descripdict)
lulc2

In [None]:
lulc2["lat"].max()

In [None]:
# Map of the Oklahoma stations, each coloured by its land-cover class.
projPC = crs.PlateCarree()
latN = lulc2["lat"].max()
latS = lulc2["lat"].min()
# west edge = smallest longitude, east edge = largest longitude
lonW = lulc2["lon"].min()
lonE = lulc2["lon"].max()
cLat = (latN + latS) / 2
cLon = (lonW + lonE) / 2
projLccOK = crs.LambertConformal(central_longitude=cLon, central_latitude=cLat)

fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.BORDERS, linestyle="--")
ax.add_feature(cfeature.LAKES, alpha=0.5)
ax.add_feature(cfeature.STATES)
gridliner = ax.gridlines(
    crs=projPC,
    draw_labels=True,
    linewidth=2,
    color="black",
    alpha=0.5,
    linestyle="--",
)
# Label visibility is configured on the Gridliner, not on the axes.
gridliner.top_labels = False
gridliner.right_labels = False

# The "color" column already holds explicit colour names, so no colormap is
# passed: matplotlib ignores cmap when c is a list of colours.
ax.scatter(
    lulc2["lon"],
    lulc2["lat"],
    c=lulc2["color"],
    transform=projPC,
    zorder=5,
)

# legend: one patch per land-cover class, pairing colours and labels by position
legend_patches = [
    mpatches.Patch(color=patch_color, label=patch_label)
    for patch_color, patch_label in zip(colors, leg_str)
]
ax.legend(
    bbox_to_anchor=(1.05, 1),
    loc="upper left",
    borderaxespad=0,
    handles=legend_patches,
)

In [None]:
lulc2["nlcd"].value_counts()

# Analysis of land cover within a fixed radius of a station

In [None]:
# Testing Kara's Code
# these are the buffered polygons for 28 mesonet sites in meters (epsg=3310)
lulc["geometry"].to_crs(epsg=3310).buffer(2000)

In [None]:
# grab just one site as a sample - you will eventually want to loop over all the sites...
sample_geom = lulc["geometry"].to_crs(epsg=3310).buffer(20000).iloc[0]

In [None]:
sample_geom

In [None]:
# A shapely geometry exposes its bounding box through .bounds as the tuple
# (minx, miny, maxx, maxy); unpack it in one step.
min_x, min_y, max_x, max_y = sample_geom.bounds

In [None]:
# 30 evenly spaced coordinates along each side of the bounding box.
# np.linspace's third argument is a number of samples, so the spacing depends
# on the size of the box; it is not a fixed 30 m step.
x_array, y_array = (
    np.linspace(lower, upper, 30)
    for lower, upper in ((min_x, max_x), (min_y, max_y))
)

In [None]:
# now take the two X and Y arrays and create a meshgrid, so you get all of the inner points of the grid
X, Y = np.meshgrid(x_array, y_array)

In [None]:
##create dataframe from all X and Y values
all_points = pd.DataFrame({"lat": X.flatten(), "lon": Y.flatten()})

In [None]:
all_points

In [None]:
##convert the dataframe to a geopandas dataframe & make sure to assign crs as "meters" then convert to lat/lon
all_points_gdf = gpd.GeoDataFrame(
    all_points,
    geometry=gpd.points_from_xy(all_points.lat, all_points.lon),
    crs=3310,  # meters
)

In [None]:
# Reproject the grid points from meters to lat/lon (EPSG:4326).
all_points_gdf = all_points_gdf.to_crs(epsg=4326)

In [None]:
##these are all the points within the 30-km grid that surrounds the mesonet site
all_points_gdf

In [None]:
# The grid covers a square, but only the points inside a circle around the
# site are wanted: buffer every site in EPSG:3310, convert the buffers to
# lat/lon and keep the first one.
# NOTE(review): the radius here is 2000, while sample_geom (whose bounding box
# defines the grid) is built with buffer(20000) and the nearby comments speak
# of 30 km. Confirm which radius is intended.
site_buffers_ll = lulc["geometry"].to_crs(epsg=3310).buffer(2000).to_crs(epsg=4326)
sample_geom_ll = site_buffers_ll.iloc[0]

In [None]:
sample_geom_ll

In [None]:
# Boolean mask of the grid points that fall inside the buffer, then the
# matching rows with a fresh 0..n-1 index.
inside_buffer = all_points_gdf["geometry"].within(sample_geom_ll)
all_points_in_buffer_gdf = all_points_gdf.loc[inside_buffer].reset_index(drop=True)

In [None]:
##these are your points!
all_points_in_buffer_gdf

In [None]:
get_coords = all_points_in_buffer_gdf["geometry"]

In [None]:
get_coords.plot()

In [None]:
nlcd_analysis_list_1 = []
lulc_geo_1 = gpd.GeoSeries(lulc["geometry"])
analysis_df_2 = pd.DataFrame()
lon_lat_list_2 = []

In [None]:
my_length = len(get_coords) - 1

In [None]:
# Coordinates of the point at index my_length, stored as the two columns of
# analysis_df_2 and read back as plain lists.
last_point = get_coords[my_length]
xx, yy = last_point.coords.xy
analysis_df_2["lon"], analysis_df_2["lat"] = xx, yy
longitude_list_analysis_1 = analysis_df_2["lon"].tolist()
latitude_list_analysis_1 = analysis_df_2["lat"].tolist()

In [None]:
# First longitude/latitude of the lists, stored as one (lon, lat) tuple.
new_longitudes, new_latitudes = (
    longitude_list_analysis_1[0],
    latitude_list_analysis_1[0],
)
tuple_edit = (new_longitudes, new_latitudes)
lon_lat_list_2 += [tuple_edit]

In [None]:
# Collect the lon/lat of every grid point that lies inside the buffer.
my_nlcd_list = []
lulc_geo_1 = gpd.GeoSeries(lulc["geometry"])
lon_lat_list_3 = []  # (lon, lat) tuples
lon_list = []
lat_list = []

# Iterate over the points themselves so the last one is included; a loop over
# range(len(get_coords) - 1) stops one point short.
for point in get_coords:
    my_lon = point.x
    my_lat = point.y
    lon_lat_list_3.append((my_lon, my_lat))
    lon_list.append(my_lon)
    lat_list.append(my_lat)

In [None]:
# (lon, lat) tuple for every point inside the buffer. Iterating over the
# points directly includes the last one, which an index loop over
# range(len(get_coords) - 1) would skip.
lon_lat_list_4 = [(point.x, point.y) for point in get_coords]

In [None]:
lon_lat_list_4

In [None]:
# Table of the collected point coordinates, one row per point.
anal_df1 = pd.DataFrame({"lons": lon_list, "lats": lat_list})
anal_df1

In [None]:
def scatterPlot(df, ax, lonW, lonE, latS, latN, fig, x="lons", y="lats", c="r"):
    """Scatter the rows of ``df`` on a cartopy map axis and decorate the map.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns named by ``x`` and ``y``.
    ax : cartopy GeoAxes
        Axis to draw on.
    lonW, lonE, latS, latN : float
        Map extent, handed to ``ax.set_extent`` as ``[lonW, lonE, latS, latN]``
        in PlateCarree coordinates.
    fig : matplotlib Figure
        Not used; accepted so existing calls keep working.
    x, y : str
        Names of the columns plotted on the horizontal / vertical axis.
    c : str
        Passed to ``DataFrame.plot.scatter`` as ``c``.
    """
    plate_carree = crs.PlateCarree()

    df.plot.scatter(x=x, y=y, c=c, ax=ax, zorder=5)

    ax.set_extent([lonW, lonE, latS, latN], crs=plate_carree)
    for feature, style in (
        (cfeature.LAND, {}),
        (cfeature.COASTLINE, {}),
        (cfeature.BORDERS, {"linestyle": "--"}),
        (cfeature.LAKES, {"alpha": 0.5}),
        (cfeature.STATES, {}),
    ):
        ax.add_feature(feature, **style)

    grid = ax.gridlines(
        crs=plate_carree,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility belongs to the Gridliner returned by gridlines(),
    # not to the axes: switch off the top and right tick labels there.
    grid.top_labels = False
    grid.right_labels = False

In [None]:
# Map of the grid points collected inside the buffer.
projPC = crs.PlateCarree()
latN = anal_df1["lats"].max()
latS = anal_df1["lats"].min()
# west edge = smallest longitude, east edge = largest longitude
lonW = anal_df1["lons"].min()
lonE = anal_df1["lons"].max()
cLat = (latN + latS) / 2
cLon = (lonW + lonE) / 2
projLccNY = crs.LambertConformal(central_longitude=cLon, central_latitude=cLat)

fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})

scatterPlot(anal_df1, ax, lonW, lonE, latS, latN, fig)

In [None]:
lon_lat_list_3.sort()

In [None]:
lulc_analysis_12 = gh.nlcd_bycoords(lon_lat_list_3)

In [None]:
lulc_analysis_12["cover_2019"].value_counts()

In [None]:
# # analysis on data
lulc_analysis_12["cover_2019"].plot.hist(bins=80, rwidth=0.9, color="blue")

In [None]:
lulc_analysis_12[["cover_2019"]].describe()

In [None]:
lulc_analysis_12["cover_2019"].mode()

In [None]:
mesonet_single_datetime_df["elev"].iloc[0]

In [None]:
elevations = pp.elevation_bycoords(mesonet_lon_lat_list)
elevations

In [None]:
# A large variance indicates that the data is spread out, - a small variance indicates that the data is clustered closely around the mean.
statistics.pvariance(elevations)

In [None]:
statistics.pstdev(elevations)

In [None]:
statistics.mode(elevations)

In [None]:
statistics.mean(elevations)

In [None]:
# For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution.

skew(elevations)

In [None]:
max(elevations)

In [None]:
min(elevations)