# The code below reads a NYSM NetCDF data file into a pandas DataFrame and plots 4 variables on a map for the most recent date and time available.
Each cell below contains hints in the comments. Follow the hints and you should be able to produce the final image!

Import all the libraries you will need.
Some references that might help:
- https://scitools.org.uk/cartopy/docs/v0.13/matplotlib/gridliner.html
- https://towardsdatascience.com/plotting-geospatial-data-with-cartopy-4b5ad0da0761
- https://blog.jpolak.org/?p=2158
- https://foundations.projectpythia.org/core/cartopy/cartopy.html

In [None]:
#conda install -c conda-forge cartopy

In [None]:
#conda install -c conda-forge eccodes

In [None]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import os
import cfgrib
from datetime import datetime, timedelta, date
import cartopy.crs as crs
import cartopy.feature as cfeature
from scipy.interpolate import griddata

## Define the mesonet data path

In [None]:
# Directory holding the NYSM NetCDF files on this machine (edit for your own setup)
mesonet_data_path = "/home/aevans/NYSM/archive/nysm/netcdf/proc"

Get the current date and time. (Hint: this value will change every time you run this cell, so only rerun it when you need to!)

# Get current date and time

In [None]:
# Read the clock once and derive the date from that reading, so `today` and
# `now` can never disagree if the cell happens to run across midnight.
now = datetime.now()
today = now.date()
print(today)
print(now)

# function to round time to correct format

In [None]:
# to get minutes in correct format
def minute_rounder(t):
    """Floor the minute of ``t`` to the previous multiple of five.

    Only the minute field is changed; hour, second and microsecond are
    carried over untouched (e.g. 12:37:45 becomes 12:35:45).

    Parameters
    ----------
    t : datetime.datetime
        Timestamp to adjust.

    Returns
    -------
    datetime.datetime
        Copy of ``t`` whose minute is the largest multiple of 5 that does
        not exceed ``t.minute``.
    """
    overshoot = t.minute % 5
    # minute >= overshoot always holds, so the difference is never negative.
    floored_minute = t.minute - overshoot
    return t.replace(minute=floored_minute)


# Show the raw timestamp followed by the same timestamp with its minute
# floored to a 5-minute mark.
print(now, minute_rounder(now), sep="\n")

In [None]:
# Directory holding the NYSM NetCDF files on this machine
mesonet_data_path = "/home/aevans/NYSM/archive/nysm/netcdf/proc"

# Break the current timestamp into year / month / day strings
# (month and day come out zero-padded, e.g. "03").
year, month, day = (now.strftime(fmt) for fmt in ("%Y", "%m", "%d"))

print(year, month, day)

- Get only the dataframe rows that match the date & time you are looking for.
- NOTE that you do not want to hard-code this, as in the future you may want to make the date/time search dynamic!
- ALSO NOTE that the current time will not have an exact match in the file, so you will need to round to the nearest 5 minutes.
- ONE MORE NOTE that it takes time for the most recent mesonet data to be added to the file, so I suggest finding the rows that are CLOSEST to your current time.

# Create dynamic filepath

In [None]:
# Locate the newest data file in the archive, which is laid out as
#   <mesonet_data_path>/<year>/<month>/<YYYYMMDD>.nc
#
# os.listdir() returns entries in arbitrary order, so every listing is sorted
# before its last element is taken as the "most recent" one.

# find most recent year (entries that are not purely numeric are skipped so
# stray files cannot break the int() conversion)
dir_Year = sorted(int(f) for f in os.listdir(mesonet_data_path) if f.isdigit())
data_point_Year = dir_Year[-1]
print(dir_Year)

# find most recent month; sorting numerically handles both '9' and '09' names
dir_Month = sorted(
    (f for f in os.listdir(f"{mesonet_data_path}/{data_point_Year}") if f.isdigit()),
    key=int,
)
data_point_Month = dir_Month[-1]

# this is your most recent directory
# (named dir1 rather than 'dir' so the builtin dir() is not shadowed)
# YYYYMMDD.nc names sort chronologically as plain strings.
dir1 = sorted(
    f
    for f in os.listdir(f"{mesonet_data_path}/{data_point_Year}/{data_point_Month}")
    if f.endswith(".nc")
)

# this will give me the most recent data point
data_point = dir1[-1]

# the file name starts with YYYYMMDD, so slice out each part as a string
new_year = data_point[0:4]
new_month = data_point[4:6]
new_day = data_point[6:8]

# open the correct mesonet file (make this dynamic. Do not hardcode a specific date/time!)

In [None]:
# Directory holding the NYSM NetCDF files on this machine
mesonet_data_path = '/home/aevans/NYSM/archive/nysm/netcdf/proc'

# Date parts (as strings) of the file to open
year = new_year
month = new_month
day = new_day

# Daily files are named YYYYMMDD.nc
file = f"{year}{month}{day}.nc"

# Open the dataset as a context manager so the NetCDF file handle is closed
# as soon as its contents have been copied into the DataFrame.
with xr.open_dataset(f"{mesonet_data_path}/{year}/{month}/{file}") as ds:
    mesonet_df = ds.to_dataframe().reset_index()
mesonet_df

# This is to determine the most current time-stamp

In [None]:
# Only rows that actually carry an air-temperature reading are candidates
current_time_df = mesonet_df.dropna(subset=['tair'])
if current_time_df.empty:
    raise ValueError("mesonet_df has no rows with a 'tair' value")

# Latest timestamp that has data. max() is used instead of the last row so
# the result does not depend on how the rows happen to be ordered.
last_value = current_time_df['time_5M'].max()
hour = last_value.hour
minute = last_value.minute
second = last_value.second

# Zero-pad every field so the result is a well-formed HH:MM:SS string
# (plain str() would give e.g. '9:5:0' instead of '09:05:00').
string_hour = f"{hour:02d}"
string_minute = f"{minute:02d}"
string_sec = f"{second:02d}"

time = string_hour+':'+string_minute+':'+string_sec
print(time)

#get only the dataframe rows that match the date & time you are looking for
#NOTE that you do not want to hard code this, as in the future you may want to make the date/time search dynamic!
#ALSO NOTE that the current time will not have an exact match in the file. So you will need to round to the nearest 5 minutes
#ONE MORE NOTE that it takes time for the most recent mesonet data to be added to the file, so I suggest finding the rows that are CLOSEST to your current time

In [None]:
# Renumber the rows 0..n-1. drop=True discards the old index instead of
# inserting it as a new column, so re-running this cell no longer piles up
# 'index' / 'level_0' columns (and eventually fails on the name clash).
mesonet_df.reset_index(drop=True, inplace=True)

# Keep only the rows whose timestamp equals the selected date and time.
# .copy() makes the selection an independent DataFrame rather than a slice
# of mesonet_df.
target_datetime = f"{year}-{month}-{day} {time}"
mesonet_single_datetime_df = mesonet_df.loc[mesonet_df['time_5M'] == target_datetime].copy()
print(mesonet_single_datetime_df.head())

# this should result in a df with 126 rows
print(len(mesonet_single_datetime_df))

mesonet_single_datetime_df

In [None]:
# Preview the first 15 rows of the single-timestamp selection
mesonet_single_datetime_df.head(15)

# Now that we have a DataFrame with 126 rows at the correct time, let's make a visualization with 4 subplots of 4 variables, plotted on NYS!

In [None]:
def scatterPlot(df, c_label, s_label, color, ax, lonW, lonE, latS, latN):
    """Scatter the rows of ``df`` onto a cartopy map axes with map decorations.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'lon' and 'lat' columns, used as x and y.
    c_label : str
        Passed as ``c`` to ``DataFrame.plot.scatter`` (the notebook passes a
        column name to colour the markers by).
    s_label : str
        Passed as ``s`` to ``DataFrame.plot.scatter`` (the notebook passes a
        column name to size the markers by).
    color : str
        Colormap name handed to ``colormap``.
    ax : cartopy GeoAxes
        Axes to draw on; modified in place.
    lonW, lonE, latS, latN : float
        Longitude / latitude limits of the map, in degrees.

    Returns
    -------
    None
    """
    # Build the lon/lat CRS locally rather than reading a notebook global that
    # is only defined in a later cell.
    plate_carree = crs.PlateCarree()

    # transform= states that x/y are lon/lat degrees, so the points land in
    # the right place whatever projection `ax` uses.
    df.plot.scatter(x='lon', y='lat', c=c_label, s=s_label, colormap=color,
                    ax=ax, transform=plate_carree)
    ax.set_extent([lonW, lonE, latS, latN], crs=plate_carree)

    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle='--')
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)

    # Label visibility is a property of the Gridliner returned by gridlines(),
    # not of the axes; assigning such attributes on `ax` has no effect.
    # NOTE(review): top_labels/right_labels are the current cartopy names;
    # very old releases spelled them xlabels_top/ylabels_right.
    gl = ax.gridlines(crs=plate_carree, draw_labels=True,
                      linewidth=2, color='black', alpha=0.5, linestyle='--')
    gl.top_labels = False
    gl.right_labels = False

In [None]:
# 2 x 2 grid of map panels, each drawn in plain lon/lat (PlateCarree) coordinates
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2,
                                             ncols=2,
                                             figsize=(20, 15),
                                             subplot_kw={'projection': crs.PlateCarree()})

# Module-level lon/lat CRS; kept under this name because other cells may
# look it up as a global.
projPC = crs.PlateCarree()

# Bounding box of the stations. Longitude increases eastward, so the western
# edge is the minimum longitude and the eastern edge is the maximum.
latN = mesonet_single_datetime_df['lat'].max()
latS = mesonet_single_datetime_df['lat'].min()
lonW = mesonet_single_datetime_df['lon'].min()
lonE = mesonet_single_datetime_df['lon'].max()

# Lambert Conformal projection centred on the bounding box
# (defined here but not used by the four panels below).
cLat = (latN + latS) / 2
cLon = (lonW + lonE) / 2
projLccNY = crs.LambertConformal(central_longitude=cLon, central_latitude=cLat)

# One panel per variable; the same column drives both marker colour and size.
for column, axis in (('tair', ax1), ('wspd_merge', ax2), ('pres', ax3), ('precip', ax4)):
    scatterPlot(mesonet_single_datetime_df, column, column, 'jet', axis, lonW, lonE, latS, latN)

In [None]:
# Station coordinates and elevations for the selected timestamp
contour_data = mesonet_single_datetime_df[['lat', 'lon', 'elev']].copy()

# create variables for indexing purposes
Long = contour_data['lon']
Lat = contour_data['lat']
Elev = contour_data['elev']

# Regular 124 x 124 lon/lat grid spanning the station bounding box
[x, y] = np.meshgrid(np.linspace(np.min(Long), np.max(Long), 124),
                     np.linspace(np.min(Lat), np.max(Lat), 124))

# Linearly interpolate station elevation onto the grid. Grid points outside
# the convex hull of the stations come back as NaN.
z = griddata((Long, Lat), Elev, (x, y), method='linear')

# Flatten to 1-D point lists for scatter(); plain ndarray.flatten() is used
# because these are ndarrays, not np.matrix objects.
x = x.flatten()  # Gridded longitude
y = y.flatten()  # Gridded latitude
z = z.flatten()  # Gridded elevation


# cartopy
projPC = crs.PlateCarree()
# Longitude increases eastward: west edge = minimum, east edge = maximum.
latN = y.max()
latS = y.min()
lonW = x.min()
lonE = x.max()
cLat = (latN + latS) / 2
cLon = (lonW + lonE) / 2
projLccNY = crs.LambertConformal(central_longitude=cLon, central_latitude=cLat)

fig = plt.figure(figsize=(15, 10))
ax = plt.subplot(1, 1, 1, projection=projLccNY)
ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.BORDERS, linestyle='--')
ax.add_feature(cfeature.LAKES, alpha=0.5)
ax.add_feature(cfeature.STATES)

# Label visibility is controlled on the Gridliner returned by gridlines();
# assigning these attributes on the axes has no effect.
gl = ax.gridlines(crs=projPC, draw_labels=True,
                  linewidth=2, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False
gl.left_labels = True

# Single title for the axes (an earlier 'New York and Vicinity' title was
# always overwritten by this one, so it is no longer set).
ax.set_title('Mesonet Site Topography map')
ax.set_xlabel('Longitude [°]')
ax.set_ylabel('Latitude [°]')
ax.set_aspect('equal')

# x/y are lon/lat degrees, so tell cartopy to transform them from PlateCarree
# into the Lambert Conformal axes.
points = ax.scatter(x, y, 10, z, cmap='terrain', transform=projPC)
fig.colorbar(points, ax=ax, label='Elevation above sea level [m]')

# how can I make it so masks not NY land?
# false data points to make map nicer
#shapely file to mask?