In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import os
import geopandas as gpd
from netCDF4 import Dataset
import pathlib

In [3]:
# Directory that load_pillow_nc scans for the NetCDF files and the obs_summary shapefile.
insitu_dir = 'qa/'

In [9]:
def load_pillow_nc(insitu_dir, variable_name='your_variable_name'):
    """Load NetCDF observation arrays and the station-location shapefile.

    Parameters
    ----------
    insitu_dir : str or path-like
        Directory that is listed (non-recursively) for ``*.nc`` files and for
        a shapefile whose name ends with ``obs_summary.shp``.
    variable_name : str, optional
        Name of the NetCDF variable to read from every ``.nc`` file. The
        default is the placeholder this notebook was written with, so pass the
        real variable name to actually collect data. Files that do not contain
        the variable are skipped.

    Returns
    -------
    obs_data : list
        One array per ``.nc`` file (sorted by file name) that contains
        ``variable_name``.
    insitu_locations : geopandas.GeoDataFrame or None
        The shapefile contents with the CRS declared as EPSG:4326 and the
        column ``elevation_`` renamed to ``elevation_m``; ``None`` when no
        matching shapefile exists in ``insitu_dir``.
    """
    obs_data = []
    insitu_locations = None

    # One sorted directory listing serves both the NetCDF and the shapefile scan.
    for file in sorted(os.listdir(insitu_dir)):
        file_path = os.path.join(insitu_dir, file)

        if file.endswith('.nc'):
            # The context manager closes the dataset even when reading raises.
            with Dataset(file_path, mode='r') as dataset:
                if variable_name in dataset.variables:
                    obs_data.append(dataset.variables[variable_name][:])
        elif file.endswith('obs_summary.shp'):
            # Match on the suffix so sidecar files such as
            # 'obs_summary.shp.xml' are not handed to the shapefile reader.
            insitu_locations = gpd.read_file(file_path)
            insitu_locations = insitu_locations.set_crs('EPSG:4326')
            insitu_locations = insitu_locations.rename(
                columns={'elevation_': 'elevation_m'}
            )

    return obs_data, insitu_locations
# Load the NetCDF observation arrays and the station-location table from `insitu_dir`.
obs_data, insitu_locations = load_pillow_nc(insitu_dir)

In [10]:
# Show the station-location table returned by load_pillow_nc.
insitu_locations

Unnamed: 0,name,id,network,elevation_m,latitude,longitude,matching_f,median_ele,median_cou,missing_fl,corr_elev_,pil_elev_f,corr_ele_1,geometry
0,AGNEW PASS,AGP,cdec,2880.36,37.726631,-119.141731,47,9107.738281,1676,26,9718.852539,9450.000302,8614.581055,POINT (-119.14173 37.72663)
1,DANA MEADOWS,DAN,cdec,2987.04,37.896162,-119.25726,72,9070.583984,66362,1,9648.130859,9800.000314,8419.398438,POINT (-119.25726 37.89616)
2,DEADMAN CREEK,DDM,cdec,2819.4,38.331596,-119.654114,71,9590.981445,14464,2,10183.792969,9250.000296,8981.185547,POINT (-119.65411 38.3316)
3,DEVILS POSTPILE,DPO,cdec,2307.0312,37.62941,-119.084671,42,8925.460938,12876,31,9504.759766,7569.000242,8324.117188,POINT (-119.08467 37.62941)
4,VOGELSANG,FLV,cdec,3076.956,37.794571,-119.347404,15,9347.349609,184058,58,9986.211914,10095.000323,8637.858398,POINT (-119.3474 37.79457)
5,GEM PASS,GEM,cdec,3276.6,37.78,-119.17,60,9919.65625,26,13,11466.859375,10750.000344,8985.027344,POINT (-119.17 37.78)
6,GIN FLAT,GIN,cdec,2148.84,37.766887,-119.774907,62,8582.592773,62091,11,9242.079102,7050.000226,7926.728027,POINT (-119.77491 37.76689)
7,GIANELLI MEADOW,GNL,cdec,2560.32,38.204308,-119.893188,71,9255.415039,19338,2,9722.65918,8400.000269,8768.225586,POINT (-119.89319 38.20431)
8,GREEN MOUNTAIN,GRM,cdec,2407.92,37.549599,-119.232559,66,8333.356445,70845,7,9025.874023,7900.000253,7478.253418,POINT (-119.23256 37.5496)
9,HORSE MEADOW,HRS,cdec,2560.32,38.158,-119.662,66,9460.96875,126408,7,10033.056641,8400.000269,8834.308594,POINT (-119.662 38.158)


In [8]:
# Point insitu_dir at the USCATM QA directory.
# NOTE(review): this cell's execution count (8) is lower than that of the load
# cell (9) although it appears after it — presumably it was run first; confirm.
insitu_dir = 'USCATM/qa/'

In [11]:
# Write the station-location table to CSV; the row index is not written.
insitu_locations.to_csv('tm_pillow_locations.csv', index=False)

In [3]:
# Path to the summary-table CSV (relative to the notebook's working directory)
csv_file_path = 'USCASJ/total.csv'
# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)
# Print the first rows as plain text to check the column layout
print(df.head())

         time  aso_mean_bins_mm       AGP       BCB       BGP      BSH  \
0  2017-01-29        544.581278  1054.862  1688.084   570.992  623.316   
1  2017-03-09        844.992791  1382.522  2393.950  1071.372  887.730   
2  2017-04-02        770.517893  1421.892  2409.444  1116.584  901.192   
3  2017-04-30        826.494185       NaN       NaN  1060.958  825.500   
4  2017-06-06        416.856399       NaN       NaN    34.798    0.000   

        CHM       DAN      DPO  FLV  ...      SLK       STL  STR      SWM  \
0   819.912   867.664  945.388  NaN  ...  585.216   970.280  NaN  584.962   
1  1261.872  1248.664  964.946  NaN  ...  883.920  1533.398  NaN  848.868   
2  1197.864  1200.404  737.362  NaN  ...  859.536  1639.062  NaN  824.992   
3  1036.320  1244.600  515.112  NaN  ...  798.576  1772.666  NaN  881.634   
4     6.096   705.104    0.000  NaN  ...   15.240   381.254  0.0   23.876   

        TMR       TNY       TUM       UBC       VLC       WWC  
0   710.184  1051.306   736.

In [6]:
def get_all_and_baseline_pils(summary_table_fpath):
    """Return every pillow column and the subset usable as baseline pillows.

    Parameters
    ----------
    summary_table_fpath : str, path-like or file-like
        CSV with a ``time`` column, optionally an ``aso_mean_bins_mm`` column,
        and one column per pillow. Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    all_pils : list of str
        Every column except ``time`` and ``aso_mean_bins_mm``, in file order.
    baseline_pils : list of str
        Pillow columns that have no missing value on any "valid" date. A date
        is valid when every pillow whose yearly total is non-zero in all
        years has a value on that date. Empty when there is no valid date.
    """
    # Columns that are never pillows. Filtering (rather than list.remove)
    # keeps this working when 'aso_mean_bins_mm' is absent from the file or
    # was dropped below because it has a gap on a valid date.
    non_pillow_cols = ('time', 'aso_mean_bins_mm')

    # Load summary table and convert time to datetime
    df_summary_table = pd.read_csv(summary_table_fpath)
    df_summary_table['time'] = pd.to_datetime(df_summary_table['time'])

    # Create list of all pillows
    all_pils = [col for col in df_summary_table.columns if col not in non_pillow_cols]

    # Pillows whose total over every calendar year is present and non-zero
    df_year = df_summary_table.groupby(df_summary_table['time'].dt.year)[all_pils].sum()
    has_data_every_year = (df_year.notna() & df_year.ne(0)).all(axis=0)
    pillow_w_flight_per_year = [col for col, keep in has_data_every_year.items() if keep]

    # Dates on which all of those pillows (and the time stamp) are present
    required_cols = pillow_w_flight_per_year + ['time']
    complete_rows = df_summary_table[required_cols].notna().all(axis=1)
    valid_time = df_summary_table.loc[complete_rows, 'time']
    if valid_time.empty:
        # With zero rows no column contains a NaN, so every pillow would
        # vacuously qualify; report no baseline pillows instead.
        return all_pils, []

    # Baseline pillows: columns with no gap on any valid date
    df_valid = df_summary_table[df_summary_table['time'].isin(valid_time)]
    complete_cols = df_valid.dropna(axis=1, how='any').columns
    baseline_pils = [col for col in complete_cols if col not in non_pillow_cols]

    return all_pils, baseline_pils

In [19]:
summary_table_fpath = 'USCASJ/total.csv'
# get_all_and_baseline_pils returns two lists; unpack them by name rather than
# binding the tuple to `df`, a name used elsewhere in this notebook for DataFrames.
all_pil_ids, baseline_pil_ids = get_all_and_baseline_pils(summary_table_fpath)
# One-column ('id') tables so the ids can be merged with the location table.
all_pils = pd.DataFrame(all_pil_ids, columns=['id'])
baseline_pils = pd.DataFrame(baseline_pil_ids, columns=['id'])

In [20]:
# Show the one-column ('id') table of all pillow ids.
all_pils

Unnamed: 0,id
0,AGP
1,BCB
2,BGP
3,BSH
4,CHM
5,DAN
6,DPO
7,FLV
8,GEM
9,GRM


In [27]:
# Directory scanned below for the NetCDF files and the obs_summary shapefile.
insitu_dir = 'qa/'

# Function to load snow pillow data and locations
def load_pillow_nc(insitu_dir, variable_name='your_variable_name'):
    """Load NetCDF observation arrays and the station-location shapefile.

    Parameters
    ----------
    insitu_dir : str or path-like
        Directory that is listed (non-recursively) for ``*.nc`` files and for
        a shapefile whose name ends with ``obs_summary.shp``.
    variable_name : str, optional
        Name of the NetCDF variable to read from every ``.nc`` file. The
        default is the placeholder this notebook was written with, so pass the
        real variable name to actually collect data. Files that do not contain
        the variable are skipped.

    Returns
    -------
    obs_data : list
        One array per ``.nc`` file (sorted by file name) that contains
        ``variable_name``.
    insitu_locations : geopandas.GeoDataFrame or None
        The shapefile contents with the CRS declared as EPSG:4326 and the
        column ``elevation_`` renamed to ``elevation_m``; ``None`` when no
        matching shapefile exists in ``insitu_dir``.
    """
    obs_data = []
    insitu_locations = None

    # One sorted directory listing serves both the NetCDF and the shapefile scan.
    for file in sorted(os.listdir(insitu_dir)):
        file_path = os.path.join(insitu_dir, file)

        if file.endswith('.nc'):
            # The context manager closes the dataset even when reading raises.
            with Dataset(file_path, mode='r') as dataset:
                if variable_name in dataset.variables:
                    obs_data.append(dataset.variables[variable_name][:])
        elif file.endswith('obs_summary.shp'):
            # Match on the suffix so sidecar files such as
            # 'obs_summary.shp.xml' are not handed to the shapefile reader.
            insitu_locations = gpd.read_file(file_path)
            insitu_locations = insitu_locations.set_crs('EPSG:4326')
            insitu_locations = insitu_locations.rename(
                columns={'elevation_': 'elevation_m'}
            )

    return obs_data, insitu_locations
# Load the NetCDF observation arrays and the station-location table.
obs_data, insitu_locations = load_pillow_nc(insitu_dir)

# load_pillow_nc returns None for the locations when the directory holds no
# 'obs_summary.shp' file; stop with a clear message instead of failing on the
# column selection below with "'NoneType' object is not subscriptable".
if insitu_locations is None:
    raise FileNotFoundError(
        f"No 'obs_summary.shp' shapefile found in {insitu_dir!r}"
    )

# Take longitude/latitude from the point geometry (replaces same-named columns).
insitu_locations['longitude'] = insitu_locations.geometry.x
insitu_locations['latitude'] = insitu_locations.geometry.y

# Keep only the id, coordinate and elevation columns.
insitu_locations_df = insitu_locations[['id', 'longitude', 'latitude', 'elevation_m']]

In [28]:
# Show the reduced location table (id, longitude, latitude, elevation_m).
insitu_locations_df

Unnamed: 0,id,longitude,latitude,elevation_m
0,AGP,-119.141731,37.726631,2880.36
1,BCB,-118.77301,37.066685,3139.44
2,BGP,-118.476967,37.127815,2987.04
3,BSH,-118.557,37.1,3413.76
4,CHM,-119.492188,37.40839,2179.32
5,DAN,-119.25726,37.896162,2987.04
6,DPO,-119.084671,37.62941,2307.0312
7,FLV,-119.347404,37.794571,3076.956
8,GEM,-119.17,37.78,3276.6
9,GRM,-119.232559,37.549599,2407.92


In [29]:
# Join the location table with the list of all pillow ids on 'id'; pd.merge
# defaults to an inner join, so only ids present in both tables remain.
all_pils_check = pd.merge(insitu_locations_df, all_pils, on='id')
all_pils_check

Unnamed: 0,id,longitude,latitude,elevation_m
0,AGP,-119.141731,37.726631,2880.36
1,BCB,-118.77301,37.066685,3139.44
2,BGP,-118.476967,37.127815,2987.04
3,BSH,-118.557,37.1,3413.76
4,CHM,-119.492188,37.40839,2179.32
5,DAN,-119.25726,37.896162,2987.04
6,DPO,-119.084671,37.62941,2307.0312
7,FLV,-119.347404,37.794571,3076.956
8,GEM,-119.17,37.78,3276.6
9,GRM,-119.232559,37.549599,2407.92


In [31]:
# Same join for the baseline pillow ids: keeps the location rows whose 'id'
# also appears in baseline_pils (pd.merge defaults to an inner join).
base_pils_check = pd.merge(insitu_locations_df, baseline_pils, on='id')
base_pils_check

Unnamed: 0,id,longitude,latitude,elevation_m
0,CHM,-119.492188,37.40839,2179.32
1,DAN,-119.25726,37.896162,2987.04
2,KSP,-119.103371,37.29818,2804.16
3,PSR,-119.520813,37.402821,2103.12
4,RCK,-118.735023,37.457275,2956.56
5,SLK,-118.56266,37.175903,2926.08
6,STL,-118.57325,36.926483,3169.92
7,SWM,-118.562592,37.161964,3108.96
8,TMR,-119.200531,37.16375,2301.24
9,TNY,-119.449875,37.837581,2484.12


In [32]:
# URL of the us-states.json example file in the folium repository.
# NOTE(review): `source` is not used by any other cell visible here — confirm it is still needed.
source='https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json'

In [33]:
# Echo the URL string.
source

'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json'

In [12]:
from bokeh.plotting import figure, show
from bokeh.tile_providers import get_provider, Vendors

# Use a pre-configured tile provider
# NOTE(review): `bokeh.tile_providers.get_provider` is reported as deprecated in
# Bokeh 3.x (tile names can be passed to `add_tile` directly) — confirm against
# the installed Bokeh version before relying on it.
tile_provider = get_provider(Vendors.CARTODBPOSITRON)

# Create the plot
# Both axes use the "mercator" axis type; the ranges are presumably Web
# Mercator metres rather than degrees — TODO confirm.
p = figure(x_range=(-2000000, 6000000), y_range=(-1000000, 7000000),
           x_axis_type="mercator", y_axis_type="mercator")
# Draw the basemap tiles underneath any glyphs.
p.add_tile(tile_provider)

show(p)

In [13]:
# Load the pillow-location table from CSV.
# NOTE(review): this rebinds `df`, which other cells also assign — confirm no
# later cell expects the earlier contents.
df = pd.read_csv('sj_pillow_locations.csv')

In [14]:
# Show the table loaded from 'sj_pillow_locations.csv'.
df

Unnamed: 0,name,id,network,elevation_m,latitude,longitude,geometry
0,AGNEW PASS,AGP,cdec,2880.36,37.726631,-119.141731,POINT (-119.141731 37.726631)
1,BLACKCAP BASIN,BCB,cdec,3139.44,37.066685,-118.77301,POINT (-118.77301 37.066685)
2,BIG PINE CREEK,BGP,cdec,2987.04,37.127815,-118.476967,POINT (-118.476967 37.127815)
3,BISHOP PASS,BSH,cdec,3413.76,37.1,-118.557,POINT (-118.557 37.1)
4,CHILKOOT MEADOW,CHM,cdec,2179.32,37.40839,-119.492188,POINT (-119.492188 37.40839)
5,DANA MEADOWS,DAN,cdec,2987.04,37.896162,-119.25726,POINT (-119.25726 37.896162)
6,DEVILS POSTPILE,DPO,cdec,2307.0312,37.62941,-119.084671,POINT (-119.084671 37.62941)
7,VOGELSANG,FLV,cdec,3076.956,37.794571,-119.347404,POINT (-119.347404 37.794571)
8,GEM PASS,GEM,cdec,3276.6,37.78,-119.17,POINT (-119.17 37.78)
9,GREEN MOUNTAIN,GRM,cdec,2407.92,37.549599,-119.232559,POINT (-119.232559 37.549599)


In [46]:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.tile_providers import get_provider, Vendors
from bokeh.models import WMTSTileSource
# Define the coordinates for some cities in California

def web_mercartor(df, lon="lon", lat="lat"):
    """Add Web Mercator ``x`` and ``y`` columns to ``df`` in place and return it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding longitude and latitude columns in decimal degrees.
    lon, lat : str
        Names of the longitude and latitude columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now carrying ``x`` and ``y``.
    """
    # Sphere radius used by the projection (6378137, in metres).
    radius = 6378137
    scale_per_degree = radius * np.pi/180.0

    # x is linear in longitude.
    df["x"] = df[lon] * scale_per_degree

    # y = R * ln(tan(pi/4 + lat/2)), with the angle converted from degrees.
    angle = (90 + df[lat]) * np.pi /360
    df["y"] = np.log(np.tan(angle)) * radius
    return df

# Coordinates (decimal degrees) for a few California cities.
california_cities = {
    'City': ['San Francisco', 'Los Angeles', 'San Diego', 'Sacramento'],
    'lat': [37.7749, 34.0522, 32.7157, 38.5816],
    'lon': [-122.4194, -118.2437, -117.1611, -121.4944]
}

df = pd.DataFrame(california_cities)
# web_mercartor adds the projected "x"/"y" columns to `df` in place and returns
# the same frame, so `map_df` and `df` refer to one object.
map_df = web_mercartor(df)
print(map_df.head())

p = figure(title="California Cities", tools="pan,wheel_zoom,reset",
           x_axis_type="mercator", y_axis_type="mercator",
           width=500, height=500)

# Basemap tiles come from an explicit WMTS URL. The earlier unused
# `get_provider(...)` lookup (a deprecated Bokeh API) is dropped, and the tiles
# are requested over HTTPS so they are not blocked as mixed content when the
# notebook itself is served over HTTPS.
url = "https://a.basemaps.cartocdn.com/rastertiles/voyager/{Z}/{X}/{Y}.png"
p.add_tile(WMTSTileSource(url=url))

# Plot the cities at their projected coordinates
p.circle(x=df["x"], y=df["y"], size=10)
# Show the plot
show(p)

            City      lat       lon             x             y
0  San Francisco  37.7749 -122.4194 -1.362767e+07  4.547675e+06
1    Los Angeles  34.0522 -118.2437 -1.316283e+07  4.035813e+06
2      San Diego  32.7157 -117.1611 -1.304231e+07  3.857628e+06
3     Sacramento  38.5816 -121.4944 -1.352469e+07  4.661915e+06


In [19]:
# The dict's latitude key is 'lat'; the former 'Latitude' key raises KeyError
# against the current definition of california_cities.
california_cities['lat']

[37.7749, 34.0522, 32.7157, 38.5816]

In [20]:
# The dict's longitude key is 'lon'; the former 'Longitude' key raises KeyError
# against the current definition of california_cities.
california_cities['lon']

[-122.4194, -118.2437, -117.1611, -121.4944]

In [34]:
# Scratch check of the projection expressions on a single value in degrees.
sample_1 = 37.7749

# The same angle converted to radians.
numpy_radian = np.radians(sample_1)
# The linear expression that web_mercartor applies to the longitude column.
formula_2 = sample_1 * (6378137 * np.pi/180.0)
# The logarithmic expression that web_mercartor applies to the latitude column.
formula_3 = np.log(np.tan((90 + sample_1) * np.pi / 360)) * 6378137

In [35]:
# Print the three scratch values side by side.
print(numpy_radian, formula_2, formula_3)

0.659296379611606 4205082.632766831 4547675.354340559


In [39]:
# Show the latitude column of the cities table.
df["lat"]

0    37.7749
1    34.0522
2    32.7157
3    38.5816
Name: lat, dtype: float64

In [43]:
# Show the cities table, including the projected "x"/"y" columns.
df

Unnamed: 0,City,lat,lon,x,y
0,San Francisco,37.7749,-122.4194,-13627670.0,4547675.0
1,Los Angeles,34.0522,-118.2437,-13162830.0,4035813.0
2,San Diego,32.7157,-117.1611,-13042310.0,3857628.0
3,Sacramento,38.5816,-121.4944,-13524690.0,4661915.0


In [52]:
# Load the pillow-location table from CSV.
# NOTE(review): no cell visible here writes 'sj_pillow_locations.csv' — confirm where it comes from.
sj_pillow_df = pd.read_csv('sj_pillow_locations.csv')

# Convert coordinates to the Web Mercator projection
def coor_conv(df, lon="longitude", lat="latitude"):
    """Write Web Mercator ``x``/``y`` columns into ``df`` in place.

    ``lon`` and ``lat`` name the columns holding longitude and latitude in
    decimal degrees. Nothing is returned; the caller's frame is modified.
    """
    # Sphere radius used by the projection (6378137, in metres).
    sphere_radius = 6378137

    longitudes = df[lon]
    df["x"] = longitudes * (sphere_radius * np.pi/180.0)

    # Read latitude only after "x" has been stored, as the statements are ordered.
    tangent = np.tan((90 + df[lat]) * np.pi /360)
    df["y"] = np.log(tangent) * sphere_radius
    return None

# Convert: coor_conv adds the "x" and "y" columns to sj_pillow_df in place.
coor_conv(sj_pillow_df)

In [53]:
# Show the location table with the added "x"/"y" columns.
sj_pillow_df

Unnamed: 0,name,id,network,elevation_m,latitude,longitude,geometry,x,y
0,AGNEW PASS,AGP,cdec,2880.36,37.726631,-119.141731,POINT (-119.141731 37.726631),-13262800.0,4540880.0
1,BLACKCAP BASIN,BCB,cdec,3139.44,37.066685,-118.77301,POINT (-118.77301 37.066685),-13221750.0,4448406.0
2,BIG PINE CREEK,BGP,cdec,2987.04,37.127815,-118.476967,POINT (-118.476967 37.127815),-13188800.0,4456938.0
3,BISHOP PASS,BSH,cdec,3413.76,37.1,-118.557,POINT (-118.557 37.1),-13197700.0,4453055.0
4,CHILKOOT MEADOW,CHM,cdec,2179.32,37.40839,-119.492188,POINT (-119.492188 37.40839),-13301810.0,4496185.0
5,DANA MEADOWS,DAN,cdec,2987.04,37.896162,-119.25726,POINT (-119.25726 37.896162),-13275660.0,4564767.0
6,DEVILS POSTPILE,DPO,cdec,2307.0312,37.62941,-119.084671,POINT (-119.084671 37.62941),-13256440.0,4527205.0
7,VOGELSANG,FLV,cdec,3076.956,37.794571,-119.347404,POINT (-119.347404 37.794571),-13285690.0,4550446.0
8,GEM PASS,GEM,cdec,3276.6,37.78,-119.17,POINT (-119.17 37.78),-13265940.0,4548394.0
9,GREEN MOUNTAIN,GRM,cdec,2407.92,37.549599,-119.232559,POINT (-119.232559 37.549599),-13272910.0,4515993.0


In [55]:
# Load the pillow QA table from CSV.
smaller_df = pd.read_csv('sj_pillow_qa_table.csv')

In [56]:
# Preview the first rows of the QA table.
smaller_df.head()

Unnamed: 0,time,AGP,BCB,BGP,BSH,CHM,DAN,DPO,FLV,GEM,...,SNF,STL,STR,SWM,TMR,TNY,TUM,UBC,VLC,WWC
0,2012-10-01,,97.79,0.0,0.0,0.0,0.0,0.0,,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2012-10-02,,97.536,0.0,0.0,0.0,0.0,0.0,,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2012-10-03,2.032,97.028,0.0,0.0,0.0,0.0,0.0,,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2012-10-04,0.0,96.52,0.0,0.0,0.0,0.0,0.0,,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2012-10-05,0.0,97.79,0.0,0.0,0.0,0.0,0.0,,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
