Import relevant libraries

In [None]:
!pip install pyarrow

In [1]:
# Base libraries
import pandas as pd
import numpy as np
import os
import math as math
import datetime
from scipy import stats

# Visualizations
import plotly.express as px
import plotly.graph_objects as go

Set working directory

In [2]:
import os

# Project root. Set the GEUS_THESIS_DIR environment variable to run the
# notebook on another machine; the fallback is the original local checkout.
PROJECT_DIR = os.environ.get(
    'GEUS_THESIS_DIR', 'C:\\Users\\nifu18ab\\Desktop\\GEUS-Master-Thesis'
)

# The parquet files are read through relative 'data' paths, so the working
# directory has to be the project root.
os.chdir(PROJECT_DIR)

**Helper Functions**

In [89]:
def _load_data(data):
    # Ignore Warnings
    import warnings
    warnings.filterwarnings('ignore')

    # Load data
    if data == "GC Net":
        df = pd.read_parquet('data\df_daily.gzip', engine='pyarrow')
    elif data == "Promice":
        df = pd.read_parquet('data\promice_daily.gzip', engine='pyarrow')
    else: 
        raise ValueError("Only 'GC Net' & 'Promice' are accepted input values")
    
    return df

def _align_GC_PR():
    station = "file"
    datetime = "Datetime"
    dayofcentury = "DayOfCentury"
    dayofyear = 'DayOfYear'
    return station,datetime,dayofcentury,dayofyear

def _exclude():
    # List of columns to exclude from percentile calculation
    exclude = ['Year', 'MonthOfYear', 'DayOfMonth', 'HourOfDay(UTC)', 
               'DayOfYear', 'LongitudeGPS(degW)','HeightStakes(m)',
               'DayOfCentury', 'WindDirection(d)', 'TiltToEast(d)', 
               'TiltToNorth(d)', 'TimeGPS(hhmmssUTC)', 'LatitudeGPS(degN)', 
               'ElevationGPS(m)', 'HorDilOfPrecGPS', 'LoggerTemperature(C)',
               'FanCurrent(mA)', 'BatteryVoltage(V)', 'Month', 'Day', 'Hour',

              'air_temperature_1_max', 'air_temperature_1_min',
              'wind_speed_u1_max','wind_speed_u2_max',
              'wind_from_direction_1', 'wind_from_direction_2', 
              'height_wind_sensor_1', 'height_wind_sensor_2', 'battery_voltage',
              'shortwave_incoming_radiation_max',
              'shortwave_incoming_radiation_stdev', 'net_radiation_stdev',
              'air_temperature_2_max', 'air_temperature_2_min', 
              'wind_speed_u2_stdev', 'ref_temperature',   'wind_speed_u1_stdev',
              'net_radiation_maximum', 'season', 'year', 'month', 'DayOfYear',
              'DayOfCentury']
    return exclude

def _subset_df(date, df, station, datetime, measurement, dayofcentury, dayofyear, aws):
    """
    Locate the selected date and narrow the dataset by measurement and station.

    Parameters:
    date (str): Selected date; year, month and day are read from the character
                positions 0-4, 5-7 and 8-10 (i.e. 'YYYY-MM-DD').
    df (pd.DataFrame): The full dataset.
    station (str): Name of the station column.
    datetime (str): Name of the datetime column (datetime dtype, ``.dt`` is used).
    measurement (str): "All" or the name of one numeric, non-excluded column.
    dayofcentury (str): Name of the day-of-century column.
    dayofyear (str): Name of the day-of-year column.
    aws (str): "All" or one value of the station column.

    Returns:
    tuple: (day, df, columns, day_century, unique_files) where
        day         - mean of the day-of-year column over the rows of ``date``
                      (NaN when no row matches),
        df          - the dataset after the measurement / station subsetting,
        columns     - numeric columns of ``df`` that are not in ``_exclude()``,
        day_century - mean of the day-of-century column over the rows of ``date``,
        unique_files- list of the station values left in ``df``.

    Raises:
    ValueError: If ``measurement`` or ``aws`` is not a recognised option.
    """
    exclude = _exclude()

    # Split date input into year, month, day
    year = int(date[0:4])
    month = int(date[5:7])
    day_of_month = int(date[8:10])

    # Rows observed on the selected date. The column names come from the
    # arguments (not hard-coded) so all lookups follow _align_GC_PR().
    stamps = df[datetime].dt
    on_date = (stamps.year == year) & (stamps.month == month) & (stamps.day == day_of_month)
    date_df = df.loc[on_date]

    # Day of year / day of century of the selected date
    day = date_df[dayofyear].mean()
    day_century = date_df[dayofcentury].mean()

    # Candidate measurement columns
    columns = df.select_dtypes(include=[np.number]).columns.difference(exclude)

    ## Subset by measurement
    if measurement != "All":
        if measurement not in columns:
            raise ValueError(f"The input for the variable 'measurement' was not recognizable. Please use one of the following options: {columns}")
        df = df[[station, datetime, measurement, dayofcentury, dayofyear]]
        # Only the selected measurement is left as a relevant column
        columns = df.select_dtypes(include=[np.number]).columns.difference(exclude)

    ## Subset by station
    unique_files = list(df[station].unique())
    if aws != "All":
        if aws not in unique_files:
            raise ValueError(f"The input for the variable 'aws' was not recognizable. Please use one of the following options: {unique_files}")
        df = df.loc[df[station] == aws]
        unique_files = list(df[station].unique())

    return day, df, columns, day_century, unique_files

def _subset_scope(scope, df,day, dayofyear):
    if scope == "Relative":
        # filter by calender day
        df = df.loc[df[dayofyear] == day]
    elif scope == "Absolute":
        df = df
    else: 
        raise ValueError("The input for the variable 'scope' was not recognizable. Please use one of the following options: 'Relative' , 'Absolute'")
    return df

def _percentiles(df, unique_files, station, columns,dayofcentury,day_century):
        #Create an empty list to hold the percentile values
    percentiles = []

    print("Calculating Percentiles .... ")
    #Loop through each file in the unique_files list
    for i, file in enumerate(unique_files):
        
        #Calculate the percentile of each numerical column for the specified datetime
        df_file = df[df[station] == file]

        #Create an empty dictionary
        percentile_dict = {}
        
        for col in columns:
          #Looping through each row of the dataframe
          for index, row in df_file.iterrows():
            row_date = row[dayofcentury]
            if row_date == day_century and ~ np.isnan(row[col]):
              # Retrieving Value 
              value = row[col]
              # Excluding NAN's for the calculation
              col_list = df_file[col].dropna().values.tolist()
              # Calculate the Percentiles
              percentile = stats.percentileofscore(col_list,value)
              #Count the number of values 
              count = len(col_list)
              # Assign file, value and, percentile to dictionary
              percentile_dict[col] = row[col]
              percentile_dict[f"{col}_pcte"] = percentile
              percentile_dict[f"{col}_n"] = count
              
        percentile_dict["Station"] = file

          #Add the percentile dictionary to the list
        percentiles.append({'Station': file,**percentile_dict})
    
    print("Finished Calculating Percentiles")
    
    #Create a dataframe from the list of dictionaries
    percentiles_df = pd.DataFrame(percentiles)

    print("Transforming Output...")
    return percentiles_df

def _transform_percentiles(percentiles_df):
    
    # Define a list of all the columns in the original dataframe
    columns_list = percentiles_df.columns
    # Split the list into two parts based on which columns have '_pcte' and '_n' in the name
    century_list = [i for i in columns_list if '_pcte' in i]
    number_list = [i for i in columns_list if '_n' in i]
    # Select the columns which do not have '_pcte' and '_n'
    value_list = [i for i in columns_list if i not in century_list and i not in number_list and i not in "Station"] 
    # Build the new dataframe from the lists
    transformed_df = pd.DataFrame(columns=['Station', 'Variable', 'Percentile', 'Number of Comparison Values', 'Original Values'])
    # Loop through each entry in the original dataframe
    for row in percentiles_df.iterrows():
        # Take the Station value and loop through all of the remaining values
        station_val = row[1]['Station']
        for value, century, number in zip(value_list, century_list, number_list):
            # Create a new entry for the transformed_df
            new_entry = [station_val, value, row[1][century], row[1][number], row[1][value]]
            transformed_df.loc[len(transformed_df)] = new_entry
     
    # Filter out extreme values       
    transformed_df = transformed_df[(transformed_df["Percentile"] > 90) | (transformed_df["Percentile"] < 10)].reset_index()
    
    return transformed_df

    

**Report Functions**

In [71]:
def get_data(data, measurement, aws, date = None
                   ,scope = "Relative", output = "Report"):
    """
    Function to return the underlying dataset of specified values given a selected date.
    
    Parameters:
    data (str): The dataset to be used ("GC Net" or "Promice").
    measurement (str): The measurement ("All" or a column name).
    aws (str): The automatic weather station ("All" or a station name).
    date (str): The date of the observations, 'YYYY-MM-DD'. Defaults to the
                day on which the function is called.
    scope (str): Relative: values are compared to historical values of the day of year. 
                 Absolute: values are compared to historical values.
    output (str): "Data" returns the subset as pd.DataFrame; "Report" prints
                  it as a table and returns None.

    Raises:
    ValueError: If ``output`` is neither 'Report' nor 'Data' (the helper
                functions raise ValueError for their own arguments).
    """
    # Resolve the default date at call time; a default computed in the
    # signature is frozen when the cell defining the function is run.
    if date is None:
        import datetime as _datetime
        date = _datetime.datetime.today().strftime('%Y-%m-%d')

    # Load Data
    df = _load_data(data)
    
    # Align GC Net & PROMICE Columns  
    station, datetime_col, dayofcentury, dayofyear = _align_GC_PR()
    
    # Subset Data (date, measurement, station)
    day, df, columns, day_century, unique_files = _subset_df(date, df, station, datetime_col, measurement, dayofcentury, dayofyear, aws)
    
    # Subset Data (Scope)
    df = _subset_scope(scope, df, day, dayofyear)

    ##### Output #######
    if output == "Report":
        from tabulate import tabulate
        #### Report OUTPUT ###########
        print(
          f"  Selected Date: {date} \n" ,
          f"Selected Station: {aws} \n" , 
          f"Selected Measurement: {measurement} \n"  , 
          f"Selected Data: {data} \n" ,
          f"Selected Scope: {scope} \n" ,
          "----------------------------------------------------------------------------------------------------------------------\n",
          f"                   Climatology Report\n" 
          )        
        print(tabulate(df, headers='keys', tablefmt='psql'))
    elif output == "Data":
        return df
    else: 
        raise ValueError("The input for the variable 'output' was not recognizable. Please use one of the following options: 'Report', 'Data'" )


    


In [90]:
def daily_report(data, date = None, 
                 aws = "All", measurement = "All", scope = "Relative", output = "Report", aggregated = False):
    """
    Function to return the percentile of specified values given a selected date.
    
    Parameters:
    data (str): The dataset to be used ("GC Net" or "Promice").
    date (str): The date of the observations for which the percentile is
                calculated, 'YYYY-MM-DD'. Defaults to the day on which the
                function is called.
    aws (str): The automatic weather station ("All" or a station name).
    measurement (str): The measurement ("All" or a column name).
    scope (str): Relative: values are compared to historical values of the day of year. 
                 Absolute: values are compared to historical values.
    aggregated (bool): Must be True or False. Aggregation over stations is
                       not implemented yet, so both values currently produce
                       the same, non-aggregated output.
    output (str): "Data" returns the result as pd.DataFrame; "Report" prints
                  it as a table, asks whether graphics should be included,
                  and returns None.

    Raises:
    ValueError: If ``aggregated`` or ``output`` is not a recognised option
                (the helper functions raise ValueError for their own arguments).
    """
    # Resolve the default date at call time; a default computed in the
    # signature is frozen when the cell defining the function is run.
    if date is None:
        import datetime as _datetime
        date = _datetime.datetime.today().strftime('%Y-%m-%d')

    # Load Data
    df = _load_data(data)
    
    # Align GC Net & PROMICE Columns  
    station, datetime_col, dayofcentury, dayofyear = _align_GC_PR()
 
    # Account for Aggregate variable: only validated for now (TBD: group df by station)
    if aggregated not in (True, False):
        raise ValueError(f"The input for the variable 'aggregated' was not recognizable. Please use one of the following options: True, False")

    # Subset Data (date, measurement, station)
    day, df, columns, day_century, unique_files = _subset_df(date, df, station, datetime_col, measurement, dayofcentury, dayofyear, aws)
    
    # Subset Data (Scope)
    df = _subset_scope(scope, df, day, dayofyear)
    
    # Calculate Percentiles
    percentiles_df = _percentiles(df, unique_files, station, columns, dayofcentury, day_century)
  
    # Transform Output
    transformed_df = _transform_percentiles(percentiles_df)
    
    print("Finished")
    print("----------------------------------------------------------------------------------------------------------------------\n")

    if output == "Report":
        from tabulate import tabulate
        #### Report OUTPUT ###########
        print(
          f"  Selected Date: {date} \n" ,
          f"Selected Station: {aws} \n" , 
          f"Selected Measurement: {measurement} \n"  , 
          f"Selected Data: {data} \n" ,
          f"Selected Scope: {scope} \n" ,
          "----------------------------------------------------------------------------------------------------------------------\n",
          f"                   Climatology Report\n" 
          )        
        print(tabulate(transformed_df, headers='keys', tablefmt='psql'))
       
        input_ = input("Do you want to include graphics? (Y/N)")
        if input_ == "Y":
          ########## TBD: Replace with boxplot function #################
          print("Functionality is in development")
    elif output == "Data":
        return transformed_df
    else: 
        raise ValueError("The input for the variable 'output' was not recognizable. Please use one of the following options: 'Report', 'Data'" )

  


In [91]:
daily_report(data = "GC Net", date = "2022-01-12", measurement= "All", scope='Relative', aws="All", output = "Data")

Calculating Percentiles .... 
Finished Calculating Percentiles
Transforming Output...
Finished
----------------------------------------------------------------------------------------------------------------------



Unnamed: 0,index,Station,Variable,Percentile,Number of Comparison Values,Original Values
0,37,Swiss Camp,relative_humidity_1,100.0,18.0,95.29
1,38,Swiss Camp,relative_humidity_1_cor,100.0,18.0,112.84
2,39,Swiss Camp,relative_humidity_2,100.0,17.0,97.16
3,40,Swiss Camp,relative_humidity_2_cor,100.0,17.0,114.99
4,42,Swiss Camp,shortwave_outgoing_radiation,100.0,19.0,0.82
5,44,Swiss Camp,snow_temperature_10,94.444444,18.0,-7.48
6,45,Swiss Camp,snow_temperature_4,100.0,17.0,-4.54
7,46,Swiss Camp,snow_temperature_5,100.0,16.0,-4.73
8,47,Swiss Camp,snow_temperature_6,94.444444,18.0,-5.0
9,48,Swiss Camp,snow_temperature_7,94.117647,17.0,-5.41


In [74]:
get_data(data = "GC Net", date = "2022-01-12", measurement= "relative_humidity_2_cor", scope='Relative', output = "Data", aws="E-GRIP")

Unnamed: 0,file,Datetime,relative_humidity_2_cor,DayOfCentury,DayOfYear
122655,E-GRIP,2015-01-12,84.92,735122,12
128861,E-GRIP,2016-01-12,80.48,735487,12
135073,E-GRIP,2017-01-12,69.94,735852,12
141279,E-GRIP,2018-01-12,83.4,736217,12
147492,E-GRIP,2019-01-12,66.37,736582,12
153204,E-GRIP,2020-01-12,80.66,736947,12
158162,E-GRIP,2021-01-12,86.69,737312,12
162457,E-GRIP,2022-01-12,78.22,737677,12


***Test Suite***

In [None]:
def report(data, date, station, variable, scope):
    """
    Print a percentile table for one date and show a boxplot per column.

    Parameters:
    data (str): "GC Net" or "Promice"; selects the parquet file to read.
    date (str): The date of the observations, 'YYYY-MM-DD'.
    station (str): Station label shown in the report header.
                   NOTE(review): the data is not filtered by this value -
                   confirm whether it should be.
    variable (str): "All" or the name of a single column.
    scope (str): "relative" compares within the same day of year,
                 "absolute" compares against all rows.

    Returns:
    None. The table is printed and the figures are shown.

    Raises:
    ValueError: If ``data`` or ``scope`` is not one of the accepted values.
    """
  
    # Ignore Warnings
    import warnings
    warnings.filterwarnings('ignore')
  
    # Load data (os.path.join avoids the invalid '\d' / '\p' escapes of a
    # literal backslash path)
    if data == "GC Net":
        df = pd.read_parquet(os.path.join('data', 'df_daily.gzip'), engine='pyarrow')
    elif data == "Promice":
        df = pd.read_parquet(os.path.join('data', 'promice_daily.gzip'), engine='pyarrow')
    else: 
        raise ValueError("Only 'GC Net' & 'Promice' are accepted input values")
  
    # Split date input into year, month, day
    year = int(date[0:4])
    month = int(date[5:7])
    day = int(date[8:10])
  
    # subset df with date and find day of year
    date_df = df.loc[(df['Datetime'].dt.year == year) & (df['Datetime'].dt.month == month) & (df['Datetime'].dt.day == day)]
    day =  date_df["DayOfYear"].mean()
  
    # Mean of every numeric column on the selected date, reshaped to one row.
    # numeric_only=True keeps text / datetime columns out of the mean.
    day_century_value = pd.DataFrame(date_df.mean(numeric_only=True)).reset_index()
    day_century_value = pd.pivot_table(day_century_value, index=None, columns=['index'], aggfunc=max)
  
    if scope == "relative":
        # group by calender day
        df = df.loc[df['DayOfYear'] == day]
    elif scope == "absolute":
        df = df
    else: 
        raise ValueError("Only 'relative' & 'absolute' are accepted input values")
  
    # Find the index label of the first row with the specified date
    row_index = int(df[df['Datetime'] == date].index[0])
    
    # Remove columns that do not contain numerical values & Subset df based on measure selection
    if variable == "All":
        df = df.select_dtypes(include=['int', 'float']).copy()
    else:
        df = pd.DataFrame({variable: df[variable]})
        df = df.select_dtypes(include=['int', 'float']).copy()
    
    # Create an empty dictionary for the output
    percentile_dict = {}
    
    # Iterate through the columns
    for col in df.columns:
        # Find the percentile of the value in the specified row and date
        value = df[col][row_index]
        percentile = df[col].rank(pct=True)[df[col] == value].iloc[0]*100 if not math.isnan(value) else math.nan
    
        # Add the percentile to the dictionary
        percentile_dict[col] = percentile
    
    # Create a single-row dataframe with the output
    x = pd.DataFrame(percentile_dict, index=[0])
  
    import prettytable as pt
  
    table = pt.PrettyTable()
    table.field_names = ["Measurement", "Percentile"]
  
    for col in x.columns:
        # Work on the scalar: truth-testing and int() on an array only work
        # by accident for one-element arrays.
        pct = x[col].iloc[0]
        shown = 'NaN' if np.isnan(pct) else int(pct)
        if pct > 90 or pct < 10:
            # extremes are printed in bold (ANSI escape codes)
            table.add_row([f"\033[1m{col}\033[0m", f"\033[1m{shown}%\033[0m"])
        else:
            table.add_row([col, f"{shown}%"])
  
    print(
    f" Date: {date} \n" ,
    f"Station: {station} \n" , 
    f"Measurement: {variable} \n"  , 
    f"Data: {data} \n" ,
    "----------------------------------------------------------------------------------------------------------------------\n",
    f"                   Climatology Report\n" 
    )
    print(table)
    print("----------------------------------------------------------------------------------------------------------------------\n")
    
    import plotly.express as px
    # Create a list of columns to be plotted
    x = day_century_value
    columns_to_plot = [col for col in x.columns.values if col in df.columns.values]
  
    # Create a list of values from x to be highlighted
    values_to_highlight = x[columns_to_plot].values.flatten().tolist()
  
    # Create a list of subplots
    figs = []
    # Loop through list of columns
    for col, v in zip(columns_to_plot, values_to_highlight) : 
        # Create a subplot for each column 
        fig = px.box(df[col], orientation = "v",boxmode='group')
        # Format the axes
        fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
        # Highlight the values from x
        fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
                        mode = 'markers',
                        marker_symbol = 'circle-dot',
                        marker_size = 8,
                        marker_color = 'red')
        
        # Add figure to list of subplots 
        figs.append(fig)
    
    # Output
    for fig in figs:
        fig.show()

In [None]:
report("Promice", "2022-01-12", "THU_L_day_v03", variable = "All", scope = "relative")

GC Net Data

In [None]:
# Daily GC Net data; os.path.join avoids the invalid '\d' escape of a literal
# backslash path and works on every platform.
gc = pd.read_parquet(os.path.join('data', 'df_daily.gzip'), engine='pyarrow')

In [None]:
gc.columns

*Test on one station*

In [None]:
# Keep only the rows of one station. Re-running this cell filters the already
# filtered frame again, because `gc` is overwritten.
# NOTE(review): the helper functions use "file" as the station column -
# confirm that 'station_name' exists in this frame.
gc = gc[gc['station_name'] == "Humboldt"] 

Promice Data

In [None]:
# Hourly PROMICE data; os.path.join avoids the invalid '\p' escape of a
# literal backslash path and works on every platform.
pc = pd.read_parquet(os.path.join('data', 'promice_hourly.gzip'), engine='pyarrow')

In [None]:
pc

*Test Suite*

In [None]:
# define variables (notebook-level settings read by the cells below)

# *Mandatory: Data
data = "Promice"

# *Mandatory: Date
# NOTE(review): written as DD-MM-YYYY, while the cells below select
# 2022-01-12; in this section the value is only printed in the report header.
date = "22-07-2008"

# Optional: Station (compared against the 'file' column below)
#station = "SCO_L_hour_v03"
station = "THU_L_day_v03"

# Optional: Measure ("All" or a column name; read as a global by get_percentile)
y = "All"

In [None]:
# subset dataframe 
pc = pc[pc['file'] == station] 

In [None]:
# select date
year= 2022
month = 1
day = 12

# subset df with date and find day of year and day variable
date_df = pc.loc[(pc['Datetime'].dt.year == year) & (pc['Datetime'].dt.month == month) & (pc['Datetime'].dt.day == day)]
  # select day of year (from here on `day` is the day of year, not the day of month)
day =  date_df["DayOfYear"].mean()
# Stored under its own name: assigning it to `datetime` would replace the
# imported datetime module for every cell that runs afterwards.
selected_datetime = date_df["Datetime"].max()

# select the specific day of century and the related values
day_century = date_df["DayOfCentury"].mean() 
# numeric_only=True keeps text / datetime columns out of the mean
day_century_value = pd.DataFrame(date_df.mean(numeric_only=True)).reset_index()
day_century_value = pd.pivot_table(day_century_value, index=None, columns=['index'], aggfunc=max)

# group by calender day
pc_group = pc.loc[pc['DayOfYear'] == day]

In [None]:
def get_percentile(df, date, measure=None):
  """
  Function to return the percentile of specified values given a selected date.
  
  Parameters:
  df (pd.DataFrame): The dataframe to be used; needs a 'Datetime' column.
  date (str): The date of the observations for which the percentile is calculated.
  measure (str): "All" for every int/float column, or the name of one column.
                 When omitted, the notebook-level variable ``y`` is used.
  
  Returns:
  df (pd.DataFrame): A single-row dataframe (index 0) with one column per
      evaluated column, holding the percentile (0-100, from ``rank(pct=True)``)
      of the value found in the first row matching ``date``; NaN where that
      value is NaN.
  """
  # Fall back to the notebook-level selection only when no measure is given,
  # so the result does not silently change when `y` is rebound elsewhere.
  if measure is None:
    measure = y

  # Find the index label of the first row with the specified date
  row_index = int(df[df['Datetime'] == date].index[0])
  
  # Remove columns that do not contain numerical values & Subset df based on measure selection
  if measure == "All":
    df = df.select_dtypes(include=['int', 'float']).copy()
  else:
    df = pd.DataFrame(df[measure])
    df = df.select_dtypes(include=['int', 'float']).copy()
  
  # Create an empty dictionary for the output
  percentile_dict = {}
  
  # Iterate through the columns
  for col in df.columns:
    # Find the percentile of the value in the specified row and date
    value = df[col][row_index]
    if math.isnan(value):
      percentile = math.nan
    else:
      # calculate the percentile with df.rank()
      percentile = df[col].rank(pct=True)[df[col] == value].iloc[0]*100
    
    # Add the percentile to the dictionary
    percentile_dict[col] = percentile
  
  # Create a dataframe with the output
  df_percentiles = pd.DataFrame(percentile_dict, index=[0])
  return df_percentiles

In [None]:
x = get_percentile(pc, "2022-01-12")

In [None]:
#x = x.append(day_century_value.iloc[0], ignore_index=True)

In [None]:
x = get_percentile(pc_group, "2022-01-12")

In [None]:
import prettytable as pt

table = pt.PrettyTable()
table.field_names = ["Measurement", "Percentile"]

for col in x.columns:
    # x is the single-row frame returned by get_percentile. Work on the
    # scalar: truth-testing and int() on an array only work by accident for
    # one-element arrays.
    pct = x[col].iloc[0]
    shown = 'NaN' if np.isnan(pct) else int(pct)
    if pct > 90 or pct < 10:
        # extremes are printed in bold (ANSI escape codes)
        table.add_row([f"\033[1m{col}\033[0m", f"\033[1m{shown}% \033[0m"])
    else:
        table.add_row([col, f"{shown}%"])


print(
f" Date: {date} \n" ,
f"Station: {station} \n" , 
f"Measurment: {y} \n"  , 
f"Data: {data} \n" ,
"-----------------------------------------------------------\n",
f"                   Climatology \n" 
)

print(table)

*Visual test suite*

In [None]:
############################ Boxplots #########################################
# One boxplot per column of `pc`, with the value(s) of 2022-01-12 marked in red.
ff = pc 
# Rows whose 'Datetime' equals 2022-01-12 (this rebinds the notebook-level `x`)
x = ff.loc[ff['Datetime'] == "2022-01-12"]

import plotly.express as px

# Create a list of columns to be plotted
# (x is a row subset of ff, so this keeps every column, numeric or not)
columns_to_plot = [col for col in x.columns.values if col in ff.columns.values]

# Create a list of values from x to be highlighted
# NOTE(review): flatten() is row-major; pairing it with the column list below
# lines up only with the first selected row - confirm x has a single row.
values_to_highlight = x[columns_to_plot].values.flatten().tolist()

# Create a list of subplots
figs = []
# Loop through list of columns
for col, v in zip(columns_to_plot,values_to_highlight) : 
  # Create a subplot for each column 
  fig = px.box(ff[col], orientation = "v",boxmode='group')
  # Format the axes
  fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
  # Highlight the values from x
  fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
                        mode = 'markers',
                        marker_symbol = 'circle-dot',
                        marker_size = 8,
                        marker_color = 'red')
  
  # Add figure to list of subplots 
  figs.append(fig)

# Show the plots
for fig in figs:
  fig.show()

In [None]:
# A function that calculates the percentiles of every column and their values

def percentile_df(df):
    """
    Return a copy of ``df`` with a percentile-rank column for every column.

    Parameters:
    df (pd.DataFrame): Input frame; it is left unmodified.

    Returns:
    pd.DataFrame: The original columns followed by one ``<col>_pcta`` column
        per original column, holding ``df[col].rank(pct=True)`` (a fraction
        between 0 and 1).
    """
    # Work on a copy so the caller's frame does not grow extra columns each
    # time the function is called.
    ranked = df.copy()
    for col in df.columns:
        ranked[f'{col}_pcta'] = df[col].rank(pct=True)

    return ranked

In [None]:
# Percentile ranks for every column of pc, then keep only the air temperature
# and its rank for the plot below.
gg = percentile_df(pc)
gg = gg[["Datetime", "AirTemperature(C)", "AirTemperature(C)_pcta"]]
# Column names used by the figure cell. This rebinds the notebook-level `y`
# (previously "All"), which get_percentile reads as a global.
y = "AirTemperature(C)"
y_pcta = "AirTemperature(C)_pcta"

In [None]:
# Time series of the selected measurement `y` with shaded bands.
# The main trace and the y axis are labelled with `y` itself, so the labels
# always match the plotted column.
# NOTE(review): every bound trace uses the same expression
# gg[y] * gg[y_pcta], so each upper/lower pair coincides and the filled bands
# have zero height - confirm the intended bound definitions.
fig = go.Figure([
    go.Scatter(
        name=y,
        x=gg['Datetime'],
        y=gg[y],
        mode='lines',
        line=dict(color='rgb(31, 119, 180)'),
    ),
    go.Scatter(
        name='Upper Bound (20-80)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta])),
        mode='lines',
        marker=dict(color="#00BB00"),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound (20-80)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta])),
        marker=dict(color="#00BB00"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(0, 187, 0, 0.3)',
        # fill the area between this trace and the previous one
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound (0-20 & 80-100)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta]  )),
        mode='lines',
        marker=dict(color="#BB0000"),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound (0-20 & 80-100)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta] )),
        marker=dict(color="#BB0000"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(187, 0, 0, 0.3)',
        fill='tonexty',
        showlegend=False
    )
])
fig.update_layout(
    yaxis_title=y,
    title='Continuous, variable value error bars',
    hovermode="x"
)
fig.show()

In [None]:
gg.loc[(gg['Datetime'].dt.month == 12) & (gg['Datetime'].dt.day == 31)]