Import relevant Libaries 

In [None]:
!pip install pyarrow

In [1]:
# Base libraries
import pandas as pd
import numpy as np
import os
import math as math
import datetime
from scipy import stats
import prettytable as pr

# Visualizations
import plotly.express as px
import plotly.graph_objects as go

**Helper Functions**

In [2]:
import glob

def _get_data(source):
    # Define the directory path where the CSV files are located
    if source == 'GC_net':
        input_path =r'../data/gc_net/daily_data/'
    elif source == 'promice':
        input_path =r'../data/promice/new_promice/daily_data/'
    else: 
        raise ValueError("Only 'GC Net' & 'Promice' are accepted input values")
    
    pattern = '*.csv'
    #staion_list = ['SCO_L.csv','KAN_L.csv']

    # Use glob to get a list of all files that match the pattern
    filenames = glob.glob(input_path + pattern)

    data_collection = []
    for file in filenames:
        data = pd.read_csv(file, index_col=False)
        data_collection.append(data)
    data = pd.concat(data_collection)

    data = data.reset_index(drop=True)
    return data
df = _get_data('GC_net')

def _load_data(data):
    df_out = df.copy()
    
    return df_out


def _align_GC_PR():
    station = "file"
    datetime = "Datetime"
    dayofcentury = "DayOfCentury"
    dayofyear = 'DayOfYear'
    return station,datetime,dayofcentury,dayofyear

def _exclude():
    # List of columns to exclude from percentile calculation
    exclude = ['Year', 'MonthOfYear', 'DayOfMonth', 'HourOfDay(UTC)', 
               'DayOfYear', 'LongitudeGPS(degW)','HeightStakes(m)',
               'DayOfCentury', 'WindDirection(d)', 'TiltToEast(d)', 
               'TiltToNorth(d)', 'TimeGPS(hhmmssUTC)', 'LatitudeGPS(degN)', 
               'ElevationGPS(m)', 'HorDilOfPrecGPS', 'LoggerTemperature(C)',
               'FanCurrent(mA)', 'BatteryVoltage(V)', 'Month', 'Day', 'Hour',

              'air_temperature_1_max', 'air_temperature_1_min',
              'wind_speed_u1_max','wind_speed_u2_max',
              'wind_from_direction_1', 'wind_from_direction_2', 
              'height_wind_sensor_1', 'height_wind_sensor_2', 'battery_voltage',
              'shortwave_incoming_radiation_max',
              'shortwave_incoming_radiation_stdev', 'net_radiation_stdev',
              'air_temperature_2_max', 'air_temperature_2_min', 
              'wind_speed_u2_stdev', 'ref_temperature',   'wind_speed_u1_stdev',
              'net_radiation_maximum', 'season', 'year', 'month', 'DayOfYear',
              'DayOfCentury', 'Unnamed: 0']
    return exclude

def _subset_df(date, df, station, datetime, measurement, dayofcentury, dayofyear, aws):
    exclude = _exclude()
    df['Datetime'] = pd.to_datetime(df['Datetime'], format='%Y-%m-%d')
    # Split date input into year, month, day
    year = int(date[0:4])
    month = int(date[5:7])
    day = int(date[8:10])



    date_df = df.loc[(df['Datetime'].dt.year == year) & (df['Datetime'].dt.month == month) & (df['Datetime'].dt.day == day)]

    # Find the day of year
    day =  date_df["DayOfYear"].mean()
    
    # find day of century
    day_century = date_df[dayofcentury].mean() 
    
    #Create a list of relevant columns
    columns = df.select_dtypes(include=[np.number]).columns.difference(exclude)

    ## Subset by measurement
        #df = df[[station,datetime, measurement, dayofcentury,dayofyear]]
    if measurement == "All":
        df = df
    elif measurement in columns:
        # subset by measurement
        df = df[[station,datetime, measurement, dayofcentury, dayofyear, 'year']]
        #Update list of relevant column
        columns = df.select_dtypes(include=[np.number]).columns.difference(exclude)
    else: 
        raise ValueError(f"The input for the variable 'measurement' was not recognizable. Please use one of the following options: {columns}")
    
    ## Subset by station
        #df = df.loc[df[station] == aws]
    #Create a list of unique files (stations) from the dataset 
    unique_files = list(df[station].unique())

    if aws == "All":
        df = df
    elif aws in unique_files:
        # filter by aws
        df = df.loc[df[station] == aws]
        #Update list of unique files (stations) from the dataset 
        unique_files = list(df[station].unique())
        #display(df)
    else: 
        raise ValueError(f"The input for the variable 'aws' was not recognizable. Please use one of the following options: {unique_files}")
    
    return day, df,columns, day_century, unique_files

def _subset_scope(scope, window, df,date, day, dayofyear):
    
    def summarize_df(df,min_values , window):
        #Number by observation period (avoids classifying on year if 30 day period spans across Dec + Jan)
        df['group_number'] = (df
                              .groupby('file', group_keys=False)['Datetime']
                              .apply(lambda x: (x - x.shift(1)).fillna(pd.Timedelta(seconds=0)).dt.days.gt(2).cumsum())
        )
        if window == 'year':
            df['group_number'] = df['year'].astype(int).copy()
            
        grouped = df.groupby(['file', 'group_number'])
        
        #Adding column so all grouped observations has same year
        df['year'] = grouped['Datetime'].transform(lambda x: x.max().year)

        df['target_date'] = grouped['Datetime'].transform(lambda x: (x ==date).max())
        #display(df.head(20))
        # Specify columns containing numerical values to be averaged
        columns_to_average = (df
            .select_dtypes(exclude=['object'])
            .drop(columns=['Datetime', 'DayOfYear', 'DayOfCentury', 'group_number', 'year', 'target_date'])
            .columns)
        
        # Calculate the number of non-NaN values for each variable within each group
        valid_date_observations = grouped[columns_to_average].apply(lambda x: x.notna().sum() <= min_values)

        # Calculate average per day per station
        df_filtered = grouped[columns_to_average].mean()

        #Remove means with less than 20 observations per day
        df_masked = df_filtered.mask(valid_date_observations, np.nan)

        df_masked =  df_masked.merge(df[['file','group_number','year', 'target_date']].drop_duplicates(), how='left',on=['file','group_number'])
        
        df = df_masked.reset_index(drop=True).drop('group_number',axis=1)
        return df

    #set the range of days
    week = datetime.datetime.strptime(date,'%Y-%m-%d').date().isocalendar()[1]
    month = datetime.datetime.strptime(date,'%Y-%m-%d').month
    sliding_7_day = [x+366 if x < 0 else x for x in np.arange(day-6,day+1).tolist()] 
    sliding_30_day = [x+366 if x < 0 else x for x in np.arange(day-29, day+1).tolist()]
    year = [x for x in np.arange(0, day+1).tolist()]
    
    #exclude data from after the given date
    df = df.loc[df['Datetime'] <= date]
    
    if scope == "Relative":
        if window == 'day':
        # filter by calender day
            df = df.loc[df[dayofyear] == day]
            df = summarize_df(df, 0, window)

        elif window == 'week':
            df = df.loc[df['Datetime'].dt.isocalendar().week.astype(int) == week]
            df = summarize_df(df, 5, window)
        
        elif window == 'sliding_avg_7':
            df = df.loc[df[dayofyear].isin(sliding_7_day)]
            df = summarize_df(df, 5, window)
            
        elif window == 'month':
            df = df.loc[(df['Datetime'].dt.month == month)]
            df = summarize_df(df, 24, window)

        elif window == 'sliding_avg_30':
            df = df.loc[df[dayofyear].isin(sliding_30_day)]
            df = summarize_df(df, 24, window)
  
        elif window == 'year':
            df = df.loc[df[dayofyear].isin(year)]
            df = summarize_df(df, day*0.70, window='year')

        else:
            raise ValueError(f"The input for the variable 'window' was not recognizable. Please use one of the following options: 'day', 'week', 'month', 'year', 'sliding_avg_7' or 'sliding_avg30'")

    elif scope == "Absolute":
        df = df
    else: 
        raise ValueError("The input for the variable 'scope' was not recognizable. Please use one of the following options: 'Relative' , 'Absolute'")

    return df

def _percentiles(df, unique_files, station, columns,dayofcentury,day_century):

        #Create an empty list to hold the percentile values
    percentiles = []

    print("Calculating Percentiles .... ")
    #Loop through each file in the unique_files list
    for i, file in enumerate(unique_files):
        
        #Calculate the percentile of each numerical column for the specified datetime
        df_file = df[df[station] == file]
        #display(df_file)
        #Create an empty dictionary
        percentile_dict = {}
        for col in columns:
          #Looping through each row of the dataframe
          for index, row in df_file.iterrows():
            #row_date = row[dayofcentury]
            #print('row_date',row_date,'DAY_CENTURY', day_century, 'FILE:', file)
            if row['target_date'] == True and ~ np.isnan(row[col]):
            #if ~ np.isnan(row[col]):
                # Retrieving Value 
                #print('passed')
                value = row[col]
                
                # Excluding NAN's for the calculation
                col_list = df_file[col].dropna().values.tolist()
                
                # Calculate the Percentiles
                percentile = stats.percentileofscore(col_list,value, kind = "mean")
                #Count the number of values 
                count = len(col_list)
                #Median of the number of values 
                median = np.nanmedian(col_list, axis=0)
            
                # Assign file, value and, percentile to dictionary
                percentile_dict[col] = row[col]
                percentile_dict[f"{col}_pcte"] = percentile
                percentile_dict[f"{col}_n"] = count
                percentile_dict[f"{col}_median"] = median
                
        percentile_dict["Station"] = file

          #Add the percentile dictionary to the list
        percentiles.append({'Station': file,**percentile_dict})
    
    print("Finished Calculating Percentiles")
    
    #Create a dataframe from the list of dictionaries
    percentiles_df = pd.DataFrame(percentiles)

    print("Transforming Output...")

    return percentiles_df

def _transform_percentiles(percentiles_df):
    
    # Define a list of all the columns in the original dataframe
    columns_list = percentiles_df.columns
    
    # Split the list into three parts based on which columns have '_pcte', '_n', _median in the name
    century_list = [i for i in columns_list if '_pcte' in i]
    number_list = [i for i in columns_list if '_n' in i]
    median_list = [i for i in columns_list if '_median' in i]
    
    # Select the columns which do not have '_pcte' and '_n'
    value_list = [i for i in columns_list if i not in century_list and i not in number_list and i not in median_list and i not in "Station"] 
    # Build the new dataframe from the lists
    transformed_df = pd.DataFrame(columns=['Station', 'Measurement', 'Percentile', 'Number of Comparison Values', 'Original Value', 'Median'])
    # Loop through each entry in the original dataframe
    for row in percentiles_df.iterrows():
        # Take the Station value and loop through all of the remaining values
        station_val = row[1]['Station']
        for value, century, number, median in zip(value_list, century_list, number_list,median_list):
            # Create a new entry for the transformed_df
            new_entry = [station_val, value, row[1][century], row[1][number], row[1][value], row[1][median]]
            transformed_df.loc[len(transformed_df)] = new_entry
     
    # Filter out extreme values       
    transformed_df = transformed_df[(transformed_df["Percentile"] > 90) | (transformed_df["Percentile"] < 10)].reset_index(drop=True)
    
    return transformed_df

**visualization**

In [3]:

def comparison_visualization(station_in = 'NASA-U',measurement_in='shortwave_incoming_radiation', window='year', date='2020-12-31'):
    df = _load_data('test')
    station, datetime, dayofcentury, dayofyear = _align_GC_PR()
    day, df,columns, day_century, unique_files = _subset_df(date=date, df=df, station='file', datetime=datetime, measurement=measurement_in, dayofcentury=dayofcentury, dayofyear=dayofcentury, aws='All')
    df = df.loc[df['file'] == station_in ]

    df = df[[measurement_in,'Datetime']].set_index('Datetime')

    if window == 'week':
        df_grouped = df.groupby(pd.Grouper( freq='W'))          
        df_mean = df_grouped.mean()
        df = df_mean.dropna()

    elif window == 'sliding_avg_7':
        display(df)
        df['Datetime'] = df.index
        df['date_diff'] = (df['Datetime'].max() - df['Datetime']).dt.days
        df['week_group'] = (df['date_diff'] / 7).apply(lambda x: int(x) if x.is_integer() else int(x) + 1)
        df_grouped = df.groupby('week_group', group_keys=False)[measurement_in]
        weekly_mean = df_grouped.mean()
        weekly_mean = weekly_mean.reset_index()
        weekly_mean['last_date'] = weekly_mean['week_group'].apply(lambda x: df['Datetime'].max() - pd.to_timedelta(x * 7, unit='D'))
        weekly_mean.set_index('last_date', inplace=True)
        weekly_mean.drop(columns=['week_group'], inplace=True)
        df = weekly_mean.dropna()
        
    elif window == 'month':
        df_grouped = df.groupby(pd.Grouper(freq='M'))
        df_mean = df_grouped.mean()
        df = df_mean.dropna()
        

    elif window == 'sliding_avg_30':
        df['Datetime'] = df.index
        df['date_diff'] = (df['Datetime'].max() - df['Datetime']).dt.days
        df['week_group'] = (df['date_diff'] / 30).apply(lambda x: int(x) if x.is_integer() else int(x) + 1)
        df_grouped = df.groupby('week_group', group_keys=False)[measurement_in]
        monthly_mean = df_grouped.mean()
        monthly_mean = monthly_mean.reset_index()
        monthly_mean['last_date'] = monthly_mean['week_group'].apply(lambda x: df['Datetime'].max() - pd.to_timedelta(x * 30, unit='D'))
        monthly_mean.set_index('last_date', inplace=True)
        monthly_mean.drop(columns=['week_group'], inplace=True)
        df = monthly_mean.dropna()

    elif window == 'year':
        df_grouped = df.groupby(pd.Grouper(freq='M'))           
        df_mean = df_grouped.mean()
        df = df_mean.dropna()
    else:
        raise ValueError(f"The input for the variable 'window' was not recognizable.")

    selected_year = date[:4] 
    # Extract unique years from the index
    years = list(df.index.year.unique().sort_values(ascending=False))
    years.remove(int(selected_year))
    
    data = []

    # Calculate the grayscale step based on the number of years
    grayscale_step = 1.0 / (len(years) - 1)

    # Loop through the unique years and plot the development of the value over the year
    for i, year in enumerate(years):
        df_year = df[df.index.year == year]

        # Reset the year part of the datetime index to a constant year (e.g., 2000)
        df_year.index = df_year.index.map(lambda x: x.replace(year=2000))

        color = f'rgba({int(max(0, grayscale_step * (i - 1) * 255))}, {int(max(0, grayscale_step * (i - 1) * 255))}, {int(max(0, grayscale_step * (i - 1) * 255))}, 1)'
        linewidth = 1
        
        data.append(go.Scatter(x=df_year.index, y=df_year[measurement_in], name=f'{year}', line=dict(color=color, width=linewidth), mode='lines', legendgroup='custom', showlegend=False))

    df_year = df[df.index.year == int(selected_year)]

    # Reset the year part of the datetime index to a constant year (e.g., 2000)
    df_year.index = df_year.index.map(lambda x: x.replace(year=2000))
    color = 'red'
    linewidth = 2.5
    data.append(go.Scatter(x=df_year.index, y=df_year[measurement_in], name=f'{selected_year}', line=dict(color=color, width=linewidth), mode='lines', legendgroup='custom', showlegend=False))

    # Create a plot
    fig = go.Figure(data=data)

    # Set title, labels
    fig.update_layout(
        title= {
            'text':f'Development of {measurement_in} by year for {station_in}',
            'font':{'size':24}
        },
        xaxis_title='Month',
        yaxis_title=measurement_in,
        xaxis=dict(
            tickmode='array',
            tickvals=pd.date_range('2000-01-01', '2000-12-31', freq='M'),
            ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        ),
        width=1400,
        height=400,
        legend=dict(title='Legend', orientation='v', yanchor='top', xanchor='left', y=1, x=1.06)
        )
    

    # Custom legend
    latest_date = df.index.max().strftime('%d-%m')
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{selected_year}', line=dict(color='red'), legendgroup='custom'))
    if selected_year == df.index.year.max():
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{years[1]}', line=dict(color='black'), legendgroup='custom'))
    else:
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{years[0]}', line=dict(color='black'), legendgroup='custom')) 
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name='...', line=dict(color='grey'), legendgroup='custom'))
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{years[-1]}', line=dict(color='lightgrey'), legendgroup='custom'))

    # Add a vertical line indicating the latest date in the chart
    if window != 'year':
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{latest_date}', line=dict(color='black', dash='dash'), legendgroup='custom'))
        fig.add_shape(type='line', x0=date, x1=date, y0=0, y1=1, yref='paper',
                    line=dict(color='black', dash='dash'))
    fig.write_image("../figures/Climatology/test_fig.png")
    # Display the plot
    fig.show()

#comparison_visualization()

**Report Functions**

In [4]:
def get_data(data, measurement, aws, date = datetime.datetime.today().strftime('%Y-%m-%d') 
                   ,scope = "Relative", window = 'day', output = "Report"):
    """
    Function to return the underlying dataset of specified values given a selected date.
    
    Parameters:
    data (str): The dataset to be used.
    date (str): The date of the observations for which the percentile is calculated.
    aws (str): The automatic weather station.
    measurement (str): The measurement. 
    scope (str): Relative: values are compared to historical values of the day of year. 
                 Absolute: values are compared to historical values.

    output (str): Output as pd.df or as printed report. 
  
    """
    
    # Load Data
    df = _load_data(data)
    
    # Align GC Net & PROMICE Columns  
    station, datetime, dayofcentury, dayofyear = _align_GC_PR()
    
    # Subset Data (date, measurement, station)
    day, df,columns, day_century, unique_files = _subset_df(date,df, station, datetime, measurement, dayofcentury, dayofyear, aws)
    
    # Subset Data (Scope)
    df = _subset_scope(scope, window, df, date, day, dayofyear)
    
    df = df.reset_index(drop=True).drop('target_date', axis=1)

    ##### Output #######
    if output == "Report":
        from tabulate import tabulate
        #### Report OUTPUT ###########
        print(
          f"  Selected Date: {date} \n" ,
          f"Selected Station: {aws} \n" , 
          f"Selected Measurement: {measurement} \n"  , 
          f"Selected Data: {data} \n" ,
          f"Selected Scope: {scope} \n" ,
          "----------------------------------------------------------------------------------------------------------------------\n",
          f"                   Climatology Report\n" 
          )        
        print(tabulate(df, headers='keys', tablefmt='psql'))
        #return df

    elif output == "Data":
        return df
    else: 
        raise ValueError("The input for the variable 'output' was not recognizable. Please use one of the following options: 'Report', 'Data'" )


    


In [5]:
def daily_report(data, date = datetime.datetime.today().strftime('%Y-%m-%d'), 
                 aws = "All", measurement = "All", scope = "Relative", window = 'day', output = "Report", aggregated = False):
    """
    Function to return the percentile of specified values given a selected date.
    
    Parameters:
    data (str): The dataset to be used.
    date (str): The date of the observations for which the percentile is calculated.
    aws (str): The automatic weather station.
    measurement (str): The measurement. 
    scope (str): Relative: values are compared to historical values of the day of year. 
                 Absolute: values are compared to historical values.
    aggregated (bool): False: output is not aggregated.
                       True: averages of stations are calculated.

    output (str): Output as pd.df or as printed report. 
  
    """
    # Load Data
    df = _load_data(data)
    
    # Align GC Net & PROMICE Columns  
    station, datetime, dayofcentury, dayofyear = _align_GC_PR()
 
    #Account for Aggregate variable
    if aggregated == False:
        df = df
    elif aggregated == True:
        # Group df by station 
        df = df ##### TBD #######
    else: 
        raise ValueError(f"The input for the variable 'aggregated' was not recognizable. Please use one of the following options: True, False")

    # Subset Data (date, measurement, station)
    #day, df = _subset_df(date,df, station, datetime, measurement, dayofcentury, dayofyear, aws)
    day, df,columns, day_century, unique_files = _subset_df(date, df, station, datetime, measurement, dayofcentury, dayofyear, aws)
    
    # Subset Data (Scope)
    df = _subset_scope(scope, window, df, date, day, dayofyear)
    
    # Calculate Percentiles
    percentiles_df = _percentiles(df, unique_files, station, columns, dayofcentury, day_century)
  
    # Transform Output
    transformed_df = _transform_percentiles(percentiles_df)
    
    print("Finished")
    print("----------------------------------------------------------------------------------------------------------------------\n")

   # print(tabulate(percentiles_df, headers='keys', tablefmt='psql'))
   # return percentiles_df
    if output == "Report":
        from tabulate import tabulate
        #### Report OUTPUT ###########
        print(
          f"  Selected Date: {date} \n" ,
          f"Selected Station: {aws} \n" , 
          f"Selected Measurement: {measurement} \n"  , 
          f"Selected Data: {data} \n" ,
          f"Selected Scope: {scope} \n" ,
          "----------------------------------------------------------------------------------------------------------------------\n",
          f"                   Climatology Report\n" 
          )        
        print(tabulate(transformed_df, headers='keys', tablefmt='psql'))
       
        input_ = input("Do you want to include graphics? (Y/N)")
        if input_ == "Y":
          ########## TBD: Replace with boxplot function #################
          print("Functionality is in development")
   
    elif output == "Data":
        print()
        return transformed_df
    else: 
        raise ValueError("The input for the variable 'output' was not recognizable. Please use one of the following options: 'Report', 'Data'" )

  


In [6]:
#daily_report(data = "GC Net", date = "2019-01-13", measurement= "All", scope='Relative', aws="All", window = 'week', output = "Data")

In [7]:
x = get_data(data = "GC Net", date = "2019-01-13", measurement= "relative_humidity_1", scope='Relative', output = "Data", aws="Swiss Camp 10m", window='day')
#display(x)

In [19]:
import plotly.express as px
import plotly.io as pio

def gen_report(data, date = datetime.datetime.today().strftime('%Y-%m-%d'), 
                 aws = "All", measurement = "All", scope = "Relative", window = 'day', output = "Report", aggregated = False):
    """
    Function to return the percentile of specified values given a selected date.
    
    Parameters:
    data (str): The dataset to be used.
    date (str): The date of the observations for which the percentile is calculated.
    aws (str): The automatic weather station.
    measurement (str): The measurement. 
    scope (str): Relative: values are compared to historical values of the day of year. 
                 Absolute: values are compared to historical values.
    aggregated (bool): False: output is not aggregated.
                       True: averages of stations are calculated.

    output (str): Output as pd.df or as printed report. 
  
    """
    
    # Load Data
    df = _load_data(data)
    
    # Align GC Net & PROMICE Columns  
    station, datetime, dayofcentury, dayofyear = _align_GC_PR()
 
    day, df,columns, day_century, unique_files = _subset_df(date, df, station, datetime, measurement, dayofcentury, dayofyear, aws)
    
    # Subset Data (Scope)
    df = _subset_scope(scope, window, df, date, day, dayofyear)

    if measurement == 'All':
        cols = df.select_dtypes(include=['float64']).columns.to_list()
    else:
        cols = [measurement]
    #display(df)
    row_index = int(df[df['target_date'] == True].index[0])
    #print(row_index)
    percentile_dict = {}
    test_dict = {}#print(cols)
    #display(df)
    for col in cols:
        #print(col)
        value = df[col][row_index]
        #display(df[col].rank(pct=True)[df[col] == value].iloc[0])
        #print(df[col].rank(pct=True))
        test_dict[col] = df[col].rank(pct=True) 
        percentile = df[col].rank(pct=True)[df[col] == value].iloc[0]*100 if not math.isnan(value) else math.nan
        percentile_dict[col] = percentile

    x = pd.DataFrame(percentile_dict, index=[0])

    print('displaying percentile df')
    import prettytable as pt

    table = pt.PrettyTable()
    table.field_names = ["measurement", "Percentile"]
#    if x[col].values > 90 or x[col].values < 10:
#        table.add_row([f"\033[1m{col}\033[0m", f"\033[1m{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}%\033[0m"])
#    else:
    for col in x.columns:
        table.add_row([col, f"{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}%"])
    
      
    print(
    f" Date: {date} \n" ,
    f"Station: {station} \n" , 
    f"Measurement: {measurement} \n"  , 
    f"Data: {data} \n" ,
    "----------------------------------------------------------------------------------------------------------------------\n",
    f"                   Climatology Report\n" 
    )
    print(table)
    print("----------------------------------------------------------------------------------------------------------------------\n")

    import plotly.express as px
    # Create a list of columns to be plotted
    columns_to_plot = [col for col in x.columns.values if col in df.columns.values]
  
    # Create a list of values from x to be highlighted
    values_to_highlight = x[columns_to_plot].values.flatten().tolist()
  
    # Create a list of subplots
    figs = []
    # Loop through list of columns
    for col, v in zip(columns_to_plot, values_to_highlight) : 
        # Create a subplot for each column 
        fig = px.box(df[col], orientation = "v",boxmode='group')
        # Format the axes
        fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
        # Highlight the values from x
        fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
    mode = 'markers',
    marker_symbol = 'circle-dot',
    marker_size = 8,
    marker_color = 'red')
        
        # Add figure to list of subplots 
        figs.append(fig)
    
    # Output
    for fig in figs:
        fig.show()
    save_folder = '../figures/Test_markdown_fig'
    for i, fig in enumerate(figs):
        fig.write_image(os.path.join(save_folder,f'box_plot_{date}_{measurement}_{station}.png'.format(date=date, measurement=measurement, station=aws)), engine='orca')
        fig.write_image(os.path.join(save_folder,f'fig1.png'), engine='orca')
    return x

t = gen_report(data = "GC Net", date = "2019-01-13", measurement= "relative_humidity_2_cor", scope='Relative', aws="Swiss Camp 10m", window = 'week', output = "Data")
#display(t)

displaying percentile df
 Date: 2019-01-13 
 Station: file 
 Measurement: relative_humidity_2_cor 
 Data: GC Net 
 ----------------------------------------------------------------------------------------------------------------------
                    Climatology Report

+-------------------------+------------+
|       measurement       | Percentile |
+-------------------------+------------+
| relative_humidity_2_cor |    93%     |
+-------------------------+------------+
----------------------------------------------------------------------------------------------------------------------



<h2>Extreme Weather Events Detection</h2>
<p>Meterological outliers for 2019-01-13, measured as mean of week compared to similar period previous years</p>

<hr />

<h3>Outliers by percentiler (filtered top/bottom 10%)</h3>

|    | Station        | Measurement                  | Percentile   |   Number of Comparison Values |   Original Value |        Median |
|---:|:---------------|:-----------------------------|:-------------|------------------------------:|-----------------:|--------------:|
|  0 | Swiss Camp 10m | relative_humidity_2_cor      | 90.6%        |                            16 |       88.5957    |  81.6657      |
|  1 | Swiss Camp     | relative_humidity_1_cor      | 91.2%        |                            17 |      109.02      | 100.726       |
|  2 | Swiss Camp     | relative_humidity_2_cor      | 96.9%        |                            16 |      115.697     | 103.655       |
|  3 | Swiss Camp     | snow_temperature_10          | 91.2%        |                            17 |       -7.90857   |  -9.63714     |
|  4 | Swiss Camp     | snow_temperature_4           | 96.9%        |                            16 |       -4.66857   |  -8.19357     |
|  5 | Swiss Camp     | snow_temperature_6           | 91.2%        |                            17 |       -5.19      |  -8.11714     |
|  6 | Swiss Camp     | snow_temperature_7           | 90.6%        |                            16 |       -5.68429   |  -8.19286     |
|  7 | Swiss Camp     | snow_temperature_8           | 96.7%        |                            15 |       -4.57143   |  -8.52857     |
|  8 | Swiss Camp     | snow_temperature_9           | 90.6%        |                            16 |       -7.11857   |  -8.59        |
|  9 | NASA-U         | shortwave_incoming_radiation | 90.6%        |                            16 |        0.0214286 |   0.000714286 |
| 10 | NASA-U         | shortwave_outgoing_radiation | 97.1%        |                            17 |        0.0128571 |   0           |
| 11 | NASA-U         | snow_depth_1                 | 96.7%        |                            15 |       18.3814    |   9.90571     |
| 12 | NASA-U         | snow_depth_2                 | 96.7%        |                            15 |       18.0643    |   9.30429     |
| 13 | Tunu-N         | shortwave_incoming_radiation | 96.2%        |                            13 |        0.127143  |   0.0485714   |
| 14 | Tunu-N         | snow_depth_1                 | 96.4%        |                            14 |        7.81286   |   3.25571     |
| 15 | Tunu-N         | snow_depth_2                 | 96.4%        |                            14 |        7.54      |   3.41571     |
| 16 | DYE2           | relative_humidity_1_cor      | 7.5%         |                            20 |       91.2029    |  95.4807      |
| 17 | DYE2           | snow_depth_1                 | 97.5%        |                            20 |       13.38      |   8.96643     |
| 18 | DYE2           | snow_depth_2                 | 97.5%        |                            20 |       15.2043    |  10.4579      |
| 19 | Saddle         | air_pressure                 | 92.1%        |                            19 |      732.733     | 722.094       |
| 20 | Saddle         | air_temperature_2            | 92.5%        |                            20 |      -24.9329    | -30.3336      |
| 21 | Saddle         | relative_humidity_1          | 97.4%        |                            19 |       93.2271    |  75.4329      |
| 22 | Saddle         | relative_humidity_1_cor      | 97.4%        |                            19 |      118.826     |  97.5843      |
| 23 | Saddle         | snow_depth_1                 | 91.2%        |                            17 |       18.3371    |  10.1514      |
| 24 | Saddle         | snow_depth_2                 | 97.1%        |                            17 |       19.7214    |  10.6814      |
| 25 | South Dome     | air_temperature_2            | 91.7%        |                            18 |      -22.34      | -28.0143      |
| 26 | South Dome     | relative_humidity_1          | 91.7%        |                            18 |       93.7871    |  76.4064      |
| 27 | South Dome     | relative_humidity_1_cor      | 97.2%        |                            18 |      117.536     | 101.948       |
| 28 | South Dome     | relative_humidity_2          | 90.6%        |                            16 |       85.6071    |  77.3793      |
| 29 | South Dome     | relative_humidity_2_cor      | 96.9%        |                            16 |      106.274     |  99.8864      |
| 30 | South Dome     | shortwave_incoming_radiation | 90.6%        |                            16 |        5.96571   |   4.63048     |
| 31 | South Dome     | specific_humidity_1          | 92.9%        |                             7 |        0.875714  |   0.495714    |
| 32 | South Dome     | specific_humidity_2          | 91.7%        |                             6 |        0.85      |   0.581429    |
| 33 | NASA-SE        | relative_humidity_2_cor      | 96.7%        |                            15 |      112.553     | 100.831       |
| 34 | NASA-SE        | shortwave_incoming_radiation | 90.6%        |                            16 |        2.86143   |   1.70429     |
| 35 | NASA-SE        | snow_depth_1                 | 96.4%        |                            14 |       33.33      |  10.0693      |
| 36 | NASA-SE        | snow_depth_2                 | 96.4%        |                            14 |       31.76      |  11.1343      |
| 37 | NEEM           | air_temperature_1            | 5.0%         |                            10 |      -45.6529    | -36.1943      |
| 38 | NEEM           | air_temperature_2            | 5.0%         |                            10 |      -45.0371    | -35.9807      |
| 39 | NEEM           | relative_humidity_2          | 5.6%         |                             9 |       58.2257    |  68.7543      |
| 40 | NEEM           | relative_humidity_2_cor      | 5.6%         |                             9 |       88.9786    |  97.0271      |

|                             | 0                       |
|:----------------------------|:------------------------|
| Station                     | Swiss Camp 10m          |
| Measurement                 | relative_humidity_2_cor |
| Percentile                  | 90.6%                   |
| Number of Comparison Values | 16.0                    |
| Original Value              | 88.59571428571428       |
| Median                      | 81.66571428571429       |

<p><img alt="Boxplot" src="/../GEUS-Master-Thesis/figures/to_markdown/fig1.png" /></p>

<details> <summary>This collapsible can contain more detailed information </summary> 
 <br/> insert text here  
 </details>



In [10]:
from markdown import markdown as md
from tabulate import tabulate


#percentiles = daily_report(data = "GC Net", date = "2019-01-13", measurement= "All", scope='Relative', aws="All", window = 'week', output = "Data")
print(os.getcwd())
def generate_markdown(date='2019-01-13', window = 'month', filename='../markdown_reports/output_report_draft2.md', df=None):
    output = []
    def add_element(element):
        nonlocal output
        if isinstance(element, list):
            for i in element:
                output.append(i)
        else:
            output.append(element)
        #percentiles['Percentile'] = percentiles['Percentile'].div(100).map('{:.1%}'.format)

    def make_report(elements, filename):
        with open(filename, 'w') as f:
            for element in elements:
                f.write(element)
                f.write('\n\n')
    
    if isinstance(df,pd.DataFrame):
        percentiles = df
    else:
        percentiles = daily_report(data = "GC Net", date = date, measurement= "All", scope='Relative', aws="All", window = window, output = "Data")
    percentiles_out = (percentiles
                       .to_markdown()
    )
    #percentiles_out = percentiles.to_markdown(tablefmt='grid')
    header = md('## Extreme Weather Events Detection \n Meterological outliers for {d}, measured as mean of {w} compared to similar period previous years'.format(d=date,w=window))
    line = md('-------------------------------------------------------------------------------------------------')
    body1 = md('### Outliers by percentiler (filtered top/bottom 10%)')
    add_element([header, line, body1, percentiles_out])

    picture_folder = ()
    row_out = percentiles.iloc[0,:].to_markdown()
    figure_out = md('![Boxplot](/../GEUS-Master-Thesis/figures/to_markdown/fig1.png)')#.format(date=date,aws='relative',measurement='relative_humidity_2_cor_file'))
    print(figure_out)
    add_element([row_out,figure_out])
    #GEUS-Master-Thesis\figures\to_markdown\box_plot_2019-01-13_relative_humidity_2_cor_file.png
    collapse_element = md('<details> <summary>This collapsible can contain more detailed information </summary> \n <br/> insert text here  \n </details>''')
    add_element(collapse_element)
    

    make_report(output, filename)
generate_markdown()


c:\Users\mabj16ac\Desktop\Thesis\GEUS-Master-Thesis\scripts
Calculating Percentiles .... 
Finished Calculating Percentiles
Transforming Output...
Finished
----------------------------------------------------------------------------------------------------------------------




IndexError: single positional indexer is out-of-bounds

In [43]:
percentiles = daily_report(data = "GC Net", date = "2019-01-13", measurement= "All", scope='Relative', aws="All", window = 'month', output = "Data")

display(percentiles)
df = get_data(data = "GC Net", date = "2019-01-13", measurement= "All", scope='Relative', output = "Data", aws="All", window='day')
#display(x)

x = percentiles.copy()
display(df)
display(x)
display(percentiles)

KeyError: 'Datetime'

In [354]:
columns_to_plot = [col for col in x.columns.values if col in df.columns.values]
  
    # Create a list of values from x to be highlighted
    values_to_highlight = x[columns_to_plot].values.flatten().tolist()
  
    # Create a list of subplots
    figs = []
    # Loop through list of columns
    for col, v in zip(columns_to_plot, values_to_highlight) : 
        # Create a subplot for each column 
        fig = px.box(df[col], orientation = "v",boxmode='group')
        # Format the axes
        fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
        # Highlight the values from x
        fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
    mode = 'markers',
    marker_symbol = 'circle-dot',
    marker_size = 8,
    marker_color = 'red')
        
        # Add figure to list of subplots 
        figs.append(fig)
    
    # Output
    for fig in figs:
        fig.show()
    save_folder = '../figures/Test_markdown_fig'
    for i, fig in enumerate(figs):
        fig.write_image(os.path.join(save_folder,f'box_plot_{date}_{measurement}_{station}.png'.format(date=date, measurement=measurement, station=aws)), engine='orca')
        fig.write_image(os.path.join(save_folder,f'fig1.png'), engine='orca')
    return x

Unnamed: 0_level_0,shortwave_incoming_radiation
Datetime,Unnamed: 1_level_1
2014-01-31,0.024545
2014-03-31,84.192609
2014-04-30,193.904333
2014-05-31,337.471613
2014-06-30,363.214333
2014-07-31,326.53
2014-08-31,211.609032
2014-09-30,92.022333
2014-10-31,22.538065
2014-11-30,0.477667


In [228]:
display()

***Test Suite***

In [396]:
# Excluding NAN's for the calculation
col_list =  x.iloc[:,2].dropna().values.tolist()

x['Percentile'] = x.apply(lambda row : stats.percentileofscore(col_list, row[2], kind = "mean"), axis=1)


In [397]:
x

Unnamed: 0,file,relative_humidity_1,year,Percentile
0,Swiss Camp 10m,,1991,1.724138
1,Swiss Camp 10m,,1992,5.172414
2,Swiss Camp 10m,,1993,8.62069
3,Swiss Camp 10m,,1994,12.068966
4,Swiss Camp 10m,,1995,15.517241
5,Swiss Camp 10m,,1996,18.965517
6,Swiss Camp 10m,,1997,22.413793
7,Swiss Camp 10m,,1998,25.862069
8,Swiss Camp 10m,66.55,1999,29.310345
9,Swiss Camp 10m,89.52,2000,32.758621


In [398]:
# Using plotly.express
import plotly.express as px

#fig = px.line(x, x='Datetime', y="relative_humidity_1")
#fig.show()
fig = go.Figure([go.Scatter(x=x['year'], y=x['relative_humidity_1'])])
fig.show()

In [406]:
def report(data, date, station, variable, scope):
    """
    Function to return the percentile of specified values given a selected date.
    
    Parameters:
    df (pd.DataFrame): The dataframe to be used.
    date (str): The date of the observations for which the percentile is calculated.
    
    Returns:
    df (pd.DataFrame): A dataframe with all numerical columns and the percentiles of the values of the selected date.
    """
  
    # Load Data
    df = _load_data(data)
    
    # Split date input into year, month, day
    year = int(date[0:4])
    month = int(date[5:7])
    day = int(date[8:10])
  
    # subset df with date and find day of year and day variable
    date_df = df.loc[(df['Datetime'].dt.year == year) & (df['Datetime'].dt.month == month) & (df['Datetime'].dt.day == day)]
      # select day of year
    day =  date_df["DayOfYear"].mean()
    datetime = date_df["Datetime"].max()
  
    # select the specific day of century and the related values
    day_century = date_df["DayOfCentury"].mean() 
    day_century_value = pd.DataFrame(pd.DataFrame.mean(date_df)).reset_index()
    day_century_value = pd.pivot_table(day_century_value, index=None, columns=['index'], aggfunc=max)
  
    if scope == "relative":
        # group by calender day
        df = df.loc[df['DayOfYear'] == day]
    elif scope == "absolute":
        df = df
    else: 
        raise ValueError("Only 'relative' & 'absolute' are accepted input values")
  
    # Find the index with the specified date
    row_index = int(df[df['Datetime'] == date].index[0])
    
    # Remove columns that do not contain numerical values & Subset df based on measure selection
    if variable == "All":
        df = df.select_dtypes(include=['int', 'float']).copy()
    else:
        df = pd.DataFrame({variable: df[variable]})
        df = df.select_dtypes(include=['int', 'float']).copy()
    
    # Create an empty dictionary for the output
    percentile_dict = {}
    
    # Iterate through the columns
    for col in df.columns:
        # Find the percentile of the value in the specified row and date
        value = df[col][row_index]
        percentile = df[col].rank(pct=True)[df[col] == value].iloc[0]*100 if not math.isnan(value) else math.nan
    
        # Add the percentile to the dictionary
        percentile_dict[col] = percentile
    print('displaying percentile dict')
    print(percentile_dict)
    # Create a dataframe with the output
    x = pd.DataFrame(percentile_dict, index=[0])
    print('displaying percentile df')
    import prettytable as pt
  
    table = pt.PrettyTable()
    table.field_names = ["Measurement", "Percentile"]
  
    for col in x.columns:
        if x[col].values > 90 or x[col].values < 10:
            table.add_row([f"\033[1m{col}\033[0m", f"\033[1m{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}%\033[0m"])
        else:
            table.add_row([col, f"{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}%"])
  
    print(
    f" Date: {date} \n" ,
    f"Station: {station} \n" , 
    f"Measurement: {variable} \n"  , 
    f"Data: {data} \n" ,
    "----------------------------------------------------------------------------------------------------------------------\n",
    f"                   Climatology Report\n" 
    )
    print(table)
    print("----------------------------------------------------------------------------------------------------------------------\n")
    
    import plotly.express as px
    # Create a list of columns to be plotted
    x = day_century_value
    columns_to_plot = [col for col in x.columns.values if col in df.columns.values]
  
    # Create a list of values from x to be highlighted
    values_to_highlight = x[columns_to_plot].values.flatten().tolist()
  
    # Create a list of subplots
    figs = []
    # Loop through list of columns
    for col, v in zip(columns_to_plot, values_to_highlight) : 
        # Create a subplot for each column 
        fig = px.box(df[col], orientation = "v",boxmode='group')
        # Format the axes
        fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
        # Highlight the values from x
        fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
    mode = 'markers',
    marker_symbol = 'circle-dot',
    marker_size = 8,
    marker_color = 'red')
        
        # Add figure to list of subplots 
        figs.append(fig)
    
    # Output
    for fig in figs:
        fig.show()

In [405]:
report("Promice", "2022-01-12", "THU_L_day_v03", variable = "AirPressure(hPa)", scope = "relative")

displaying percentile dict
{'AirPressure(hPa)': 61.940298507462686}
displaying percentile df
 Date: 2022-01-12 
 Station: THU_L_day_v03 
 Measurement: AirPressure(hPa) 
 Data: Promice 
 ----------------------------------------------------------------------------------------------------------------------
                    Climatology Report

+------------------+------------+
|   Measurement    | Percentile |
+------------------+------------+
| AirPressure(hPa) |    61%     |
+------------------+------------+
----------------------------------------------------------------------------------------------------------------------



GC Net Data

In [None]:
gc = pd.read_parquet('data\df_daily.gzip', engine='pyarrow')

In [None]:
gc.columns

*Test on one station*

In [None]:
gc = gc[gc['station_name'] == "Humboldt"] 

Promice Data

In [None]:
pc = pd.read_parquet('data\promice_hourly.gzip', engine='pyarrow')

In [None]:
pc

*Test Suite*

In [None]:
# define variables

# *Mandatory: Data
data = "Promice"

# *Mandatory: Date
date = "22-07-2008"

# Optional: Station
#station = "SCO_L_hour_v03"
station = "THU_L_day_v03"

# Optional: Measure
y = "All"

In [None]:
# subset dataframe 
pc = pc[pc['file'] == station] 

In [None]:
# select dato 
year= 2022
month = 1
day = 12

# subset df with date and find day of year and day variable
date_df = pc.loc[(pc['Datetime'].dt.year == year) & (pc['Datetime'].dt.month == month) & (pc['Datetime'].dt.day == day)]
  # select day of year
day =  date_df["DayOfYear"].mean()
datetime = date_df["Datetime"].max()

# select the specific day of century and the related values
day_century = date_df["DayOfCentury"].mean() 
day_century_value = pd.DataFrame(pd.DataFrame.mean(date_df)).reset_index()
day_century_value = pd.pivot_table(day_century_value, index=None, columns=['index'], aggfunc=max)

# group by calender day
pc_group = pc.loc[pc['DayOfYear'] == day]

In [None]:
def get_percentile(df, date):
  """
  Function to return the percentile of specified values given a selected date.
  
  Parameters:
  df (pd.DataFrame): The dataframe to be used.
  date (str): The date of the observations for which the percentile is calculated.
  
  Returns:
  df (pd.DataFrame): A dataframe with all numerical columns and the percentiles of the values of the selected date.
  """

  # Find the index with the specified date
  row_index = int(df[df['Datetime'] == date].index[0])
  
  # Remove columns that do not contain numerical values & Subset df based on measure selection
  if y == "All":
    df = df.select_dtypes(include=['int', 'float']).copy()
  else:
    df = pd.DataFrame(df[y])
    df = df.select_dtypes(include=['int', 'float']).copy()
  
  # Create an empty dictionary for the output
  percentile_dict = {}
  
  # Iterate through the columns
  for col in df.columns:
    # Find the percentile of the value in the specified row and date
    value = df[col][row_index]
    if(math.isnan(value)): 
      percentile = math.nan
    else: 
      # calculate the percentile with df.rank()
      percentile = df[col].rank(pct=True)[df[col] == value].iloc[0]*100
    
    # Add the percentile to the dictionary
    #percentile_dict[col + "_percentile"] = percentile
    percentile_dict[col] = percentile
  
  
  # Create a dataframe with the output
  df_percentiles = pd.DataFrame(percentile_dict, index=[0])
  return df_percentiles

In [None]:
x = get_percentile(pc, "2022-01-12")

In [None]:
#x = x.append(day_century_value.iloc[0], ignore_index=True)

In [None]:
x = get_percentile(pc_group, "2022-01-12")

In [399]:
import prettytable as pt

table = pt.PrettyTable()
table.field_names = ["Measurement", "Percentile"]

for col in x.columns:
    if x[col].values > 90 or x[col].values < 10:
        table.add_row([f"\033[1m{col}\033[0m", f"\033[1m{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}% \033[0m"])
    else:
        table.add_row([col, f"{int(x[col].values) if not np.isnan(x[col].values) else 'NaN'}%"])


print(
f" Date: {date} \n" ,
f"Station: {station} \n" , 
f"Measurment: {y} \n"  , 
f"Data: {data} \n" ,
"-----------------------------------------------------------\n",
f"                   Climatology \n" 
)

print(table)

ModuleNotFoundError: No module named 'prettytable'

*Visual test suite*

In [None]:
############################ Boxplots #########################################
ff = pc 
x = ff.loc[ff['Datetime'] == "2022-01-12"]

import plotly.express as px

# Create a list of columns to be plotted
columns_to_plot = [col for col in x.columns.values if col in ff.columns.values]

# Create a list of values from x to be highlighted
values_to_highlight = x[columns_to_plot].values.flatten().tolist()

# Create a list of subplots
figs = []
# Loop through list of columns
for col, v in zip(columns_to_plot,values_to_highlight) : 
  # Create a subplot for each column 
  fig = px.box(ff[col], orientation = "v",boxmode='group')
  # Format the axes
  fig.update_layout(title_text= f"Boxplot for {col}", xaxis_title='', yaxis_title='')
  # Highlight the values from x
  fig.add_scatter(x=[col], y=[v], name= f"Selected Value \n{col}",
                        mode = 'markers',
                        marker_symbol = 'circle-dot',
                        marker_size = 8,
                        marker_color = 'red')
  
  # Add figure to list of subplots 
  figs.append(fig)

# Show the plots
for fig in figs:
  fig.show()

In [None]:
# A function that calculates the percentiles of every column and their values

def percentile_df(df):
    for col in (df.columns):
        df[f'{col}_pcta'] = df[col].rank(pct=True)
        #df[f'{col}_pcta'] = df[col].rank(pct=True)[df[col] == value] *100

    return df

In [None]:
gg = percentile_df(pc)
gg = gg[["Datetime", "AirTemperature(C)", "AirTemperature(C)_pcta"]]
y = "AirTemperature(C)"
y_pcta = "AirTemperature(C)_pcta"

In [None]:
fig = go.Figure([
    go.Scatter(
        name='Air Pressure (hPa)',
        x=gg['Datetime'],
        y=gg[y],
        mode='lines',
        line=dict(color='rgb(31, 119, 180)'),
    ),
    go.Scatter(
        name='Upper Bound (20-80)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta])),
        mode='lines',
        marker=dict(color="#00BB00"),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound (20-80)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta])),
        marker=dict(color="#00BB00"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(0, 187, 0, 0.3)',
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound (0-20 & 80-100)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta]  )),
        mode='lines',
        marker=dict(color="#BB0000"),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound (0-20 & 80-100)',
        x=gg['Datetime'],
        y=(gg[y] * (gg[y_pcta] )),
        marker=dict(color="#BB0000"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(187, 0, 0, 0.3)',
        fill='tonexty',
        showlegend=False
    )
])
fig.update_layout(
    yaxis_title='Air Pressure (hPa)',
    title='Continuous, variable value error bars',
    hovermode="x"
)
fig.show()

In [None]:
gg.loc[(gg['Datetime'].dt.month == 12) & (gg['Datetime'].dt.day == 31)]