# Plotter 3.0

In [123]:
import base64
import os, io
from datetime import date, datetime, timedelta
import pandas as pd
import seaborn as sns
from windrose import WindroseAxes
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from PIL import Image
import numpy as np
import google.cloud.logging
from google.cloud import storage
import logging
from alive_progress import alive_bar

In [124]:
# Register the service-account key file (path relative to the notebook's
# working directory) under the GOOGLE_APPLICATION_CREDENTIALS variable.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS='../service_account.json')

### Create Date Range 

In [125]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ----- ----- ----- -----    CREATE DATERANGE    ----- ----- ----- -----
# Generator yielding each date from start_date up to (but not including) end_date
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def daterange(start_date, end_date):
    """Yield consecutive days from ``start_date`` up to, but excluding, ``end_date``.

    The number of values produced is the whole-day part of
    ``end_date - start_date``; a trailing partial day is ignored and nothing
    is yielded when that difference is less than one full day.
    """
    total_days = (end_date - start_date).days
    offset = 0
    while offset < total_days:
        yield start_date + timedelta(days=offset)
        offset += 1

# ======================================================================

### Truncate CMAP

In [126]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ----- ----- ----- -----   TRUNCATE COLORMAP    ----- ----- ----- -----
# Can be used to remove white / black bookended values
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a colormap from the ``[minval, maxval]`` portion of ``cmap``.

    ``cmap`` is sampled at ``n`` evenly spaced positions between ``minval``
    and ``maxval``; the sampled colours are assembled into a new
    ``LinearSegmentedColormap`` whose name records the source colormap and
    the two bounds.
    """
    sample_points = np.linspace(minval, maxval, n)
    sampled_colors = cmap(sample_points)
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    return colors.LinearSegmentedColormap.from_list(label, sampled_colors)

# ======================================================================

### Import Weather Data

In [127]:
# -------------------------------------------------------------------------------
# IMPORT DATASET
# Function to import all weather data
# Data Patching completed in this step
# -------------------------------------------------------------------------------
def import_weather_data(storage_client=None, bucket_name='weather_aurorabc', blob_name='weather_ampm.csv'):
    """Load the weather CSV from Google Cloud Storage into a DataFrame.

    Parameters
    ----------
    storage_client : optional
        Client object exposing ``bucket(name)``. The supplied client is used
        as-is; a new ``storage.Client()`` is created only when this is
        ``None``. (Previously the argument was overwritten and ignored.)
    bucket_name : str, optional
        Name of the bucket holding the CSV.
    blob_name : str, optional
        Name of the CSV blob inside the bucket, e.g.
        ``'backups/weather-2023-09-26.csv'`` to read a backup instead.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with every measurement column cast to ``float``.

    Raises
    ------
    KeyError
        Raised by ``DataFrame.astype`` if any measurement column listed
        below is missing from the CSV.
    """
    # Connect to Google Cloud Storage (only if the caller did not)
    # -----------------------------------------------------------
    if storage_client is None:
        storage_client = storage.Client()

    # Open Bucket / Blob
    # ------------------
    blob = storage_client.bucket(bucket_name).blob(blob_name)

    # Read Weather Data from Blob
    # ---------------------------
    with blob.open("r") as f:
        weather_data = pd.read_csv(f)

    # Cast all data variables to float
    float_columns = (
        'Height',
        'Temp',
        'DewPoint',
        'Relative_Humidity',
        'Mean_Mixed_Layer',
        'Wind_Direction',
        'Wind_Speed',
        'Potential_Temp',
        'Equivalent_Potential_Temp',
        'Virtual_Potential_Temp',
    )
    return weather_data.astype({column: 'float' for column in float_columns})

# Plot Temps
---
Updated function that plots temperature graphs for an individual location, producing one graph for each date and each plot range (number of days of data).

In [128]:
def plot_temps(storage_client, location, dates, plot_range):
    """Plot and upload temperature line graphs for a single location.

    One graph is produced for every combination of a date in ``dates`` and a
    day count in ``plot_range``. Each graph shows ``Temp`` against ``Date``
    with one line per ``Pressure`` level (850 and 700) and is uploaded as a
    WebP image to the ``weather_aurorabc`` bucket under ``graphs_test/``.

    Parameters
    ----------
    storage_client
        Client object exposing ``bucket(name)``; used for the upload.
    location : dict
        Must provide ``'name'``, ``'save_name'``, ``'palette_order'`` (the
        ``[start, stop]`` bounds of a slice into the reordered palette) and
        ``'data'`` (a DataFrame with ``Date``, ``Temp`` and ``Pressure``).
    dates : iterable
        End dates of the plotted windows; each must support ``strftime`` and
        subtraction of a ``timedelta``.
    plot_range : iterable of int
        Window lengths in days.

    Notes
    -----
    Combinations with no numeric ``Temp`` values, or for which seaborn fails
    to draw, are reported on stdout and skipped. Every figure that is
    created is closed again, including on the skip and error paths.
    """
    # Set color palette for the graph (loop-invariant)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The index list swaps the two colours inside each pair of "Paired".
    palette = sns.color_palette("Paired", n_colors=6)
    palette_order = [1, 0, 3, 2, 5, 4]
    palette = [palette[idx] for idx in palette_order]
    hue_order = [850, 700]
    location_palette = palette[location['palette_order'][0]: location['palette_order'][1]]
    legend_labels = ['850', '700']

    bucket_name = 'weather_aurorabc'
    bucket = storage_client.bucket(bucket_name)

    for d in dates:
        day_stamp = d.strftime("%Y-%m-%d")

        for r in plot_range:

            # Create new df containing only data within plot range
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # NOTE(review): Date is compared against str(...) values, so the
            # column presumably holds ISO-formatted strings - confirm.
            mask = (location['data']['Date'] >= str(d - timedelta(days=r))) & (location['data']['Date'] <= str(d))
            plot_data = location['data'].loc[mask]

            # Spread between the 10th and 90th percentile of Temp
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            Q1 = plot_data['Temp'].quantile(0.10)
            Q3 = plot_data['Temp'].quantile(0.90)
            IQR = Q3 - Q1

            # Identify and remove outliers
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            threshold = 1.5
            outliers = plot_data[(plot_data['Temp'] < Q1 - threshold * IQR) | (plot_data['Temp'] > Q3 + threshold * IQR)]
            plot_data = plot_data.drop(outliers.index)

            # Skip plot if there are no numerical values within the subset
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Checked before any figure exists so a skip cannot leak one.
            if plot_data.dropna(subset=['Temp']).empty:
                print("COULD NOT GENERATE GRAPH")
                print("Location: {}".format(location['name']))
                print("Date: {}".format(d))
                print("Plot Range: {}".format(r))
                print("Data: {}\n".format(plot_data))
                continue

            fig = plt.figure(figsize=(20, 6))
            try:
                # Set figure details
                # ~~~~~~~~~~~~~~~~~~
                plt.xlim([0, r - 1])
                plt.grid()
                plt.axhline(0, color='black')
                plt.xticks(rotation='vertical')
                plt.title("Temperature °C - {} day".format(r))
                if r > 30:
                    # Long windows: one tick per week
                    plt.xticks(np.arange(0, r + 1, 7.0))

                # Plot graph
                # ~~~~~~~~~~
                try:
                    sns.lineplot(x='Date', y='Temp', hue='Pressure', data=plot_data, palette=location_palette, hue_order=hue_order)
                except Exception as e:
                    # Best effort: report and skip this graph instead of
                    # carrying on to label and upload an empty figure.
                    print(e)
                    print(plot_data)
                    continue

                # Rename legend with detailed labels
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # zip() stops at the shorter sequence, so a legend with
                # fewer entries than labels cannot raise IndexError.
                legend = plt.legend()
                for text, label in zip(legend.get_texts(), legend_labels):
                    text.set_text(label)

                # Define save details
                # ~~~~~~~~~~~~~~~~~~~
                # The local directory is created but this function does not
                # write into it; only the cloud upload below stores the image.
                save_name = '{}_temp'.format(location['save_name'])
                save_loc = "./figures/temp/{}".format(day_stamp)
                os.makedirs(save_loc, exist_ok=True)

                # Publish graph to Google Cloud
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                buff = io.BytesIO()
                plt.savefig(buff, format='webp', dpi=150, bbox_inches="tight")

                blob_name = 'graphs_test/{}/{}_{}d_{}.webp'.format(day_stamp, save_name, r, day_stamp)
                blob = bucket.blob(blob_name)
                # The payload is WebP, so label it as such (was 'image/png').
                blob.upload_from_string(buff.getvalue(), content_type='image/webp')
            finally:
                plt.close(fig)

# Plot ALL Temps
---
Updated function that plots the temperatures of all locations together on a single graph, producing one graph for each date and each plot range (number of days of data).

In [129]:
def plot_all_temps(storage_client, location, dates, plot_range):
    """Plot and upload combined temperature graphs covering every location.

    One graph is produced for every combination of a date in ``dates`` and a
    day count in ``plot_range``. Each location contributes one line per
    ``Pressure`` level (850 and 700). The image is uploaded as WebP to the
    ``weather_aurorabc`` bucket under ``graphs_test/``.

    Parameters
    ----------
    storage_client
        Client object exposing ``bucket(name)``; used for the upload.
    location : list of dict (or a single dict)
        Despite the singular name (kept so existing callers keep working),
        this is the collection of locations to draw. Each entry must provide
        ``'name'``, ``'palette_order'`` and ``'data'`` (a DataFrame with
        ``Date``, ``Temp`` and ``Pressure``). The body previously read an
        undefined name ``locations`` instead of this argument.
    dates : iterable
        End dates of the plotted windows; each must support ``strftime`` and
        subtraction of a ``timedelta``.
    plot_range : iterable of int
        Window lengths in days.

    Notes
    -----
    Locations with no numeric ``Temp`` values in a window are reported on
    stdout and left out of that graph; legend labels are generated only for
    the locations actually drawn. If nothing could be drawn, no image is
    uploaded for that combination.
    """
    # Accept a lone location dict as well as the usual list of them.
    locations = [location] if isinstance(location, dict) else list(location)

    # Set color palette for the graph (loop-invariant)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The index list swaps the two colours inside each pair of "Paired".
    palette = sns.color_palette("Paired", n_colors=6)
    palette_order = [1, 0, 3, 2, 5, 4]
    palette = [palette[idx] for idx in palette_order]
    hue_order = [850, 700]

    bucket_name = 'weather_aurorabc'
    bucket = storage_client.bucket(bucket_name)

    for d in dates:
        day_stamp = d.strftime("%Y-%m-%d")

        for r in plot_range:

            fig = plt.figure(figsize=(20, 6))
            try:
                # Set figure details
                # ~~~~~~~~~~~~~~~~~~
                plt.xlim([0, r - 1])
                plt.grid()
                plt.axhline(0, color='black')
                plt.xticks(rotation='vertical')
                plt.title("Temperature °C - {} day".format(r))
                if r > 30:
                    # Long windows: one tick per week
                    plt.xticks(np.arange(0, r + 1, 7.0))

                # Loop through each location and plot data on graph
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                plotted = []
                for loc in locations:
                    # Create truncated data object with only correct date range
                    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    mask = (loc['data']['Date'] >= str(d - timedelta(days=r))) & (loc['data']['Date'] <= str(d))
                    plot_data = loc['data'].loc[mask]

                    # Spread between the 10th and 90th percentile of Temp
                    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    Q1 = plot_data['Temp'].quantile(0.10)
                    Q3 = plot_data['Temp'].quantile(0.90)
                    IQR = Q3 - Q1

                    # Identify and remove outliers
                    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    threshold = 1.5
                    outliers = plot_data[(plot_data['Temp'] < Q1 - threshold * IQR) | (plot_data['Temp'] > Q3 + threshold * IQR)]
                    plot_data = plot_data.drop(outliers.index)

                    # Skip location if there are no numerical values within the subset
                    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    if plot_data.dropna(subset=['Temp']).empty:
                        print("COULD NOT GENERATE GRAPH")
                        print("Location: {}".format(loc['name']))
                        print("Date: {}".format(d))
                        print("Plot Range: {}".format(r))
                        print("Data: {}\n".format(plot_data))
                        continue

                    # Add data for single location to graph
                    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    sns.lineplot(x='Date', y='Temp', hue='Pressure', data=plot_data, palette=palette[loc['palette_order'][0]: loc['palette_order'][1]], hue_order=hue_order)
                    plotted.append(loc)

                # Nothing was drawn for any location: do not upload a blank graph
                if not plotted:
                    print("No data for any location - combined graph skipped\n")
                    continue

                # Rename legend with detailed labels
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Labels follow drawing order (location, then hue_order) and
                # cover only the locations that were plotted, so a skipped
                # location cannot shift or overflow the labels.
                legend = plt.legend()
                labels = ['{} {}'.format(loc['name'], pressure) for loc in plotted for pressure in hue_order]
                for text, label in zip(legend.get_texts(), labels):
                    text.set_text(label)

                # Generate Save Path
                # ~~~~~~~~~~~~~~~~~~
                # The local directory is created but this function does not
                # write into it. It used to point at ./figures/wind, which
                # looks like a copy-paste from the wind plot.
                save_name = 'all_temp'
                save_loc = "./figures/temp/{}".format(day_stamp)
                os.makedirs(save_loc, exist_ok=True)

                # Publish graph to Google Cloud
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                buff = io.BytesIO()
                plt.savefig(buff, format='webp', dpi=150, bbox_inches="tight")

                blob_name = 'graphs_test/{}/{}_{}d_{}.webp'.format(day_stamp, save_name, r, day_stamp)
                blob = bucket.blob(blob_name)
                # The payload is WebP, so label it as such (was 'image/png').
                blob.upload_from_string(buff.getvalue(), content_type='image/webp')
            finally:
                plt.close(fig)

# Plot Wind
---
Updated function that plots wind roses for an individual location, producing one graph for each date and each plot range (number of days of data).

In [130]:
def plot_wind(storage_client, location, dates, plot_range):
    """Plot and publish wind roses for a single location.

    One wind rose is produced for every combination of a date in ``dates``
    and a day count in ``plot_range``. Each image is written locally to
    ``./figures/wind/<date>/`` and also uploaded as WebP to the
    ``weather_aurorabc`` bucket under ``graphs_test/``.

    Parameters
    ----------
    storage_client
        Client object exposing ``bucket(name)``; used for the upload.
    location : dict
        Must provide ``'name'`` and ``'data'`` (a DataFrame with ``Date``,
        ``Wind_Speed`` and ``Wind_Direction``).
    dates : iterable
        End dates of the plotted windows; each must support ``strftime`` and
        subtraction of a ``timedelta``.
    plot_range : iterable of int
        Window lengths in days.

    Notes
    -----
    Rows missing either wind speed or wind direction are dropped before
    plotting. Windows left with no rows are reported on stdout and skipped.
    The figure is closed even if plotting, saving or uploading raises.
    """
    # Define color map and truncate to appropriate range
    cmap = truncate_colormap(cm.viridis_r, minval=0, maxval=0.9)

    bucket_name = 'weather_aurorabc'
    bucket = storage_client.bucket(bucket_name)
    save_name = '{}_wind'.format(location['name'].lower().replace(' ', ''))

    for d in dates:
        # Same date format as the temperature plots; identical to str(d)
        # for datetime.date values.
        day_stamp = d.strftime("%Y-%m-%d")

        for r in plot_range:

            # Create new df containing only data within plot range
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            mask = (location['data']['Date'] >= str(d - timedelta(days=r))) & (location['data']['Date'] <= str(d))
            plot_data = location['data'].loc[mask]

            # Spread between the 5th and 95th percentile of Wind_Speed
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            Q1 = plot_data['Wind_Speed'].quantile(0.05)
            Q3 = plot_data['Wind_Speed'].quantile(0.95)
            IQR = Q3 - Q1

            # Identify and remove outliers
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            threshold = 1.5
            outliers = plot_data[(plot_data['Wind_Speed'] < Q1 - threshold * IQR) | (plot_data['Wind_Speed'] > Q3 + threshold * IQR)]
            plot_data = plot_data.drop(outliers.index)
            # Both series feed the wind rose, so a row is only usable when
            # speed AND direction are present.
            plot_data = plot_data.dropna(subset=['Wind_Speed', 'Wind_Direction'])

            # Skip plot if there are no numerical values within the subset
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if plot_data.empty:
                print("COULD NOT GENERATE GRAPH")
                print("Location: {}".format(location['name']))
                print("Date: {}".format(d))
                print("Plot Range: {}".format(r))
                print("Data: {}\n".format(plot_data))
                continue

            # Populate speed and direction vars from DataFrame
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            wd = plot_data['Wind_Direction'].to_list()
            ws = plot_data['Wind_Speed'].to_list()

            # Plot wind rose for data set
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
            ax = WindroseAxes.from_ax()
            try:
                ax.contourf(wd, ws, cmap=cmap)
                ax.set_legend()

                # Title and legend
                # ~~~~~~~~~~~~~~~~
                plt.title("{} Wind - {} day".format(location['name'], r))
                plt.legend(title="Wind Speed (km/h)", loc=4, fontsize='small', fancybox=True)

                # Generate Save Path
                # ~~~~~~~~~~~~~~~~~~
                save_loc = "./figures/wind/{}".format(day_stamp)
                os.makedirs(save_loc, exist_ok=True)

                # Save image as webp
                # ~~~~~~~~~~~~~~~~~~
                path = '{}/{}_{}d_{}'.format(save_loc, save_name, r, day_stamp)
                plt.savefig('{}.webp'.format(path), format='webp', dpi=150)

                # Publish graph to Google Cloud
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                buff = io.BytesIO()
                plt.savefig(buff, format='webp', dpi=150, bbox_inches="tight")

                blob_name = 'graphs_test/{}/{}_{}d_{}.webp'.format(day_stamp, save_name, r, day_stamp)
                blob = bucket.blob(blob_name)
                # The payload is WebP, so label it as such (was 'image/png').
                blob.upload_from_string(buff.getvalue(), content_type='image/webp')
            finally:
                plt.close(ax.figure)

### Main Application Body

In [131]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
# ----- ----- ----- -----          START         ----- ----- ----- -----
#  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def main():
    """Regenerate every graph for every day from 2020-01-01 onwards.

    Loads the weather data once, splits it per station, then for each day
    draws the combined temperature graph followed by the wind and
    temperature graphs of each station, advancing a progress bar per day.
    """
    # Collect weather data
    # ~~~~~~~~~~~~~~~~~~~~
    storage_client = storage.Client()
    weather_data = import_weather_data(storage_client)

    # Create Date Range
    # ~~~~~~~~~~~~~~~~~
    start_date = datetime(2020, 1, 1)
    end_date = datetime.today()
    dates = [moment.date() for moment in daterange(start_date, end_date)]

    # Define plot ranges (window lengths in days)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    plot_range = [3, 5, 10, 30, 90, 180]

    # Define location specific params
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # (station name, palette slice bounds, file-name stem)
    station_specs = [
        ("Vernon", [2, 4], "vernon"),         # green
        ("Port Hardy", [4, 6], "porthardy"),  # red
        ("Quillayute", [0, 2], "quillayute"), # blue
    ]
    locations = []
    for station, palette_bounds, stem in station_specs:
        locations.append({
            'name': station,
            'palette_order': palette_bounds,
            'save_name': stem,
            'data': weather_data[weather_data['Station'] == station],
        })

    with alive_bar(len(dates), force_tty=True) as bar:
        # `day` rather than `date`, so the imported datetime.date is not shadowed
        for day in dates:
            one_day = [day]

            print('{} --------------------'.format(day))
            print("Plotting ALL Temps")
            plot_all_temps(storage_client, locations, one_day, plot_range)

            for station_info in locations:
                print("Plotting Wind: {}".format(station_info['name']))
                plot_wind(storage_client, station_info, one_day, plot_range)

                print("Plotting Temp: {}\n".format(station_info['name']))
                plot_temps(storage_client, station_info, one_day, plot_range)

            bar()
        
    
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
# ----- ----- ----- -----           END          ----- ----- ----- -----
#  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Run the whole plotting pipeline defined in main().
# Long-running: main() iterates over every day since 2020-01-01.
main()

on 0: 2020-01-01 --------------------                                                                                   
on 0: Plotting ALL Temps                                                                                                
on 0: Plotting Wind: Vernon                                                                                             
on 0: Plotting Temp: Vernon                                                                                             
on 0: Plotting Wind: Port Hardy                                                                                         
on 0: Plotting Temp: Port Hardy                                                                                         
on 0: Plotting Wind: Quillayute                                                                                         
on 0: Plotting Temp: Quillayute                                                                                         
on 1: 2020-01-02 ---------------

on 8: Plotting Temp: Vernon                                                                                             
on 8: Plotting Wind: Port Hardy                                                                                         
on 8: Plotting Temp: Port Hardy                                                                                         
on 8: Plotting Wind: Quillayute                                                                                         
on 8: Plotting Temp: Quillayute                                                                                         
on 9: 2020-01-10 --------------------                                                                                   
on 9: Plotting ALL Temps                                                                                                
on 9: Plotting Wind: Vernon                                                                                             
on 9: Plotting Temp: Vernon     

on 16: Plotting Wind: Quillayute                                                                                        
on 16: Plotting Temp: Quillayute                                                                                        
on 17: 2020-01-18 --------------------                                                                                  
on 17: Plotting ALL Temps                                                                                               
on 17: Plotting Wind: Vernon                                                                                            
on 17: Plotting Temp: Vernon                                                                                            
on 17: Plotting Wind: Port Hardy                                                                                        
on 17: Plotting Temp: Port Hardy                                                                                        
on 17: Plotting Wind: Quillayute

on 25: Plotting ALL Temps                                                                                               
on 25: Plotting Wind: Vernon                                                                                            
on 25: Plotting Temp: Vernon                                                                                            
on 25: Plotting Wind: Port Hardy                                                                                        
on 25: Plotting Temp: Port Hardy                                                                                        
on 25: Plotting Wind: Quillayute                                                                                        
on 25: Plotting Temp: Quillayute                                                                                        
on 26: 2020-01-27 --------------------                                                                                  
on 26: Plotting ALL Temps       

on 33: Plotting Wind: Port Hardy                                                                                        
on 33: Plotting Temp: Port Hardy                                                                                        
on 33: Plotting Wind: Quillayute                                                                                        
on 33: Plotting Temp: Quillayute                                                                                        
on 34: 2020-02-04 --------------------                                                                                  
on 34: Plotting ALL Temps                                                                                               
on 34: Plotting Wind: Vernon                                                                                            
on 34: Plotting Temp: Vernon                                                                                            
on 34: Plotting Wind: Port Hardy

on 41: Plotting Temp: Quillayute                                                                                        
on 42: 2020-02-12 --------------------                                                                                  
on 42: Plotting ALL Temps                                                                                               
on 42: Plotting Wind: Vernon                                                                                            
on 42: Plotting Temp: Vernon                                                                                            
on 42: Plotting Wind: Port Hardy                                                                                        
on 42: Plotting Temp: Port Hardy                                                                                        
on 42: Plotting Wind: Quillayute                                                                                        
on 42: Plotting Temp: Quillayute

on 50: Plotting Wind: Vernon                                                                                            
on 50: Plotting Temp: Vernon                                                                                            
on 50: Plotting Wind: Port Hardy                                                                                        
on 50: Plotting Temp: Port Hardy                                                                                        
on 50: Plotting Wind: Quillayute                                                                                        
on 50: Plotting Temp: Quillayute                                                                                        
on 51: 2020-02-21 --------------------                                                                                  
on 51: Plotting ALL Temps                                                                                               
on 51: Plotting Wind: Vernon    

on 58: Plotting Temp: Port Hardy                                                                                        
on 58: Plotting Wind: Quillayute                                                                                        
on 58: Plotting Temp: Quillayute                                                                                        
on 59: 2020-02-29 --------------------                                                                                  
on 59: Plotting ALL Temps                                                                                               
on 59: Plotting Wind: Vernon                                                                                            
on 59: Plotting Temp: Vernon                                                                                            
on 59: Plotting Wind: Port Hardy                                                                                        
on 59: Plotting Temp: Port Hardy

In [None]:
# Create function to find missing graphs
# Create function to plot all temps together
# Create function to plot wind --- DONE

In [112]:
# Plotting Temps Error
# Rows with missing data are removed, so the corresponding x-axis entries disappear as well
# Can this wait until the other issues are resolved?