# Part I: Importing Libraries and retrieval/download of files from GovCanada webpage

* *Part I running time: ~20 mins*

##  Import libraries

In [1]:
# Data analysis 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Plotly libraries
import fastparquet
import pyarrow

# Web scraping and file retrieval
import os
import glob
import datetime

# Database processing
import sqlite3
import dask.dataframe as dd

# Visualization and geo-data imports
import geopandas as gpd
import chart_studio.plotly as py
import plotly.graph_objects as go

# Offline mode
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# Hyperlink / Web display
from IPython.core.display import display, HTML

# Formatting options
pd.options.display.float_format = '{:,.3f}'.format
np.set_printoptions(precision=2)

print('Library imports complete!')

Library imports complete!


## Download Hydrometric Station Location data from Hydat.sqlite3 database

* *Select STATIONS where HYD_STATUS = A (Active)*
* *Ignore STATIONS where HYD_STATUS = D (Deactivated/Inactive)*
* *Export the database table to a .csv file*
* *Check the file DataFrame file to see that it works!*
* *Close the connection*

In [2]:
os.chdir(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/sqlite3')

# Create connection to Hydat database
con = sqlite3.connect("Hydat.sqlite3")
cursor = con.cursor()

#all_database_stuff = cursor.execute("SELECT * FROM sqlite_master").fetchall()
df_stations = pd.read_sql_query('SELECT * from STATIONS where HYD_STATUS = "A"', con) #A = active stations

# Verify that result of SQL query is stored in the dataframe
df_stations.rename(columns={df_stations.columns[0]: "ID"}, inplace=True)
df_stations.rename(columns={df_stations.columns[2]: "PROV_TERR"}, inplace=True)
df_stations.rename(columns={df_stations.columns[8]: "DRAINAGE_AREA_GROSS_KM2"}, inplace=True)

df_stations['PROV_TERR_ID'] = df_stations['PROV_TERR'] + df_stations['ID']

# Drop extra columns 
df_stations.drop(df_stations.loc[:, 'DRAINAGE_AREA_EFFECT': 'DATUM_ID'], inplace=True, axis=1)
df_stations.drop(labels = ['SED_STATUS'], inplace = True, axis = 1)

print(df_stations.info())

df_stations.to_csv('Hydat.csv')

# Close the connection
con.close()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2786 entries, 0 to 2785
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       2786 non-null   object 
 1   STATION_NAME             2786 non-null   object 
 2   PROV_TERR                2786 non-null   object 
 3   REGIONAL_OFFICE_ID       2786 non-null   object 
 4   HYD_STATUS               2786 non-null   object 
 5   LATITUDE                 2786 non-null   float64
 6   LONGITUDE                2786 non-null   float64
 7   DRAINAGE_AREA_GROSS_KM2  2406 non-null   float64
 8   PROV_TERR_ID             2786 non-null   object 
dtypes: float64(3), object(6)
memory usage: 196.0+ KB
None


## Download multiple .csv files from Hydrometric .url directory using wget

* *Hyperlink:* <a href="https://dd.weather.gc.ca/hydrometric/csv/">[https://dd.weather.gc.ca/hydrometric/csv/](https://dd.weather.gc.ca/hydrometric/csv/)</a>

* *Downloading time: ~4 mins*

* *All provincial files for download:*

    "AB_daily_hydrometric.csv, BC_daily_hydrometric.csv, SK_daily_hydrometric.csv, /
    MB_daily_hydrometric.csv, ON_daily_hydrometric.csv, QC_daily_hydrometric.csv, /
    NB_daily_hydrometric.csv, NS_daily_hydrometric.csv, PE_daily_hydrometric.csv, /
    NL_daily_hydrometric.csv, NT_daily_hydrometric.csv, NU_daily_hydrometric.csv, /
    YT_daily_hydrometric.csv"

In [3]:
os.chdir(r"C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily")

# Download hydrometric daily sub directories fast:
!wget --wait=2 -r -np -nH -nd -e robots=off --cut-dirs=3 --reject "index.html*" --accept "AB_daily_hydrometric.csv, BC_daily_hydrometric.csv, SK_daily_hydrometric.csv, MB_daily_hydrometric.csv, ON_daily_hydrometric.csv, QC_daily_hydrometric.csv, NB_daily_hydrometric.csv, NS_daily_hydrometric.csv, PE_daily_hydrometric.csv, NL_daily_hydrometric.csv, NT_daily_hydrometric.csv, NU_daily_hydrometric.csv, YT_daily_hydrometric.csv" https://dd.weather.gc.ca/hydrometric/csv/ --no-check-certificate
    
# Template for file downloads
"""
"AB_daily_hydrometric.csv, BC_daily_hydrometric.csv, SK_daily_hydrometric.csv, /
MB_daily_hydrometric.csv, ON_daily_hydrometric.csv, QC_daily_hydrometric.csv, /
NB_daily_hydrometric.csv, NS_daily_hydrometric.csv, PE_daily_hydrometric.csv, /
NL_daily_hydrometric.csv, NT_daily_hydrometric.csv, NU_daily_hydrometric.csv, /
YT_daily_hydrometric.csv"

"""
print("Completed downloads!")

Completed downloads!


--2022-01-23 20:50:43--  https://dd.weather.gc.ca/hydrometric/csv/
Resolving dd.weather.gc.ca (dd.weather.gc.ca)... 205.189.10.47
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1935 (1.9K) [text/html]
Saving to: 'index.html.tmp'

     0K .                                                     100% 33.0M=0s

2022-01-23 20:50:44 (33.0 MB/s) - 'index.html.tmp' saved [1935/1935]

Removing index.html.tmp since it should be rejected.

--2022-01-23 20:50:46--  https://dd.weather.gc.ca/hydrometric/csv/?C=N;O=D
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
Unable to establish SSL connection.
--2022-01-23 20:50:48--  https://dd.weather.gc.ca/hydrometric/csv/?C=M;O=A
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1935 (1.9K) [text/html]
Saving to: 'index.html@C=M;O=A.tmp'

     0K . 

# Part II: Concatenate (Combine) all provincial DAILY .csv files into one DAILY .csv file

* *Runtime ~4 mins*

In [4]:
ext = '.csv'
fnames = [i for i in glob.glob('*{}'.format(ext))]
print(fnames)
print('/n')

# Combine all files in the list and export as .csv
print('Concatenating all provincial daily hydrometric files into national file..\n')

dtypes = {" ID": str,
          "Date": str, 
          "Water Level / Niveau d'eau (m)": np.float32, 
          "Discharge / Débit (cms)": np.float32}
          
cols = list(dtypes.keys())

path = r"C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily"
df_daily = pd.concat((pd.read_csv(f, usecols=cols, dtype=dtypes, 
                                  sep=',', low_memory=False) for f in fnames))

# Cleanup new file (rename and fill N/A values)
column_indices = [0, 1, 2, 3]
new_names = ["ID", "DATE", "WATER_LEVEL_M", "FLOWRATE_CMS"]
old_names = df_daily.columns[column_indices]
df_daily.rename(columns=dict(zip(old_names, new_names)), inplace=True)

# Fill N/A values
df_daily.fillna(value='', inplace=True)

# Merge the DAILY DataFrame with the HYDRO STATION DATABASE DataFrame
df_daily = pd.merge(df_daily, df_stations, on='ID')

# Convert numeric columns to float32
cols = ["WATER_LEVEL_M", "FLOWRATE_CMS", "LATITUDE", "LONGITUDE", "DRAINAGE_AREA_GROSS_KM2"]
df_daily[cols] = pd.to_numeric(df_daily[cols].stack(), errors='coerce', downcast='float').unstack()

df_daily['STATION'] = df_daily['PROV_TERR_ID'] + "_" + df_daily['STATION_NAME']

# Replace blank spaces with _
df_daily['STATION'] = df_daily['STATION'].str.replace(' ', '_')
df_daily['STATION'] = df_daily['STATION'].str.replace('//', '_')

# Drop unneeded columns
df_daily.drop(columns=['PROV_TERR_ID', 'REGIONAL_OFFICE_ID', 'HYD_STATUS', 'STATION_NAME'], inplace=True)

df_daily.info()

Length: 753 [text/html]
Saving to: 'index.html.tmp'

     0K                                                       100% 21.8M=0s

2022-01-23 20:51:10 (21.8 MB/s) - 'index.html.tmp' saved [753/753]

Removing index.html.tmp since it should be rejected.

--2022-01-23 20:51:12--  https://dd.weather.gc.ca/hydrometric/csv/ON/
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 753 [text/html]
Saving to: 'index.html.tmp'

     0K                                                       100% 19.1M=0s

2022-01-23 20:51:12 (19.1 MB/s) - 'index.html.tmp' saved [753/753]

Removing index.html.tmp since it should be rejected.

--2022-01-23 20:51:14--  https://dd.weather.gc.ca/hydrometric/csv/PE/
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 753 [text/html]
Saving to: 'index.html.tmp'

     0K                                    

['AB_daily_hydrometric.csv', 'BC_daily_hydrometric.csv', 'NL_daily_hydrometric.csv', 'NT_daily_hydrometric.csv', 'NU_daily_hydrometric.csv', 'ON_daily_hydrometric.csv', 'QC_daily_hydrometric.csv', 'SK_daily_hydrometric.csv']


Concatenating all provincial daily hydrometric files into national file..



HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: 'index.html@C=N;O=D.tmp'

     0K .......... .......... ..........                        288K=0.1s

2022-01-23 20:54:20 (288 KB/s) - 'index.html@C=N;O=D.tmp' saved [31438]

Removing index.html@C=N;O=D.tmp since it should be rejected.

--2022-01-23 20:54:22--  https://dd.weather.gc.ca/hydrometric/csv/AB/daily/?C=M;O=A
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
Unable to establish SSL connection.
--2022-01-23 20:54:24--  https://dd.weather.gc.ca/hydrometric/csv/AB/daily/?C=S;O=A
Connecting to dd.weather.gc.ca (dd.weather.gc.ca)|205.189.10.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: 'index.html@C=S;O=A.tmp'

     0K .......... .......... ..........                        529K=0.06s

2022-01-23 20:54:25 (529 KB/s) - 'index.html@C=S;O=A.tmp' saved [31438]

Removing index.html@C=S;O=A.tmp since i

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11125080 entries, 0 to 11125079
Data columns (total 9 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   ID                       object 
 1   DATE                     object 
 2   WATER_LEVEL_M            float32
 3   FLOWRATE_CMS             float32
 4   PROV_TERR                object 
 5   LATITUDE                 float32
 6   LONGITUDE                float32
 7   DRAINAGE_AREA_GROSS_KM2  float32
 8   STATION                  object 
dtypes: float32(5), object(4)
memory usage: 894.6+ MB


# Part III: Create and Clean Master DataFrame

## Create or Clean Master DataFrame (CAN_daily_hydrometric_master.csv)

- Read in the MASTER df using Dask
- Concat both DataFrames on the DATE column. This adds the newest dates to the MASTER df.
- Convert all numeric values to float32 and convert DATE column to datetime64[ns, utc] format.
- Compare the lengths of the DAILY df and the MASTER df
- Export the new MASTER df to .csv and overwrite the old MASTER .csv file

**Notes:**
     
* *The .csv file will continue to grow and is easiest to process using Dask for in-memory processing
  as processing memory above 16GB can be selected*

* *To save time and money, multiple VMs (Virtual Machine) such as Amazon EC2 or Microsoft Azure VM
  are the fastest method to plug into big computing power. Only consider this for very large real-time 
  datasets as it becomes pricey depending on the size of the datasets (GB, TB, PB, etc.)*
 
* *Runtime ~ 6 mins*

In [5]:
import os.path

# If Master file exists in filepath..
if os.path.isfile(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily/og/CAN_daily_hydrometric_master.parquet'):
    
    print("File exists! Merge Daily DataFrame with Master DataFrame..")
    
    os.chdir(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily/og/')

    f = r'CAN_daily_hydrometric_master.parquet'

    # Import the new MASTER.csv as a DataFrmae and follow same cleaning process as above.
    df_master = pd.read_parquet(f, engine='pyarrow') 

    # Replace all NaN (null and N/A) numeric columns with np.NaN value
    float_cols_na = ['WATER_LEVEL_M', 
                     'FLOWRATE_CMS', 
                     'LATITUDE', 
                     'LONGITUDE', 
                     'DRAINAGE_AREA_GROSS_KM2']
    
    df_master[float_cols_na] = df_master[float_cols_na].replace({'NaN': np.nan})

    # Drop all null rows
    df_master.dropna(axis=0, inplace=True)

    print('Merge master and daily DataFrames. Add newest DATE rows to the master DataFrame..')
    df_final = pd.concat([df_master, df_daily], ignore_index=True)
    df_final.info()
    
    df_final['DATE'] = pd.to_datetime(df_final['DATE'], errors='coerce', utc=True)
    
    keep_cols = ['ID',
                'DATE',
                'WATER_LEVEL_M',
                'FLOWRATE_CMS',
                'PROV_TERR',
                'LATITUDE',
                'LONGITUDE',
                'DRAINAGE_AREA_GROSS_KM2',
                'STATION']
    
    # Drop unneeded columns
    df_final.drop(columns=[col for col in df_final if col not in keep_cols], inplace=True)
        
    df_final.reset_index(inplace=True, drop=True)
    df_final.info()
    
    print('Export Master DataFrame to .parq format..')
    
    master_csv_file = r'CAN_daily_hydrometric_master.parquet'
    df_final.to_parquet(master_csv_file)
    
    len_df_daily = len(df_daily)
    len_df_final = len(df_final)

    print(f'Daily DataFrame length: {len_df_daily} \n')
    print(f'Master DataFrame length: {len_df_final} \n')
    
else:  
        
    print('CAN_daily_hydrometric_master file does not exist! Create Master .csv file...')
    
    # Replace all NaN (null and N/A) numeric columns with np.NaN value
    float_cols = ["FLOWRATE_CMS", 
                  "WATER_LEVEL_M", 
                  "DRAINAGE_AREA_GROSS_KM2", 
                  "LATITUDE", 
                  "LONGITUDE"]

    df_daily[float_cols] = df_daily[float_cols].replace({'NaN': np.nan})

    # Drop all null rows
    df_daily.dropna(axis=0, inplace=True)
    
    keep_cols = ['ID',
                'DATE',
                'WATER_LEVEL_M',
                'FLOWRATE_CMS',
                'PROV_TERR',
                'LATITUDE',
                'LONGITUDE',
                'DRAINAGE_AREA_GROSS_KM2',
                'STATION']
    
    df_daily.drop(columns=[col for col in df_daily if col not in keep_cols], inplace=True)
    
    df_daily.reset_index(inplace=True, drop=True)
    df_daily.info()
        
    print("Master DataFrame and master .csv file created!")
    
    # Export DataFrame to .csv (named master)
    os.chdir(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily/og')

    # Create new MASTER.csv file from DAILY DataFrame
    master_csv_file = r"CAN_daily_hydrometric_master.parquet"
    df_daily.to_parquet(master_csv_file, engine='pyarrow')

File exists! Merge Daily DataFrame with Master DataFrame..
Merge master and daily DataFrames. Add newest DATE rows to the master DataFrame..
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34367608 entries, 0 to 34367607
Data columns (total 9 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   ID                       object 
 1   DATE                     object 
 2   WATER_LEVEL_M            float32
 3   FLOWRATE_CMS             float32
 4   PROV_TERR                object 
 5   LATITUDE                 float32
 6   LONGITUDE                float32
 7   DRAINAGE_AREA_GROSS_KM2  float32
 8   STATION                  object 
dtypes: float32(5), object(4)
memory usage: 1.7+ GB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34367608 entries, 0 to 34367607
Data columns (total 9 columns):
 #   Column                   Dtype              
---  ------                   -----              
 0   ID                       object             
 1   DATE  

## Delete individual provincial .csv files in CanadaDaily directory   

In [6]:
import os
import glob
import time

# Delete all provincial .csv files
for CleanUp in glob.glob(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily/*.*'):
    print(f'Files in CanadaDaily folder: {CleanUp}')
    
    if CleanUp.endswith('.csv'):    
        os.remove(CleanUp)

Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\AB_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\BC_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\NL_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\NT_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\NU_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\ON_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\QC_daily_hydrometric.csv
Files in CanadaDaily folder: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily\SK_daily_hydrometric.csv


## List of final files

**Files:**

* **Canadian Daily Hydrometric Stations: CAN_daily_hydrometric.csv**
   
   * *Location: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/CanadaDaily*         
  
   
* **Canadian Daily Hydrometric Stations (MASTER): CAN_daily_hydrometric.master.csv**
   
   * *Location: C:/Users/pdudar/anaconda3/projects/CanadaWatQual/CanadaDaily/og*

# Part IV: Plotly Chart generation

## Create Hydrometric Daily Plotly Time Series Charts

* *Create .html charts for all Hydrometric Stations across Canada*
* *Discharge/Flow Rate (cms) is displayed on LEFT side of chart*
* *Water Level (m) is displayed on RIGHT side of chart*

In [9]:
master_file_path = r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/CanadaDaily/og/CAN_daily_hydrometric_master.parquet'

if os.path.isfile(master_file_path):
    
    os.chdir(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/plotly')

    f = r'CAN_daily_hydrometric_master.parquet'
    df_master = pd.read_parquet(f, engine='fastparquet')

    d_plotly = dict(tuple(df_master.groupby(['STATION'])))

    k = 1
    l = 0
    len_stations = len(df_master.STATION.unique())

    convert = float(3.0 * 0.0167) # Seconds to minutes
    chart_time = convert * len_stations

    print(f'Number of charts to create: {len_stations}')
    print(f'Chart processing time: {chart_time} minutes')

    ##### ----- Make Plotly Time Series Charts ----- #####
    from plotly import __version__ 
    import cufflinks as cf 
    from plotly.offline import init_notebook_mode, iplot 

    import plotly.graph_objects as go
    import plotly.express as px
    from plotly.subplots import make_subplots

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # to get the connection 
    init_notebook_mode(connected = True) 
 
    cf.go_offline 

    print('Creating Plotly charts.. \n')

    d_plotly = dict(tuple(df_master.groupby(['STATION'])))

    k = 1
    l = 0
    len_stations = len(df_master.STATION.unique())

    start = datetime.datetime.now()

    for key in d_plotly.keys():
        d_plotly[key] = df_master[:][df_master.STATION == key]
        df_export_plotly = d_plotly[key]

        # Create figure with secondary y-axis
        subfig = make_subplots(specs=[[{"secondary_y": True}]])

        # Generate a Plotly TimeSeries chart for each monitoring station
        fig = px.line(df_export_plotly, 
                        x =  'DATE',
                        y = 'FLOWRATE_CMS',
                        title=key + ': ' + 'Canada Hydrometric Flow Data - Time Series Info',
                        hover_name = 'STATION', 
                        hover_data = ['FLOWRATE_CMS',
                                      'LATITUDE',
                                      'LONGITUDE',
                                      'DRAINAGE_AREA_GROSS_KM2']         
                        )

        fig2 = px.line(df_export_plotly, 
                        x='DATE',
                        y='WATER_LEVEL_M', 
                        hover_name = 'STATION', 
                        hover_data = ['WATER_LEVEL_M',
                                      'LATITUDE',
                                      'LONGITUDE',
                                      'DRAINAGE_AREA_GROSS_KM2']
                                   
                        )
           
        fig2.update_traces(yaxis="y2")

        subfig.add_traces(fig.data + fig2.data)
        subfig.layout.xaxis.title="Date"
        subfig.layout.yaxis.title="Flowrate (cms)"
        subfig.layout.yaxis2.type="linear"
        subfig.layout.yaxis2.title="Water Level (m)"

        # recoloring is necessary otherwise lines from fig und fig2 would share each color
        # e.g. Linear-, Log- = blue; Linear+, Log+ = red... we don't want this
        subfig.for_each_trace(lambda t: t.update(line=dict(color=t.marker.color))) 

        # Add figure title
        subfig.update_layout(
            title_text= key + ': ' + 'Canada Hydrometric Flow Data - Time Series Info'
        )

        # Set y-axes titles
        subfig.update_yaxes(title_text="<b>Flow Rate (cms)</b>", secondary_y=False)
        subfig.update_yaxes(title_text="<b>Water Level (m)</b>", secondary_y=True)

        subfig.update_xaxes(
            rangeslider_visible=True,
            rangeselector=dict(
                buttons=list([
                    dict(count=1, label="1DAY", step="day", stepmode="backward"),
                    dict(count=7, label="1WK", step="day", stepmode="backward"),
                    dict(count=14, label="2WK", step="day", stepmode="backward"),
                    dict(count=1, label="1MNTH", step="month", stepmode="backward"),
                    dict(count=3, label="3MNTH", step="month", stepmode="backward"),
                    dict(count=6, label="6MNTH", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1YR", step="year", stepmode="backward"),
                    dict(step="all")
                ])
            )
        )
        
        # Chart options

        subfig.update_layout(plot_bgcolor = "RGB(45,45,48)")  	#2d2d30
        subfig.update_layout(paper_bgcolor = "RGB(37,37,38)") #252526
        subfig.update_layout(
        font_color="RGB(131,148,150)",  	#839496
        title_font_color="RGB(131,148,150)",  	#839496
        legend_title_font_color="RGB(131,148,150)") #839496

        # Append station names to .html files
        subfig.write_html(key + '.html', include_plotlyjs='cdn')

        print('{} seconds: Completed {} plotly charts!'.format((datetime.datetime.now() - start).seconds, k))
        if l == len_stations:
            break
        else:
            l+=1

        k+=1

    print('Plotly charts completed! Charts saved in following '
        'directory: /CanadaWatQual/Hydro/plotly')
    
else:
    
    print('Error. No file in master.csv directory.')

FileNotFoundError: [Errno 2] No such file or directory: 'CAN_daily_hydrometric_master.parquet'

## Compress all Plotly.html files to .zip folder

In [None]:
os.chdir(r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/plotly')

# Compress all Plotly.html files to .zip folder
import os
import glob
import zipfile
try:
    import zlib
    mode = zipfile.ZIP_DEFLATED
except:
    mode = zipfile.ZIP_STORED

file_path = r'/CanadaDailyHydrometricPlotly'
zipfile_path = r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/plotly/daily_hydro_plotly_html.zip'
plotly_zip = zipfile.ZipFile(zipfile_path, 'w', mode)

for folder, subfolders, files in os.walk(file_path):
    for file in files:
        if file.endswith('*.html'):
            # Write all .html files to .zip file
            plotly_zip.write(os.path.join(folder, file), compress_type=mode)

# Close the .zip file and end process                             
plotly_zip.close()

# Delete all leftover .html files in /CanadaDailyHydrometricPlotly folder
folder = r'C:/Users/pdudar/anaconda3/projects/CanadaWatQual/Hydro/plotly'
for CleanUp in glob.glob(folder):
    print(CleanUp)

    if CleanUp.endswith('*.html'):    
        os.remove(CleanUp)
        
print('Plotly Hydrometric charts zipped to .zip file!')