# Introduction
View and Explore data files listed on https://airtable.com/shr03m1opFmo8MJdr
Note: due to airtable's API limitations, you'll have be a part of the workspace to download this data.

Go to https://airtable.com/account to generate your own API key, and place `AIRTABLE_KEY=YOUR_KEY_HERE` in a file named `.env`

### Imports
Import libraries and write settings here.

In [10]:
%load_ext autoreload
%autoreload
%load_ext dotenv
%dotenv

import pandas as pd
import airtable #import Airtable
import zipfile
import os
import requests
from urllib.parse import urlparse
import pdb

AIRTABLE_URL = "https://airtable.com/shr03m1opFmo8MJdr"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


# Download data

In [11]:
def download_and_unzip(url, filename):
    verify_exceptions = ['www.cer-rec.gc.ca']
    try:
        if os.path.exists(filename):
            os.remove(filename)
        with open(filename, 'wb') as f:
            verify = False if urlparse(url).netloc in verify_exceptions else True
            resp = requests.get(url, verify=verify)
            f.write(resp.content)
            filenames = [filename]
        if filename.split(".")[-1] == "zip":
            with zipfile.ZipFile(filename, 'r') as zip_ref:
                filenames = zip_ref.namelist()
                zip_ref.extractall()
        print("Successfully downloaded")
        return filenames
    except Exception as e:
        print(e)

In [12]:
data_files = airtable.Airtable("appHJQOgkMxgv7LJ6", "Table 1", os.environ["AIRTABLE_KEY"])

In [13]:

table_1 = data_files.get_all()
# Link is mandatory field
mandatory_fields = ['Link', 'Name']
for field in mandatory_fields:
    table_1 = [entry for entry in table_1 if field in entry['fields'].keys()]
%cd data
for entry in table_1:
    url = entry['fields']['Link']
    filename = os.path.split(url)[-1]
    if True or not os.path.exists(filename):
        print("Downloading \'{}\'".format(entry['fields']['Name']))
        filenames = download_and_unzip(url, filename)
    entry['fields']['filenames'] = filenames
%cd ..

/home/ryderb/Documents/Sideprojects/CarbonPolicy/EnergyFlowSankeyCanada/canada-sankey/data
Downloading 'Another visualization of supply and demand of energy, as above:'
Successfully downloaded
Downloading 'Manufacturing industries, total annual energy fuel consumption in natural units'
Successfully downloaded
Downloading 'Supply and demand of primary and secondary energy in terajoules, annual'
Successfully downloaded
Downloading 'Natural Gas Exports and Imports Annual'




Successfully downloaded
Downloading 'Electric power generation, annual fuel consumed by electric utility thermal plants'
Successfully downloaded
Downloading 'Electricity Exports and Imports Annual'




Successfully downloaded
Downloading 'Natural Gas Exports and Imports Annual Data Dictionary'




Successfully downloaded
Downloading 'Transportation Sector Energy Use - By Energy Source '
Successfully downloaded
Downloading 'Household energy consumption, by type of dwelling, Canada and provinces
'
Successfully downloaded
Downloading 'Electric Power Generation by type of Electricity (includes, wind, solar, other)
'
Successfully downloaded
Downloading 'Imports Exports Data Visualization'




Successfully downloaded
Downloading 'Electricity Exports and Imports Data Dictionary'




Successfully downloaded
Downloading 'Household energy consumption, Canada and provinces'
Successfully downloaded
/home/ryderb/Documents/Sideprojects/CarbonPolicy/EnergyFlowSankeyCanada/canada-sankey


In [14]:
%ls data

 25100015.csv
 [0m[01;31m25100015-eng.zip[0m
 25100015_MetaData.csv
 25100017.csv
 [01;31m25100017-eng.zip[0m
 25100017_MetaData.csv
 25100025.csv
 25100025-dirty.df
 [01;31m25100025-eng.zip[0m
 25100025_MetaData.csv
 25100025_MetaData-dirty.df
 25100029.csv
 25100029-dirty.df
 [01;31m25100029-eng.zip[0m
 25100029_MetaData.csv
 25100029_MetaData-dirty.df
 25100060.csv
 25100060-dirty.df
 [01;31m25100060-eng.zip[0m
 25100060_MetaData.csv
 25100060_MetaData-dirty.df
 25100061.csv
 25100061-dirty.df
 [01;31m25100061-eng.zip[0m
 25100061_MetaData.csv
 25100061_MetaData-dirty.df
 [01;32mdownload_data.sh[0m*
 electricity-exports-and-imports-annual.csv
 electricity-exports-and-imports-annual-dirty.df
 electricity-exports-and-imports-data-dictionary.csv
 imports-exports-data-visualization.csv
 natural-gas-exports-and-imports-annual.csv
 natural-gas-exports-and-imports-annual-dirty.df
 natural-gas-exports-and-imports-data-dictionary.csv
 sankey_e

# Load & Explore Data

In [15]:
datasets = []
%cd data

def read_file_to_df(filename, file_ext, table_type, data_entry, save=True):
    """
    table_type: array containing 'Table' or 'Dictionary'
    """
    read_method = {"csv": pd.read_csv, "xlsx": pd.read_excel, "xls": pd.read_excel}
    
    try:
        if file_ext in read_method.keys():
            if 'Table' in table_type:
            # Could have just passed the url, but some datasets were zipped
                try:
                    entry['dataframe'] = read_method[file_ext](filename)
                except UnicodeDecodeError:
                    entry['dataframe'] = read_method[file_ext](filename, encoding='latin-1')
                datasets.append(entry['dataframe'])
                pd.to_pickle(entry['dataframe'], filename.split('.')[0] + "-dirty.df")
            if 'Dictionary' in table_type:
                # Could have just passed the url, but some datasets were zipped
                try:
                    entry['data-dictionary'] = read_method[file_ext](filename)
                except UnicodeDecodeError:
                    entry['data-dictionary'] = read_method[file_ext](filename, encoding='latin-1')
                pd.to_pickle(entry['dataframe'], filename.split('.')[0] + "-dirty.df")
                
    except Exception as e:
        print(e)

for entry in table_1:
    url = entry['fields']['Link']
    filename = os.path.split(url)[-1]
    file_ext = filename.split('.')[-1]
    if file_ext == "zip":
        # Figure out which filename is data, and which is dictionary
        for filename in entry['fields']['filenames']:
            file_ext = filename.split('.')[-1]
            # This works so far for statscan datasets
            if "meta" in filename.lower():
                read_file_to_df(filename, file_ext, ["Dictionary"], entry)
            else:
                read_file_to_df(filename, file_ext, ["Table"], entry)
                
    read_file_to_df(filename, file_ext, entry['fields']['Type'], entry)
%cd ..
        

/home/ryderb/Documents/Sideprojects/CarbonPolicy/EnergyFlowSankeyCanada/canada-sankey/data
'dataframe'
Error tokenizing data. C error: Expected 1 fields in line 9, saw 2

'dataframe'
/home/ryderb/Documents/Sideprojects/CarbonPolicy/EnergyFlowSankeyCanada/canada-sankey


  if (await self.run_code(code, result,  async_=asy)):


In [16]:
display([entry['fields']['Name'] for entry in table_1])



['Another visualization of supply and demand of energy, as above:',
 'Manufacturing industries, total annual energy fuel consumption in natural units',
 'Supply and demand of primary and secondary energy in terajoules, annual',
 'Natural Gas Exports and Imports Annual',
 'Electric power generation, annual fuel consumed by electric utility thermal plants',
 'Electricity Exports and Imports Annual',
 'Natural Gas Exports and Imports Annual Data Dictionary',
 'Transportation Sector Energy Use - By Energy Source ',
 'Household energy consumption, by type of dwelling, Canada and provinces\n',
 'Electric Power Generation by type of Electricity (includes, wind, solar, other)\n',
 'Imports Exports Data Visualization',
 'Electricity Exports and Imports Data Dictionary',
 'Household energy consumption, Canada and provinces']