In [1]:
from IPython.display import display, Markdown

# Read README.md and render its contents as Markdown in the notebook output.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default (assumes the README is UTF-8 -- confirm).
with open('README.md', 'r', encoding='utf-8') as fh:
    content = fh.read()

display(Markdown(content))

# *enviro*

## Energy data Visualization Routine

###### A tool for creating visuals from historical energy data (e.g. the EIA monthly energy review).
 
This tool is designed to provide insightful, aesthetic and more flexible visualizations of the Energy Information Administration (EIA) monthly energy review datasets. The datasets contain information about the sources of energy Americans have relied on for power since the middle of the 20th century. The datasets begin in 1949 with annual energy production, consumption, import, and export values, and extend up until the present. Monthly energy datapoints are reported starting in 1973.

The basic energy sources are reported in the following groups:  

###### Fossil Fuels
* Coal
* Natural Gas
* Petroleum

###### Renewables
* Wind
* Solar
* Hydroelectric
* Geothermal
* Biomass

###### Nuclear
* Fission

The data is published monthly on the [EIA's website](https://www.eia.gov/totalenergy/data/monthly/), and as of July 5th, 2017 records were provided up through March 2017.

All reported values are in units of quadrillion British thermal units (1.0E15 Btu). Be aware that the datasets may provide [more precision](https://www.eia.gov/totalenergy/data/monthly/dataunits.php) than is published in the PDF reports.

--------------------------------------------------------------

Outside dependencies are numpy and glob.

--------------------------------------------------------------


## Structure

This toolkit is designed so that a user can specify any of the reported energy sources and simply construct visuals to demonstrate trends in that energy's usage over time, as well as make comparisons between the use of energy sources over time.

To accomplish this goal, there are two classes that are defined: an energy class (EClass) and a visualization class (VClass). 

#### EClass
The EClass collects information from the specified energy source. Within this class will be methods for retrieving data according to specific attributes of the energy source, such as energy consumed from that source per decade, per year, or all years in which more than a certain amount of energy was consumed from that source.

#### VClass
The VClass takes one or more EClasses as input upon initialization. The VClass will then feature methods to visualize the data, including the ability to make comparisons if more than one EClass is given. These visualizations could include histograms, line graphs, pie charts and animations.

--------------------------------------------------------------

Load the dataset (a CSV file) as a numpy array.

In [None]:
# %load load_data.py
"""
Load EIA Monthly Energy Review (MER) datasets.

Functions
---------
load_dataset
    Loads an EIA MER dataset from a csv file.
get_default
    Finds the default MER dataset csv file.
get_newest
    Finds the most recent MER dataset csv file (currently unimplemented).
"""
import numpy as np


def load_dataset(dataset_label='default',data_path=None):
    """
    Loads an EIA MER dataset from a csv file.
    
    Parameters
    ----------
    dataset_label : str
        The identifier of the dataset; 'default' is the only option currently
        implemented ('newest' and specific dataset dates are planned).
    data_path : str, optional
        The directory containing the dataset csv file. When omitted, the
        original hard-coded project directory is used.
        
    Returns
    -------
    ndarray
        Data from the EIA MER dataset: every row after the first, and
        columns 1 through 3, of the csv file parsed as floats.
        
    Raises
    ------
    ValueError
        If `dataset_label` is not an implemented label.
    """
    import os

    if data_path is None:
        # Historical default location; pass `data_path` to load from elsewhere
        data_path = '/Users/mitch/Documents/Energy_Analysis/enviro/'

    if dataset_label == 'default':
        data_file = get_default()
    # TODO: elif dataset_label == 'newest': data_file = get_newest()
    else:
        raise ValueError('"Default" is the only dataset label currently implemented.')

    # Drop the first row and the first column (presumably the header row and a
    # leading identifier column -- confirm against the csv layout)
    return np.genfromtxt(os.path.join(data_path,data_file),float,delimiter=',')[1:,1:4]


def get_default():
    """
    Return the filename of the default dataset.
    
    The name is fixed; no filesystem lookup is performed.
    
    Returns
    -------
    str
        The filename of the default dataset.
    """
    default_filename = 'EIA_MER.csv'
    return default_filename


def get_newest():
    """
    Placeholder for finding the most recently downloaded dataset.
    
    Not yet implemented: a notice is printed and nothing is returned.
    
    Returns
    -------
    None
        Until implemented; the intended return value is the filename (str)
        of the most recently downloaded dataset.
    """
    notice = 'Currently unimplemented.'
    print(notice)



In [None]:
# %load eia_codes.py
"""
Provide utilities for working with EIA dataset codes.

Functions
---------
name_to_code
    Converts energy names into EIA energy codes.
date_to_code
    Converts date string into EIA data code.
"""


def name_to_code(name):
    """
    Look up the EIA dataset numeric code for an energy source name.
    
    The lookup ignores letter case.
    
    Parameters
    ----------
    name : str
        The name of an EIA energy or energy group.
        
    Returns
    -------
    name_code : int
        The code corresponding to the energy source provided.
        
    Raises
    ------
    KeyError
        If the name is not one of the implemented energy sources.
    """
    # Codes are assigned sequentially, starting at 1, in this order
    ordered_names = ('coal',
                     'natural gas',
                     'petroleum',
                     'fossil fuel',
                     'nuclear',
                     'hydro',
                     'geothermal',
                     'solar',
                     'wind',
                     'biomass',
                     'renewable',
                     'primary')
    codes_by_name = {source: code for code, source in enumerate(ordered_names, start=1)}

    lookup_key = name.lower()
    name_code = codes_by_name.get(lookup_key)
    if name_code is None:
        raise KeyError('Key "{}" was not found in the EIA dataset; see documentation for implemented energy sources.'.format(lookup_key))
    return name_code


def date_to_code(date):
    """
    Convert an input date to its corresponding EIA dataset numeric date code.
    
    Parameters
    ----------
    date : str
        A date, given in the format 'YYYYMM', 'YYYY-MM', or 'MM-YYYY'. 
        Dashes ("-") can be substituted for periods ("."), underscores ("_"), or forward slashes ("/").
        A month of 13 denotes the full-year total. Non-string input (e.g. the 
        integer 201708) is converted with `str` first.
        
    Returns
    -------
    date_code : int
        The code (YYYYMM) corresponding to the date provided.
        
    Raises
    ------
    ValueError
        If the date is not in an acceptable format, the year is outside
        1900-3000, or the month is outside 1-13.
    """
    date = str(date)
    bad_format_err_msg = 'Date "{}" was not given in an acceptable format; try formatting date as "YYYYMM".'.format(date)
    acceptable_separators = ("-",".","/","_")
    
    # Normalize the input to a 'YYYYMM' string by removing only the separator
    # at its expected position (other characters are validated below)
    if len(date) == 6:
        date_code_string = date
    elif len(date) == 7 and date[4] in acceptable_separators:
        date_code_string = date[:4]+date[5:]
    elif len(date) == 7 and date[2] in acceptable_separators:
        date_code_string = date[3:]+date[:2]
    else:
        raise ValueError(bad_format_err_msg)
        
    # Require plain decimal digits so inputs such as '2017.5' are rejected
    # rather than silently truncated
    if not all(character in '0123456789' for character in date_code_string):
        raise ValueError(bad_format_err_msg)
        
    # Check reasonability of date provided
    year = int(date_code_string[:4])
    month = int(date_code_string[4:])
    if year < 1900 or year > 3000:
        raise ValueError('No data exists for this time period.')
    if month < 1 or month > 13:  # 13 denotes full year sum
        raise ValueError('A month must be given as a number 1-12 (or 13 for the full-year total).')
    date_code = int(date_code_string)
    return date_code


### Energy Class

In [None]:
# %load eclass.py
from energy_codes import name_to_code
from energy_codes import date_to_code

class EClass:
    
    """
    Collect energy consumption data for a user-defined energy source.
    
    Retrieves data from the specified energy source according to specific 
    attributes, such as energy consumed per decade, per year, or all years in 
    which more than a certain amount of energy was consumed from that source.
    
    Attributes
    ----------
    data : ndarray
        A 2 column array (date code, energy quantity) for this energy source.
    idate, fdate : int
        The oldest and newest date codes present in `data`.
    """
    
    def __init__(self,energy_source,dataset=None):
        """
        Receive energy source name and collect corresponding data from the input dataset
        
        Parameters
        ----------
        energy_source : str
            The name of the energy source to be pulled from the dataset.
        dataset : array_like, optional
            The dataset from which to pull information. Must be three ordered columns
            (date, energy quantity, energy code) with no headings. When omitted,
            the module-level `eia_data` is used, looked up at call time.
            
        Raises
        ------
        ValueError
            If the dataset holds no (non-nan) data for the energy source.
        """
        if dataset is None:
            # Resolved lazily so the class can be defined before the data is loaded
            dataset = eia_data
            
        # Determine Ecode from energy source name
        Ecode = name_to_code(energy_source)

        # Eliminate 'nan' data from source
        dataset = np.asarray(dataset,dtype=float)
        dataset = dataset[~np.isnan(dataset[:,1])]
        
        # Isolate this energy's data from source, and remove (now superfluous) Ecode
        self.data = dataset[dataset[:,2]==Ecode,:2]
        if self.data.shape[0] == 0:
            raise ValueError('No data was found for energy source "{}".'.format(energy_source))
                
        # Get the oldest and newest datapoint dates for this energy
        self.idate = int(self.data[:,0].min())
        self.fdate = int(self.data[:,0].max())
    
    def daterange(self,start_date,end_date):
        """
        Select the portion of the dataset covering only the date range specified.
        
        Parameters
        ----------
        start_date, end_date : int
            The dataset start/end dates (both inclusive) as integers (YYYYMM)
            
        Returns
        -------
        bounded_data : ndarray
            A 2 column array corresponding to the specified range.
        """
        dates = self.data[:,0]
        bounded_data = self.data[(dates >= start_date) & (dates <= end_date)]
        return bounded_data
        
    def totals(self,start_date=None,end_date=None,freq='yearly'):
        """
        Get the energy consumption totals over a given period.
        
        Parameters
        ----------
        start_date, end_date : str
            The user specified dataset starting and ending dates (both inclusive); 
            acceptable formats are 'YYYYMM', 'YYYY-MM', or 'MM-YYYY'. Dashes ("-") can 
            be substituted for periods ("."), underscores ("_"), or forward slashes ("/").
            When omitted, the oldest/newest dates in the data are used.
        freq : str
            The frequency for gathering totals ('monthly','yearly',or 'cumulative').
            
        Returns
        -------
        totals_array : ndarray
            A 2 column array giving dates at the given frequency and corresponding totals.
            For 'cumulative' this is a single row holding the last date in the range
            and the summed energy (empty if no data falls in the range).
            
        Raises
        ------
        ValueError
            If `freq` is not one of the permitted frequencies.
        """
        start_date = self.idate if start_date is None else date_to_code(str(start_date))
        end_date = self.fdate if end_date is None else date_to_code(str(end_date))
        
        # Bound data by start and end dates
        bounded_data = self.daterange(start_date,end_date)
        
        # The month is the last two digits of the YYYYMM date code
        month_codes = bounded_data[:,0].astype(int) % 100
        
        # Extract data according to frequency; every mask below is applied to the
        # bounded data it was computed from, so the shapes always agree
        if freq == 'monthly':
            # Select only monthly totals (denoted as YYYYMM where MM is 01-12 in the date_code)
            totals_array = bounded_data[month_codes != 13]
        elif freq == 'yearly':
            # Select only yearly totals (denoted as YYYY13 in the date_code)
            totals_array = bounded_data[month_codes == 13]
        elif freq == 'cumulative':
            if bounded_data.shape[0] == 0:
                return np.empty((0,2))
            # Sum the yearly totals plus any trailing monthly values that come
            # after the last yearly total in the range
            rows_to_sum = month_codes == 13
            yearly_rows = np.flatnonzero(rows_to_sum)
            first_trailing_row = yearly_rows[-1]+1 if yearly_rows.size else 0
            rows_to_sum[first_trailing_row:] = True
            totals_array = np.array([np.sum(bounded_data[rows_to_sum],axis=0)])
            # The summed date column is meaningless; report the last date instead
            totals_array[0,0] = bounded_data[-1,0]
        else:
            raise ValueError('"{}" is not a frequency compatible with this dataset; see documentation for permissible frequencies.'.format(freq))
        return totals_array
        
    # Planned method: extrema(self,extremum,start_date,end_date,interval)
    #     Get the maximum/minimum energy ever consumed over a given interval.
    #
    #     Parameters
    #     ----------
    #     extremum : str
    #         The extreme value to be found ('max' or 'min').
    #     start_date, end_date : str
    #         The user specified dataset starting/ending points; 
    #         acceptable formats are 'YYYYMMDD', 'YYYY-MM-DD', 'MM-DD-YYYY', or 'MM/DD/YYYY'.
    #     interval : str
    #         The time intervals considered for extrema comparison ('yearly' or 'monthly').
    
    # Planned method: more_than(self,amount,start_date,end_date,interval)
    #     Get data for time interval where more than the given amount of energy was consumed
    #
    #     Parameters
    #     ----------
    #     amount: float
    #         The lower boundary (exclusive) for which data may be included in the dataset.
    #     start_date, end_date : str
    #         The user specified dataset starting/ending points; 
    #         acceptable formats include 'YYYYMMDD', 'MM-DD-YYYY', and 'MM/DD/YYYY'.
    #     interval : str
    #         The time intervals considered for extrema comparison ('yearly',or 'monthly').
    
    # Additional potential options to add:
    #     - average yearly energy consumed
    #     - average seasonal energy consumed
 



In [55]:
%load test_eclass.py

    
    
    
# Expected test behavior: run each EClass test, in order, on one test instance
TEC = TestEClass()
for test_name in ('test_Preprocessing',
                  'test_FindingInitialDate',
                  'test_FindingFinalDate',
                  'test_daterange_BoundingDates',
                  'test_totals_FindingMonthlyTotals',
                  'test_totals_FindingYearlyTotals',
                  'test_totals_FindingCumulativeTotal'):
    getattr(TEC,test_name)()
                                

### Visualization Class

In [None]:
# Contents of vclass.py module


class VClass:
    
    """
    Create visuals based on energy data.
    
    Takes one or more energy source names as input upon initialization and 
    builds an EClass for each, providing methods to visualize the data 
    (including the ability to make comparisons across energy sources if more 
    than one is given). Planned visualizations include histograms, line 
    graphs, pie charts, and animations.
    
    Attributes
    ----------
    EClasses : list
        One EClass per distinct energy source, in order of first appearance.
    """
    
    def __init__(self,*energies):
        """
        Receive energy source(s) and assemble the appropriate class(es).
        
        Parameters
        ----------
        *energies : str
            The energy source(s) to be visualized.
        """
        # Validate each name and drop duplicates: names that map to the same
        # code (e.g. differing only by case) are kept once, first spelling wins
        names_by_code = {}
        for Ename in energies:
            names_by_code.setdefault(name_to_code(Ename),Ename)
            
        # Assemble energy classes (EClass takes the source name, not the code)
        self.EClasses = [EClass(Ename) for Ename in names_by_code.values()]
        
    def select_data(self,interval,method,amount=None):
        """
        Filter data according to user requirements.
        
        Not yet implemented: no filtering is performed and nothing is returned.
        
        Parameters
        ----------
        interval : str
            The filter applied by the user to select a dataset timeframe
            (i.e. `monthly`,`yearly`, or `lifetime`); `lifetime` can only be used
            with method-keyword `totals`.
        method : str
            The filter applied by the user to gather data attributes 
            (i.e. `totals`,`max`,`more_than`). The spelling `total` is also accepted.
        amount : float
            The filter applied by the user specifying a lower bound for energy consumption.
        """
        if method in ('total','totals'):
            if interval == 'lifetime':
                for eclass in self.EClasses:
                    # TODO: gather the lifetime total for each energy source
                    pass
                    
        
        

In [11]:
# %load test_eia_codes.py
from eia_codes import name_to_code as ntc
from eia_codes import date_to_code as dtc

class TestNTC:
    
    """Tests for converting energy source names to EIA codes."""
    
    def test_ntc_ConvertingName(self):
        # A lowercase name maps to its code
        code = ntc('coal')
        assert code == 1
    
    def test_ntc_IgnoringCase(self):
        # Capitalization of the name does not affect the lookup
        code = ntc('Renewable')
        assert code == 11

class TestDTC:

    """
    Tests for converting date strings to EIA date codes.
    
    The dates use the month-level formats that date_to_code documents
    ('YYYYMM', 'YYYY-MM', 'MM-YYYY', and alternate separators); day-level 
    dates such as '20170812' are rejected by date_to_code with a ValueError.
    """

    def test_dtc_ReadingFormat1(self):
        # 'YYYYMM'
        assert dtc('201708') == 201708

    def test_dtc_ReadingFormat2(self):
        # 'YYYY-MM'
        assert dtc('2017-08') == 201708
        
    def test_dtc_ReadingFormat3(self):
        # 'MM-YYYY'
        assert dtc('08-2017') == 201708

    def test_dtc_ReadingFormat4(self):
        # 'MM/YYYY' (alternate separator)
        assert dtc('08/2017') == 201708



In [None]:
# Contents of plot_graphs.py module
from matplotlib import pyplot as plt


def line_graph(ax,indep,*dep):
    """
    Placeholder for drawing a line graph; currently does nothing.
    
    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on (unused so far -- confirm).
    indep : object
        Presumably the independent-variable data (unused so far -- confirm).
    *dep : object
        Presumably one or more dependent-variable datasets (unused so far -- confirm).
    """
    return None
    
    
def bar_chart(ax,indep,*dep):
    """
    Placeholder for drawing a bar chart; currently does nothing.
    
    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on (unused so far -- confirm).
    indep : object
        Presumably the independent-variable data (unused so far -- confirm).
    *dep : object
        Presumably one or more dependent-variable datasets (unused so far -- confirm).
    """
    return None
    
    
def pie_chart(ax,indep,*dep):
    """
    Placeholder for drawing a pie chart; currently does nothing.
    
    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on (unused so far -- confirm).
    indep : object
        Presumably the independent-variable data (unused so far -- confirm).
    *dep : object
        Presumably one or more dependent-variable datasets (unused so far -- confirm).
    """
    return None
    
    
def style(ax,title,xlabel,ylabel,ticks=None,):
    """
    Placeholder for styling a plot; currently does nothing.
    
    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to style (unused so far -- confirm).
    title, xlabel, ylabel : object
        Presumably the plot title and axis labels (unused so far -- confirm).
    ticks : object, optional
        Presumably the tick settings (unused so far -- confirm).
    """
    return None

