In [2]:
from IPython.display import display, Markdown

# Read the project README and render it as Markdown in the notebook output.
# The encoding is passed explicitly so the result does not depend on the
# platform's default text encoding.
with open('README.md', 'r', encoding='utf-8') as fh:
    content = fh.read()

display(Markdown(content))

# *enviro*

## Energy data Visualization Routine

###### A tool for creating visuals from historical energy data (e.g. the EIA monthly energy review).
 
This tool is designed to provide insightful, aesthetic and more flexible visualizations of the Energy Information Administration (EIA) monthly energy review datasets. The datasets contain information about the sources of energy Americans have relied on for power since the middle of the 20th century. The datasets begin in 1949 with annual energy production, consumption, import, and export values, and extend up until the present. Monthly energy datapoints are reported starting in 1973.

The basic energy sources are reported in the following groups:  

###### Fossil Fuels
* Coal
* Natural Gas
* Petroleum

###### Renewables
* Wind
* Solar
* Hydroelectric
* Geothermal
* Biomass

###### Nuclear
* Fission

The data is published monthly on the [EIA's website](https://www.eia.gov/totalenergy/data/monthly/), and as of July 5th, 2017 records were provided up through March 2017.

All reported values are in units of quadrillion British thermal units (1.0E15 Btu). Be aware that the datasets may provide [more precision](https://www.eia.gov/totalenergy/data/monthly/dataunits.php) than is published in the PDF reports.

--------------------------------------------------------------


## Structure

This toolkit is designed so that a user can specify any of the reported energy sources and simply construct visuals to demonstrate trends in that energy's usage over time, as well as make comparisons between the use of energy sources over time. 

To accomplish this goal, there are two classes that are defined: an energy class (EClass) and a visualization class (VClass). 

#### EClass
The EClass collects information from the specified energy source. Within this class will be methods for retrieving data according to specific attributes of the energy source, such as energy consumed from that source per decade, per year, or all years in which more than a certain amount of energy was consumed from that source.

#### VClass
The VClass takes one or more EClasses as input upon initialization. The VClass will then feature methods to visualize the data, including the ability to make comparisons if more than one EClass is given. These visualizations could include histograms, line graphs, pie charts and animations.

--------------------------------------------------------------

### Programmatic Overhead

Import packages and modules that will be used in construction and visualization.

In [3]:
import numpy as np

Load the dataset (a CSV) as a NumPy array.

In [29]:
# NOTE(review): absolute, machine-specific location -- this cell only runs
# where this exact directory exists.
EIA_DATA_PATH = '/Users/mitch/Documents/Energy_Analysis/enviro/'
EIA_DATA_FILE = 'EIA_MER.csv'

# Parse every field of the CSV as a float, then drop the first row and keep
# only columns 1 through 3.
eia_data = np.genfromtxt(
    EIA_DATA_PATH + EIA_DATA_FILE,
    dtype=float,
    delimiter=',',
)[1:, 1:4]
print(eia_data)


[[  1.94913000e+05   1.19809050e+01   1.00000000e+00]
 [  1.95013000e+05   1.23471090e+01   1.00000000e+00]
 [  1.95113000e+05   1.25529960e+01   1.00000000e+00]
 ..., 
 [  2.01613000e+05   9.73990850e+01   1.20000000e+01]
 [  2.01701000e+05   8.95493200e+00   1.20000000e+01]
 [  2.01702000e+05   7.62353900e+00   1.20000000e+01]]


### Energy Class

In [48]:
# Contents of eclass.py module
'from energy_codes import name_to_code'#remove ''


class EClass:

    """
    Collect energy consumption data for a user-defined energy source.

    Retrieves data from the specified energy source according to specific
    attributes, such as energy consumed per decade, per year, or all years in
    which more than a certain amount of energy was consumed from that source.

    Attributes
    ----------
    data : ndarray
        Two columns (date, energy quantity) holding the non-nan rows of the
        input dataset whose energy code equals `Ecode`.
    start_yr, start_mo : int
        Year and month fields of the smallest date value in `data`.
    end_yr, end_mo : int
        Year and month fields of the largest date value in `data`.
    """

    def __init__(self, Ecode, dataset=None):
        """
        Receive energy source (as numeric code) and collect corresponding
        data from the input dataset.

        Parameters
        ----------
        Ecode : int
            The energy source to be pulled from the dataset.
        dataset : array_like, optional
            The dataset from which to pull information. Must be three ordered
            columns (date, energy quantity, energy code) with no headings.
            Dates are read as numbers of the form YYYYMM. When omitted, the
            module-level `eia_data` array is used.

        Raises
        ------
        ValueError
            If no non-nan rows in `dataset` carry the code `Ecode`.
        """
        if dataset is None:
            # Looked up at call time (not at class-definition time) so the
            # class can be defined before the data is loaded and always sees
            # the currently loaded array.
            dataset = eia_data
        dataset = np.asarray(dataset)

        # Eliminate 'nan' data from source
        dataset = dataset[np.logical_not(np.isnan(dataset[:, 1]))]

        # Isolate this energy's data from source, and remove (now superfluous) Ecode
        self.data = dataset[dataset[:, 2] == Ecode, :2]
        if self.data.shape[0] == 0:
            raise ValueError('no data found for energy code %r' % (Ecode,))

        # Get the oldest and newest datapoint dates for this energy
        dates = self.data[:, 0]
        self.start_yr, self.start_mo = self._split_date(dates.min())
        self.end_yr, self.end_mo = self._split_date(dates.max())

    @staticmethod
    def _split_date(date):
        """Split a numeric YYYYMM date into integer (year, month) fields."""
        # Integer arithmetic avoids depending on how a float is rendered as text.
        return divmod(int(date), 100)

    def totals(self, start_date, end_date, freq='yearly'):
        """
        Get the energy consumption totals over a given period.

        Not implemented yet: calling this method always raises
        NotImplementedError.

        Parameters
        ----------
        start_date, end_date : str
            The user specified dataset starting/ending points;
            acceptable formats include 'YYYYMMDD', 'MM-DD-YYYY', or 'MM/DD/YYYY'.
        freq : str
            The frequency for gathering totals ('lifetime', 'yearly', or 'monthly').

        Raises
        ------
        NotImplementedError
            Always, until the computation is written.
        """
        raise NotImplementedError('EClass.totals is not implemented yet')

    def extrema(self, extremum, start_date, end_date, interval):
        """
        Get the maximum/minimum energy ever consumed over a given interval.

        Not implemented yet: the method body is empty and returns None.

        Parameters
        ----------
        extremum : str
            The extreme value to find ('max' or 'min').
        start_date, end_date : str
            The user specified dataset starting/ending points;
            acceptable formats include 'YYYYMMDD', 'MM-DD-YYYY', or 'MM/DD/YYYY'.
        interval : str
            The time intervals considered for extrema comparison ('yearly' or 'monthly').
        """

    def more_than(self, amount, start_date, end_date, interval):
        """
        Get data for time intervals where more than the given amount of
        energy was consumed.

        Not implemented yet: the method body is empty and returns None.

        Parameters
        ----------
        amount : float
            The lower boundary (exclusive) for which data may be included in the dataset.
        start_date, end_date : str
            The user specified dataset starting/ending points;
            acceptable formats include 'YYYYMMDD', 'MM-DD-YYYY', or 'MM/DD/YYYY'.
        interval : str
            The time intervals considered for the comparison ('yearly' or 'monthly').
        """

    # Additional potential options to add:
    #     - average yearly energy consumed
    #     - average seasonal energy consumed
 


In [51]:
 # Contents of test_eclass.py module
'from eclass.py import EClass'


class TestEClass:
    """Checks on EClass construction: row filtering and date attributes."""

    def test_preproc(self):
        """Rows with a nan quantity are dropped and the code column removed."""
        rows = np.array([[195001, 1, 1],
                         [195101, None, 1],
                         [195201, 1, 1]],
                        float)
        # eliminate nan, remove ecode column
        expected = rows[[0, 2], :2]
        energy = EClass(1, rows)
        assert np.array_equal(energy.data, expected)

    def test_start_yr(self):
        """start_yr is the year of the earliest date."""
        rows = np.array([[195001, 1, 1],
                         [195101, 1, 1]],
                        float)
        energy = EClass(1, rows)
        assert energy.start_yr == 1950

    def test_end_yr(self):
        """end_yr is the year of the latest date."""
        rows = np.array([[195001, 1, 1],
                         [195101, 1, 1]],
                        float)
        energy = EClass(1, rows)
        assert energy.end_yr == 1951

    def test_start_mo(self):
        """start_mo is the month of the earliest date."""
        rows = np.array([[195001, 1, 1],
                         [195102, 1, 1]],
                        float)
        energy = EClass(1, rows)
        assert energy.start_mo == 1

    def test_end_mo(self):
        """end_mo is the month of the latest date."""
        rows = np.array([[195001, 1, 1],
                         [195102, 1, 1]],
                        float)
        energy = EClass(1, rows)
        assert energy.end_mo == 2

    #def test_yearly_totals(self,start_date,end_date,freq='yearly'):
        #assert np.array_equal(yr_tots,np.array([1951]))
    
# Run every EClass check by hand; each call is expected to pass silently.
TEC = TestEClass()
for _check in (TEC.test_preproc,
               TEC.test_start_yr,
               TEC.test_end_yr,
               TEC.test_start_mo,
               TEC.test_end_mo):
    _check()


### Visualization Class

In [5]:
# Contents of vclass.py module


class VClass:

    """
    Create visuals based on energy data.

    Takes one or more energy source names upon initialization and builds an
    EClass for each distinct source. Intended to provide methods to visualize
    the data (including the ability to make comparisons across energy sources
    if more than one is given); the planned visualizations include
    histograms, line graphs, pie charts, and animations.

    Attributes
    ----------
    EClasses : list
        One EClass per distinct energy code, in the order the sources were
        first named.
    """

    def __init__(self, *energies):
        """
        Receive energy source(s) and assemble the appropriate class(es).

        Parameters
        ----------
        *energies : str
            The energy source(s) to be visualized.

        Raises
        ------
        KeyError
            Propagated from `name_to_code` when a name is not recognized.
        """
        # Convert energy names to codes; dict keys drop duplicates while
        # keeping the order in which the sources were first given.
        codes = dict.fromkeys(name_to_code(name) for name in energies)

        # Assemble energy classes
        self.EClasses = [EClass(code) for code in codes]

    def select_data(self, interval, method, amount=None):
        """
        Filter data according to user requirements.

        Only a skeleton so far: no data is selected and None is returned.

        Parameters
        ----------
        interval : str
            The filter applied by the user to select a dataset timeframe
            (i.e. `monthly`, `yearly`, or `lifetime`); `lifetime` can only be
            used with method-keyword `totals`.
        method : str
            The filter applied by the user to gather data attributes
            (i.e. `totals`, `max`, `more_than`).
        amount : float
            The filter applied by the user specifying a lower bound for
            energy consumption.
        """
        if method == 'totals':
            if interval == 'lifetime':
                for energy in self.EClasses:
                    # TODO: gather the lifetime total for each energy source
                    pass
                    
        
        

In [23]:
# %load eia_codes.py
"""
Provide utilities for working with EIA dataset codes.

Functions
---------
name_to_code
    Converts energy names into EIA energy codes
"""

def name_to_code(name):
    """
    Convert an energy source name to its corresponding EIA dataset numeric code.

    The lookup is case-insensitive.

    Parameters
    ----------
    name : str
        The name of an EIA energy or energy group.

    Returns
    -------
    code : int
        The code corresponding to the energy source provided.

    Raises
    ------
    KeyError
        If `name` is not one of the implemented energy sources.
    """
    key_name = name.lower()
    Ecodes = {'coal':1,
              'natural gas':2,
              'petroleum':3,
              'fossil fuel':4,
              'nuclear':5,
              'hydro':6,
              'geothermal':7,
              'solar':8,
              'wind':9,
              'biomass':10,
              'renewable':11,
              'primary':12}

    if key_name not in Ecodes:
        # Fill in the offending name, and build the message from adjacent
        # literals so no stray indentation ends up inside it.
        raise KeyError('"%s" was not found in the EIA dataset; '
                       'see documentation for implemented energy sources'
                       % name)
    return Ecodes[key_name]



In [24]:
# %load test_eia_codes.py
from eia_codes import name_to_code as ntc


class TestNTC:
    """Checks for the name-to-code lookup referenced here as ``ntc``."""

    def test_ntc_code(self):
        """A lowercase source name maps to its numeric code."""
        code = ntc('coal')
        assert code == 1

    def test_ntc_case(self):
        """A capitalized source name maps to the same code as lowercase."""
        code = ntc('Renewable')
        assert code == 11


In [10]:
# Contents of plot_graphs.py module
from matplotlib import pyplot as plt


def line_graph(ax, indep, *dep):
    """
    Placeholder for a line-graph plotting routine; not yet implemented.

    Accepts its arguments, does nothing with them, and returns None.

    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on -- TODO confirm.
    indep : object
        Presumably the independent-variable data -- TODO confirm.
    *dep : object
        Presumably one or more dependent-variable datasets -- TODO confirm.
    """
    return None
    
    
def bar_chart(ax, indep, *dep):
    """
    Placeholder for a bar-chart plotting routine; not yet implemented.

    Accepts its arguments, does nothing with them, and returns None.

    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on -- TODO confirm.
    indep : object
        Presumably the independent-variable data -- TODO confirm.
    *dep : object
        Presumably one or more dependent-variable datasets -- TODO confirm.
    """
    return None
    
    
def pie_chart(ax, indep, *dep):
    """
    Placeholder for a pie-chart plotting routine; not yet implemented.

    Accepts its arguments, does nothing with them, and returns None.

    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to draw on -- TODO confirm.
    indep : object
        Presumably the category labels or independent data -- TODO confirm.
    *dep : object
        Presumably one or more dependent-variable datasets -- TODO confirm.
    """
    return None
    
    
def style(ax, title, xlabel, ylabel, ticks=None):
    """
    Placeholder for a plot-styling routine; not yet implemented.

    Accepts its arguments, does nothing with them, and returns None.

    Parameters
    ----------
    ax : object
        Presumably the matplotlib axes to style -- TODO confirm.
    title, xlabel, ylabel : object
        Presumably the title and axis label text -- TODO confirm.
    ticks : object, optional
        Presumably tick positions or labels; defaults to None -- TODO confirm.
    """
    return None

