In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
#import pyodbc
from IPython.display import IFrame

# Code Quality Assurance Practices

## Summary
 * How do you define quality code
 * What are the common QA practices?
 * How do you conduct a peer review?
 * What kinds of tools can we use?
 * Common pitfalls

## How do you define quality code?
### Basic Tenets:
 * DRY: Don't Repeat Yourself
 * KISS: Keep it Simple, Stupid!
 * SRP: Single Responsibility Principle
 

### In practice
* Don't reinvent the wheel
   * But don't blindly trust other wheels!
 * Be clear:
   * function names should mean something
   * variable names should mean something
   * lines shouldn't be too long
   * functions should have descriptions if sufficiently complex
 * Results: Does the code produce the expected results from known input?
 * Error handling: are edge and corner cases properly handled?

## Work through a basic example: retrieving values from a database 
(`import pyodbc` is implied)

In [None]:
def db_locs():
    ''' Gets a list of locations from the database

    Connects to the ``prjTEST`` database on ``pmtester-02``, runs
    ``select locname from locations`` and collects the selected column
    from every row the cursor yields.

    Returns
    -------
    results : list
        One entry per row returned by the query.
    '''
    query = "select locname from locations"
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        try:
            cur.execute(query)
            # pyodbc rows are read by position (or attribute), not by a
            # string key; the query selects exactly one column.
            results = [row[0] for row in cur]
        finally:
            cur.close()
    finally:
        # release the connection even when the query or fetch fails
        cnn.close()
    return results

In [None]:
def db_samps():
    ''' Gets a list of samples from the database

    Connects to the ``prjTEST`` database on ``pmtester-02``, runs
    ``select samplename from samples`` and collects the selected column
    from every row the cursor yields.

    Returns
    -------
    samples : list
        One entry per row returned by the query.
    '''
    query = "select samplename from samples"
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        try:
            cur.execute(query)
            # pyodbc rows are read by position (or attribute), not by a
            # string key; the query selects exactly one column.
            samples = [row[0] for row in cur]
        finally:
            cur.close()
    finally:
        # release the connection even when the query or fetch fails
        cnn.close()
    return samples

### What do the names mean? Are we getting or inserting?
### Apply some DRY and clear up names

In [None]:
def connectToDB(cmd=None):
    """Open a connection to the project database and create a cursor.

    Parameters
    ----------
    cmd : str, optional
        SQL statement to execute on the new cursor before returning.
        When None (the default) nothing is executed.

    Returns
    -------
    cnn, cur
        The open connection and a cursor created from it. The caller
        is responsible for closing both.
    """
    # `database` (rather than `db`) is the keyword used elsewhere in this
    # notebook for the same connection.
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        if cmd is not None:
            cur.execute(cmd)
    except Exception:
        # the caller never receives `cnn` on failure, so close it here
        cnn.close()
        raise

    return cnn, cur

def closeConnections(cnn, cur):
    """Close the cursor and then the connection.

    Parameters
    ----------
    cnn : object with a ``close()`` method
        The database connection.
    cur : object with a ``close()`` method
        The cursor to close first.
    """
    try:
        cur.close()
    finally:
        # always release the connection, even if closing the cursor raised
        cnn.close()
    
def getLocations():
    """Return the ``locname`` value of every row in ``locations`` as a list."""
    query = "select locname from locations"
    cnn, cur = connectToDB(cmd=query)
    try:
        # single-column select: read the value by position, since pyodbc
        # rows do not accept a string key
        results = [row[0] for row in cur]
    finally:
        # close cursor and connection even if fetching fails
        closeConnections(cnn, cur)
    return results
    
def getSamples():
    """Return the ``samplename`` value of every row in ``samples`` as a list."""
    query = "select samplename from samples"
    cnn, cur = connectToDB(cmd=query)
    try:
        # single-column select: read the value by position, since pyodbc
        # rows do not accept a string key
        samples = [row[0] for row in cur]
    finally:
        # close cursor and connection even if fetching fails
        closeConnections(cnn, cur)
    return samples
    

### And more DRY still...

In [None]:
def connectToDB(cmd=None):
    """Open a connection to the project database and create a cursor.

    Parameters
    ----------
    cmd : str, optional
        SQL statement to execute on the new cursor before returning.
        When None (the default) nothing is executed.

    Returns
    -------
    cnn, cur
        The open connection and a cursor created from it. The caller
        is responsible for closing both.
    """
    # `database` (rather than `db`) is the keyword used elsewhere in this
    # notebook for the same connection.
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        if cmd is not None:
            cur.execute(cmd)
    except Exception:
        # the caller never receives `cnn` on failure, so close it here
        cnn.close()
        raise
    return cnn, cur

def closeConnections(cnn, cur):
    """Close the cursor and then the connection.

    Parameters
    ----------
    cnn : object with a ``close()`` method
        The database connection.
    cur : object with a ``close()`` method
        The cursor to close first.
    """
    try:
        cur.close()
    finally:
        # always release the connection, even if closing the cursor raised
        cnn.close()

def _get_single_col_from_table(column, table):
    """Return every value of one column of one table as a list.

    Parameters
    ----------
    column : str
        Name of the column to select.
    table : str
        Name of the table to select from.

    Returns
    -------
    values : list
        One entry per row returned by ``select <column> from <table>``.
    """
    # NOTE: both names are interpolated straight into the SQL text, so
    # only pass trusted, hard-coded identifiers.
    query = "select {} from {}".format(column, table)
    cnn, cur = connectToDB(cmd=query)
    try:
        # single-column select: read the value by position, since pyodbc
        # rows do not accept a string key
        values = [row[0] for row in cur]
    finally:
        # close cursor and connection even if fetching fails
        closeConnections(cnn, cur)
    return values

def getLocations():
    """Return the ``locname`` column of the ``locations`` table."""
    column, table = 'locname', 'locations'
    return _get_single_col_from_table(column, table)

def getSamples():
    """Return the ``samplename`` column of the ``samples`` table."""
    column, table = 'samplename', 'samples'
    return _get_single_col_from_table(column, table)

## Now use previously invented wheels

In [None]:
import pyodbc
import pandas

def connectToDB():
    """Open and return a new connection to ``prjTEST`` on ``pmtester-02``.

    The caller is responsible for closing the returned connection.
    """
    # `database` (rather than `db`) is the keyword used elsewhere in this
    # notebook for the same connection.
    return pyodbc.connect(database='prjTEST', server='pmtester-02')

def _get_single_col_from_table(column, table):
    """Return every value of one column of one table as a list.

    Parameters
    ----------
    column : str
        Name of the column to select.
    table : str
        Name of the table to select from.

    Returns
    -------
    list
        The values of ``column`` read through ``pandas.read_sql``.
    """
    from contextlib import closing

    # NOTE: both names are interpolated straight into the SQL text, so
    # only pass trusted, hard-coded identifiers.
    query = "select {} from {}".format(column, table)
    # Using a pyodbc connection directly in a `with` statement commits on
    # exit but leaves it open; `closing` makes sure it is closed.
    with closing(connectToDB()) as cnn:
        values = pandas.read_sql(query, cnn)
    return values[column].tolist()

def getLocations():
    """Return the ``locname`` column of the ``locations`` table."""
    column, table = 'locname', 'locations'
    return _get_single_col_from_table(column, table)

def getSamples():
    """Return the ``samplename`` column of the ``samples`` table."""
    column, table = 'samplename', 'samples'
    return _get_single_col_from_table(column, table)

## Recall where we started

In [None]:
def db_locs():
    ''' Gets a list of locations from the database

    Connects to the ``prjTEST`` database on ``pmtester-02``, runs
    ``select locname from locations`` and collects the selected column
    from every row the cursor yields.

    Returns
    -------
    results : list
        One entry per row returned by the query.
    '''
    query = "select locname from locations"
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        try:
            cur.execute(query)
            # pyodbc rows are read by position (or attribute), not by a
            # string key; the query selects exactly one column.
            results = [row[0] for row in cur]
        finally:
            cur.close()
    finally:
        # release the connection even when the query or fetch fails
        cnn.close()
    return results

def db_samps():
    ''' Gets a list of samples from the database

    Connects to the ``prjTEST`` database on ``pmtester-02``, runs
    ``select samplename from samples`` and collects the selected column
    from every row the cursor yields.

    Returns
    -------
    samples : list
        One entry per row returned by the query.
    '''
    query = "select samplename from samples"
    cnn = pyodbc.connect(database='prjTEST', server='pmtester-02')
    try:
        cur = cnn.cursor()
        try:
            cur.execute(query)
            # pyodbc rows are read by position (or attribute), not by a
            # string key; the query selects exactly one column.
            samples = [row[0] for row in cur]
        finally:
            cur.close()
    finally:
        # release the connection even when the query or fetch fails
        cnn.close()
    return samples

## So the code looks nice, but can we trust it?

### The main goal of things like DRY, SRP, and KISS is to facilitate QA.

### QA comes in a few primary ways:

 * Peer review
   * Code is read far more than it is written
   * Intent should be communicated through function/variable names
   * Code should execute in a very linear fashion, telling a story
     where classes/variables are nouns acted upon by verbs 
     (methods/functions)
   * Project-related output should be reproduced by someone else & verified manually
 * Unit testing
   * Small, simple functions are easier to test:
   * range of possible inputs and outputs shrinks
   * error handling simplifies
 * Continuous Integration (CI)
   * As code evolves CI systems automatically run tests
   * Test failures trigger an alert
   * Code metrics (such as test coverage) can be reported automatically

## To quote Wes McKinney (author of pandas)

> The test suite is where a library hangs its dirty laundry

After the main code base has been reviewed for the
main concepts, it's time to really dig into the test
suite.

## Q: What's a test suite?
## A: Test suites are collections of special functions and classes that confirm that each library function and class behaves as expected. They also guard against small errors propagating into other parts of a code base.

## Q: But I wrote a bunch of code without a test suite? How are you going to review it?
## A1: I'm going to make you write a test suite
## A2: You should have written your tests before you wrote your code

## Caveat: A test suite is only as good as you make it.

### You should strive to really stress your functions and seek out edge and corner cases

## Test-driven development Example

### PM needs a database summarized. Main output should be a table with the following columns:
   1. Site Area
   2. Pollutant
   3. Median concentration, three sig figs with qualifier
   4. Maximum concentration, three sig figs with qualifier
   

   
### So we need functions to 
   1. connect to the database
   2. retrieve data from the database
   3. format a number of any order of magnitude to 3 sig figs
   4. find the maximum result for a given site area and its qualifier
   5. find the median results ...
   6. combine the formatted result and qualifier into a string
   7. write all of the output to a table

## Formatting by significant figures

In [None]:
%%load test_sigFigs.py
import nose.tools as nt
import numpy as np

from sigfigs import sigFigs

class base_sigFigs(object):
    """Test cases shared by every `sigFigs` fixture class.

    Subclasses supply, in their ``setup`` method, the attributes read
    here: ``maininput``, ``rndup_input``, ``known_3``, ``known_7`` and
    ``known_roundup``.
    """

    # `sigFigs` requires the number of significant figures; without it
    # the call raises TypeError before any validation can run.
    @nt.raises(ValueError)
    def test_inf(self):
        sigFigs(np.inf, 3)

    @nt.raises(ValueError)
    def test_NaN(self):
        sigFigs(np.nan, 3)

    @nt.raises(ValueError)
    def test_too_few_sig_figs(self):
        sigFigs(1.234, 0)

    def test_7(self):
        nt.assert_equal(sigFigs(self.maininput, 7), self.known_7)

    def test_3(self):
        nt.assert_equal(sigFigs(self.maininput, 3), self.known_3)

    def test_3_roundup(self):
        nt.assert_equal(sigFigs(self.rndup_input, 3), self.known_roundup)

class test_sigFigs_LT1(base_sigFigs):
    """Fixture for an input smaller than one."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 0.1234567, 0.1239567
        # values the shared tests compare against
        self.known_3, self.known_7 = 0.123, 0.1230000
        self.known_roundup = 0.124

class test_sigFigs_GT1(base_sigFigs):
    """Fixture for an input slightly larger than one."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 1.234567, 1.239567
        # values the shared tests compare against
        self.known_3, self.known_7 = 1.23, 1.230000
        self.known_roundup = 1.24

class test_sigFigs_LargeInt(base_sigFigs):
    """Fixture for a large integer input."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 123456789, 123956789
        # values the shared tests compare against
        self.known_3, self.known_7 = 123000000, 123456700
        self.known_roundup = 124000000


In [None]:
def sigFigs(value, N):
    """First-draft implementation: validate, then ``round(value, N)``.

    Parameters
    ----------
    value : int or float
        Number to round; must be finite.
    N : int
        Passed to the built-in ``round`` as its ``ndigits`` argument;
        must be at least 1.

    Returns
    -------
    The result of ``round(value, N)``.

    Raises
    ------
    ValueError
        If `value` is not finite or `N` is less than 1.
    """
    value_is_finite = np.isfinite(value)
    if not value_is_finite:
        raise ValueError("`value` must be a finite number")

    too_few_figures = N < 1
    if too_few_figures:
        raise ValueError("you need at least 1 sig fig")

    rounded = round(value, N)
    return rounded


```
[wqiodev] C:\Users\phobson\work\GDAG2015
$ nosetests
F.F.....F...FFF...
======================================================================
FAIL: test_sigFigs.test_sigFigs_GT1.test_3
----------------------------------------------------------------------
Traceback (most recent call last):
  File "C:\Miniconda3\envs\wqiodev\lib\site-packages\nose\case.py", line 198, in runTest
    self.test(*self.arg)
  File "C:\Users\phobson\work\GDAG2015\test_sigFigs.py", line 23, in test_3
    nt.assert_equal(sigFigs(self.maininput, 3), self.known_3)
AssertionError: 1.235 != 1.23

======================================================================
FAIL: test_sigFigs.test_sigFigs_GT1.test_7
----------------------------------------------------------------------
Traceback (most recent call last):
  File "C:\Miniconda3\envs\wqiodev\lib\site-packages\nose\case.py", line 198, in runTest
    self.test(*self.arg)
  File "C:\Users\phobson\work\GDAG2015\test_sigFigs.py", line 20, in test_7
    nt.assert_equal(sigFigs(self.maininput, 7), self.known_7)
AssertionError: 1.234567 != 1.23
```

### All errors are listed

In [None]:
import numpy as np
def sigFigs(value, N):
    """Format a number as a string with `N` significant figures.

    Parameters
    ----------
    value : int or float
        The finite number to format.
    N : int
        Number of significant figures to keep; must be at least 1.

    Returns
    -------
    output : str
        The formatted number, with ``,`` as the thousands separator.

    Raises
    ------
    ValueError
        If `value` is not finite or `N` is less than 1.
    """
    if not np.isfinite(value):
        raise ValueError("`value` must be a finite number")

    if N < 1:
        raise ValueError("you need at least 1 sig fig")

    if value == 0:
        # log10(0) is -inf, which cannot be converted to an int below;
        # treat zero as a number of order 1
        order = 0
    else:
        order = np.floor(np.log10(np.abs(value)))

    decimal_places = int(N - 1 - order)

    # Rounding can carry into the next power of ten (9.999 -> 10.00),
    # which would add a significant figure; drop one decimal place then.
    rounded_value = round(value, decimal_places)
    if rounded_value != 0 and np.floor(np.log10(np.abs(rounded_value))) > order:
        decimal_places -= 1

    if decimal_places <= 0:
        # negative `decimal_places` rounds to tens, hundreds, ...
        output = '{0:,.0f}'.format(round(value, decimal_places))

    else:
        fmt = '{0:,.%df}' % decimal_places
        output = fmt.format(value)

    return output


```
[wqiodev] C:\Users\phobson\work\GDAG2015
$ nosetests --verbose
test_sigFigs.test_sigFigs_GT1.test_3 ... ok
test_sigFigs.test_sigFigs_GT1.test_3_roundup ... ok
test_sigFigs.test_sigFigs_GT1.test_7 ... ok
test_sigFigs.test_sigFigs_GT1.test_NaN ... ok
test_sigFigs.test_sigFigs_GT1.test_inf ... ok
test_sigFigs.test_sigFigs_GT1.test_too_few_sig_figs ... ok
test_sigFigs.test_sigFigs_LT1.test_3 ... ok
test_sigFigs.test_sigFigs_LT1.test_3_roundup ... ok
test_sigFigs.test_sigFigs_LT1.test_7 ... ok
test_sigFigs.test_sigFigs_LT1.test_NaN ... ok
test_sigFigs.test_sigFigs_LT1.test_inf ... ok
test_sigFigs.test_sigFigs_LT1.test_too_few_sig_figs ... ok
test_sigFigs.test_sigFigs_LargeInt.test_3 ... ok
test_sigFigs.test_sigFigs_LargeInt.test_3_roundup ... ok
test_sigFigs.test_sigFigs_LargeInt.test_7 ... ok
test_sigFigs.test_sigFigs_LargeInt.test_NaN ... ok
test_sigFigs.test_sigFigs_LargeInt.test_inf ... ok
test_sigFigs.test_sigFigs_LargeInt.test_too_few_sig_figs ... ok

----------------------------------------------------------------------
Ran 18 tests in 0.105s

```

# Audience participation
## What did we miss in our testing?
```python
class base_sigFigs(object):
    """Test cases shared by every `sigFigs` fixture class.

    Subclasses supply, in their ``setup`` method, the attributes read
    here: ``maininput``, ``rndup_input``, ``known_3``, ``known_7`` and
    ``known_roundup``.
    """

    @nt.raises(ValueError)
    def test_inf(self):
        infinite = np.inf
        sigFigs(infinite, 3)

    @nt.raises(ValueError)
    def test_NaN(self):
        not_a_number = np.nan
        sigFigs(not_a_number, 3)

    @nt.raises(ValueError)
    def test_too_few_sig_figs(self):
        no_figures = 0
        sigFigs(1.234, no_figures)

    def test_7(self):
        result = sigFigs(self.maininput, 7)
        nt.assert_equal(result, self.known_7)

    def test_3(self):
        result = sigFigs(self.maininput, 3)
        nt.assert_equal(result, self.known_3)

    def test_3_roundup(self):
        result = sigFigs(self.rndup_input, 3)
        nt.assert_equal(result, self.known_roundup)
```

```python
class test_sigFigs_LT1(base_sigFigs):
    """Fixture for an input smaller than one; expectations are strings."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 0.1234567, 0.1239567
        # formatted strings the shared tests compare against
        self.known_3, self.known_7 = '0.123', '0.1234567'
        self.known_roundup = '0.124'

class test_sigFigs_GT1(base_sigFigs):
    """Fixture for an input slightly larger than one; expectations are strings."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 1.234567, 1.239567
        # formatted strings the shared tests compare against
        self.known_3, self.known_7 = '1.23', '1.234567'
        self.known_roundup = '1.24'

class test_sigFigs_LargeInt(base_sigFigs):
    """Fixture for a large integer input; expectations are strings."""

    def setup(self):
        # values handed to sigFigs
        self.maininput, self.rndup_input = 123456722, 123956789
        # formatted strings the shared tests compare against
        self.known_3, self.known_7 = '123,000,000', '123,456,700'
        self.known_roundup = '124,000,000'
```

# Some things we might want to test for:
 * Feeding strings for `value`
 * negative numbers
 * behavior with `np.nan` vs. `None`

# Documentation

## The numpy community has created a standard format for documentation

```python
import numpy as np

def sigFigs(value, N):
    """Formats a number to the specified significant figures

    Parameters
    ----------
    value : int or float
        The numeric value to be formatted
    N : int
        The final number of significant figures in the output

    Returns
    -------
    rounded : str
        The formatted number

    Raises
    ------
    ValueError
        If `value` is not finite or `N` is less than 1

    Examples
    --------
    >>> sigFigs(1.24, 4)
    '1.240'

    """

    if not np.isfinite(value):
        raise ValueError("`value` must be a finite number")

    if N < 1:
        raise ValueError("you need at least 1 sig fig")

    if value == 0:
        # log10(0) is -inf, which cannot be converted to an int below;
        # treat zero as a number of order 1
        order = 0
    else:
        order = np.floor(np.log10(np.abs(value)))

    decimal_places = int(N - 1 - order)

    # Rounding can carry into the next power of ten (9.999 -> 10.00),
    # which would add a significant figure; drop one decimal place then.
    rounded_value = round(value, decimal_places)
    if rounded_value != 0 and np.floor(np.log10(np.abs(rounded_value))) > order:
        decimal_places -= 1

    if decimal_places <= 0:
        # negative `decimal_places` rounds to tens, hundreds, ...
        rounded = '{0:,.0f}'.format(round(value, decimal_places))

    else:
        fmt = '{0:,.%df}' % decimal_places
        rounded = fmt.format(value)

    return rounded
```

## Functions and classes documented this way can be automatically compiled into HTML and PDF documents

In [5]:
# Embed the rendered seaborn.boxplot API page (700 x 400). The address
# points at the current seaborn documentation site over https.
IFrame("https://seaborn.pydata.org/generated/seaborn.boxplot.html#seaborn.boxplot", 700, 400)

# Inline comments should explain the code, not repeat it:

# Exaggerated examples
## Bad
```python
def sigFigs(value, N):
    if not np.isfinite(value):
        raise ValueError("`value` must be a finite number")

    if N < 1:
        raise ValueError("you need at least 1 sig fig")

    # get the floor of the log10 of the absolute value of `value`
    order = np.floor(np.log10(np.abs(value)))
```

## Good
```python
def sigFigs(value, N):
    if not np.isfinite(value):
        raise ValueError("`value` must be a finite number")

    if N < 1:
        raise ValueError("you need at least 1 sig fig")

    # The order of magnitude of the value. We'll divide the 
    # value by this to get a number of order 1, round it,
    # and then multiply back in to the orig order
    order = np.floor(np.log10(np.abs(value)))
```

# Lastly, fire up an interpreter and just try to break it

## e.g., `sigFigs(1.25e12, 1000000)`

# Questions and Discussion and War Stories