### Joseph Frostad
### HW4: Linting and Testing
### 11/1/2018
#### Prompt

This homework involves revisions to the files you created in HW 3.

(2 points) Change test_create_dataframe.py to be runnable as a test file (using pytest if you prefer).

(5 points) Make create_dataframe and test_create_dataframe PEP8 compliant. You should get a score of at least 9 out of 10. Please put your pylint output in your repo.

#### Config
##### Load in packages

In [3]:
#import python packages
import pandas as pd
import pytest
import requests as rq
import pylint

In [4]:
#import custom modules
import sys
sys.path.append('./analysis/') #add in the analysis path to system
import fx

##### Set globals

In [None]:
# Domain of the Socrata site; passed to create_dataframe as `url`
SEATTLE_URL = "data.seattle.gov"
# Dataset ID of the Seattle Cultural Space Inventory
SCSI_ID = "bsta-72tn"
# Dataset ID that is NOT the inventory; the tests expect it to trigger an HTTP error
WRONG_ID = "bsta-72tb"
# Columns to keep in the returned dataframe
ANALYSIS_VARS = ['address', 'year_founded', 'closed', 'square_feet_total']

##### Rewrite creation function

In [None]:
%%file ./analysis/fx.py
"""This module stores a function called create dataframe
"""
class RowCountException(Exception):
    """Raised when Socrata returns fewer rows than the caller requires."""


def create_dataframe(url,
                     url_id,
                     columns,
                     min_nrow):
    """
    Read a dataset from a government website using Socrata and
    return the requested columns as a pandas dataframe.

    Parameters:
        url: This is the url (domain) of the government website
        url_id: This is the ID of the data that the user wants to download
        columns: This is a list of columns the user is interested in
        min_nrow: The minimum number of rows expected

    Returns:
        data_frame: A pandas dataframe with the columns requested

    Raises:
        RowCountException: If fewer than min_nrow rows were downloaded

    """
    # Import necessary packages (kept local to the function)
    from sodapy import Socrata
    import pandas as pd

    # Setup client; None is passed in place of an app token
    client = Socrata(url, None)
    try:
        results = client.get(url_id)
    finally:
        # Always release the underlying HTTP session, even if the request fails
        client.close()

    # Convert to pandas DataFrame
    data_frame = pd.DataFrame.from_records(results)

    # Verify that the minimum rowcount is met
    if len(data_frame) < min_nrow:
        raise RowCountException("Minimum number of rows were not returned by Socrata")

    # Subset to columns of interest
    # Analysis would be to determine whether the age or
    # square feet of a cultural space are related to closure
    return data_frame[columns]


In [None]:
%%file test_create_dataframe.py
"""This is a module used to test a function: "create_dataframe"

create_dataframe is a function that downloads data from a website and creates a pandas df

This module tests that function by ensuring that it returns expected exceptions in edge cases.
"""
# import packages
import sys
import pytest
import requests as rq #necessary to catch HTTP error
#import custom modules
sys.path.append('./analysis/')#add in the analysis path to system
import fx

# Set globals shared by all tests below
# Domain of the Socrata site; passed to create_dataframe as `url`
SEATTLE_URL = "data.seattle.gov"
# Dataset ID of the Seattle Cultural Space Inventory
SCSI_ID = "bsta-72tn"
# Dataset ID that is NOT the inventory; expected to trigger an HTTP error
WRONG_ID = "bsta-72tb"
# Columns to keep in the returned dataframe
ANALYSIS_VARS = ['address', 'year_founded', 'closed', 'square_feet_total']

def test_create_dataframe_length():
    """Check that a valid URL/ID pair yields a dataframe of exactly 1000 rows.
    """
    # Correct URL/ID: no exception is expected and the row count must match
    expected_rows = 1000
    request_args = (SEATTLE_URL, SCSI_ID, ANALYSIS_VARS, expected_rows)
    result = fx.create_dataframe(*request_args)
    row_total = len(result)
    assert row_total == expected_rows

# WRONG_ID is NOT the Seattle Cultural Space Inventory, so an HTTP error is expected
def test_create_dataframe_bad_id():
    """Check that an invalid dataset ID raises HTTPError.

    No dataframe should be returned; instead the HTTPError from the
    requests package must be raised, because the resulting URL does not exist.
    """
    bad_request = (SEATTLE_URL, WRONG_ID, ANALYSIS_VARS, 100)
    expected_error = rq.exceptions.HTTPError
    with pytest.raises(expected_error):
        fx.create_dataframe(*bad_request)

# This data is the Seattle Cultural Space Inventory,
# but only 1k rows can be retrieved, so a RowCountException should be raised
def test_create_dataframe_row_count():
    """Check that the custom RowCountException is raised
    when more than 1k rows are expected.
    """
    with pytest.raises(Exception) as err:
        fx.create_dataframe(SEATTLE_URL,
                            SCSI_ID,
                            ANALYSIS_VARS,
                            1e4 + 1)
    # Check the raised class itself rather than str(err): the text form of
    # pytest's ExceptionInfo varies between pytest versions, and a substring
    # match could also succeed on an unrelated error whose message happens to
    # mention the name. Matching by class name keeps the test independent of
    # where fx defines the exception.
    assert err.type.__name__ == 'RowCountException'


#### Test it!

In [None]:
!pytest

#### Clean it!

In [None]:
!pylint test_create_dataframe.py

In [None]:
!pylint ./analysis/fx.py