'''
OBJECTIVES:
1. Build WRS system
2. Build Structural BMP Solution evaluator
3. Identify minimum BMP solution front for:
   individual facilities
   facilities w/in departments
   facilities w/in city
   
PYTHON VERSION: 3.6.3  
SQLALCHEMY VERSION: 1.1.13

'''

### Pollutant Constituents
Below are the pollutant constituents we attempt to address through this alternatives analysis

In [1]:
#############################################################################################################
#                   
#                                       DEFINE GLOBAL VARIABLE pollLS
#############################################################################################################     
pollLS = ['tss', 'turbidity', 'p', 'n', 'nn', 'an', 'og', 'cu', 'zn', 'fe', 'phmin', 'phmax'] 

# Program Setup
## (Importing libraries, defining database)

In [2]:
#import standard python libraries:
import winsound
import pandas as pd
import numpy as np
import math
import datetime
import calendar
import time
import itertools
import random

In [3]:
#IMPORT AND DEFINE sqlalchemy libraries, tables, and session engine
#SQLAlchemy library items:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String
from sqlalchemy import update, insert
from sqlalchemy import and_ #used in query.filter() to joing multiple where clauses
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship #http://docs.sqlalchemy.org/en/latest/orm/basic_relationships.html#relationship-patterns
from sqlalchemy import inspect

from SQLA_Base import Base #module containing declarative_base
from SQLA_conn_man import session, engine #module handling db and connection creation 

#Table definitions as SQLA classes:
from SQLA_DB_base_bmp_feasibility_test_results import Base_BMP_Feasibility_Test_Results as BBFTR
from SQLA_DB_base_bmp_feasibility_test_definitions import Base_BMP_Feasibility_Test_Definitions as BBFTD
from SQLA_DB_base_bmps import Base_BMPs
from SQLA_DB_combo_bmps import Combo_BMPs
from SQLA_DB_combo_bmp_feasibility_test_results import Combo_BMP_Feasibility_Test_Results as CBFTR
from SQLA_DB_expressions import Expressions
from SQLA_DB_facility_chars import Facility_Chars
from SQLA_DB_facility_monthly_rain import Facility_Monthly_Rain
from SQLA_DB_facility_risks import Facility_Risks
from SQLA_DB_facility_type_has_nel import Facility_Type_Has_NEL
from SQLA_DB_facility_types import Facility_Types
from SQLA_DB_feasibility_test_questions import Feasibility_Test_Questions as FTQ
from SQLA_DB_nel_sample_classes import NEL_Sample_Classes
from SQLA_DB_existing_pollutant_concentrations import Existing_Pollutant_Concentrations as ExPollConcs
from SQLA_DB_pollutant_removal_rates import Pollutant_Removal_Rates as PRR
from SQLA_DB_wrs_pollutant_risks import WRS_Pollutant_Risks
Base.metadata.create_all(engine, checkfirst=True) #create SQLA classes

'''
Dictionary of "SQLAlchemy where clause lambda functions" that importCSV uses to test record uniqueness.
used as the where clause in sqlalchemy queries, updates and deletes 
Form:
    {TableName:Lambda Function, TableName:Lambda Function, ...}
    
    TableName is the table name we want to define uniqueness test for
    Lambda Function is a SQLAlchemy query used to test record uniqueness. The function can take on any form 
        but must be made to evaluate the CSV row passed as a dictionary (CSVRowDict in this explanation):
        CSVRowDict: {FieldName:CSVColValue, DBTableFieldName:CSVColValue...} 
            Where: DBTableFieldName is the name of the field associated with the value at CSVColValue on the current row
                   CSVColValue: a value in the CSV's current row+column corresponding to the DBTableFieldName 
        *this assumes that field names are unique across table. if not, then method fails (maybe need to extend method?)
    FALSE: indicates that db table doesn't impose uniqueness on its records (other than its record id being unique)
        
e.g.: lambda myRowVal: Base.metadata.tables['people'].c['name'] == CSVRowDict['name']
        using lambda function in query will search for CSVRowDict's value for 'name' in the table people, field name 
if table has no record uniqueness requirement, then enter: TableName:False
'''
unqTests = {
    'facility_chars': lambda CSVRowDict: Base.metadata.tables['facility_chars'].c['Fac_Name'] == CSVRowDict['Fac_Name'],
    'facility_monthly_rain': False, #DB schema does not impose uniqueness on records in this table
    'facility_type_has_nel': False,
    'facility_risks': False,
    'facility_types': lambda CSVRowDict: Base.metadata.tables['facility_types'].c['Fac_Type'] == CSVRowDict['Fac_Type'],
    'nel_sample_classes': lambda CSVRowDict: Base.metadata.tables['nel_sample_classes'].c['nel_column']==CSVRowDict['nel_column'],
    'existing_pollutant_concentrations': False, #uniqueness not imposed for records in this table.
    'wrs_pollutant_risks': False #DB schema does not impose uniqueness on records in this table
}

import SQLA_main as SQLA_main #import main SQLAlchemy functions


Clearing old DB


In [4]:
'''
Define other custom modules
'''
import mod_Base_BMP_Eval as BBMP_Eval
import mod_Combo_BMP_Eval as CBMP_Eval
import mod_EffluentLimit as EffLim
import mod_expression as Expr
import mod_importSpecial as importSpecial #special import functions are defined here
import mod_importCSV as importCSV #generic CSV importer ****IMPORTANT NOTE: function assumes csv in the utf-8-sig file format. weird things happen if its not in this format!!!


#  Import Data

In [5]:
def ImportDat():
    #import feasibillity questions, build feasibility expressions
    importSpecial.importFeasibilityQuestionsCSV('Input_Files\\feasibility_test_questions.csv') 

    #import base bmp information including:
      #1. imports definitions for cip costs, o&m costs, and BMP sizing to the expressions table
      #2. imports pollutant removal rates into pollutant_removal_rates table
      #3. creates a record in the base_bmps table using (1) and (2)
      #4. feasibility tests
    importSpecial.importBaseBMPsCSV('Input_Files\\bmp_lego_piece.csv') 

    #IMPORT BASIC FACILITY CHARS:
        #!!!!IMPORTANT!!!! This import must occur before other facility specific data is imported!
    print ('\nImporting facility characteristics:')
    importCSV.importCSV('Input_Files\\facility_chars.csv', unqTests)

    #IMPORT PBP Appendix A1 data
    print ('\nImporting PBP Appendix A1 data:')
    importCSV.importCSV('Input_Files\\pbp_appxa1.csv', unqTests)

    #IMPORT FACILITY RAINFALL EXTRACTED FROM http://rainfall.geography.hawaii.edu/downloads.html
    print ('\nImporting Facility Rainfall Data:')
    importCSV.importCSV('Input_Files\\FacilityRainfallData.csv', unqTests)

    #IMPORT EFFLUENT LIMITS EXISTANCE FOR FACILITY TYPES: (either by Priority Based Plan, Table 3 or as City operational assignment)
    #IF CSV HEADRS SETUP CORRECTLY, THEN THIS INSERTS NEL EXISTANCE DATA (0 OR 1) TO WRS_POLLUTANT TABLE 
    #AND USES THE FACILITY_TYPE_HAS_NEL TO ASSOCIATE RECORD WITH FACILITY TYPE
    print ('\nImporting Facility Type Has Effluent Limits:') #import into wrs_pollutant_risks table
    importCSV.importCSV('Input_Files\\nel_exists_facility_types.csv', unqTests)

    #IMPORT NEL CLASSIFICATION DATA (from PBP Appendix L)
    print ('\nImporting NEL Classes')
    importCSV.importCSV('Input_Files\\nel_pbp_appxl.csv', unqTests)

    #IMPORT FACILITY RISKS:
    print ('\nImporting Facility Risks')
    #for future implementation:
        #The current process inserts fac risk and update existing_fac_char_id in Facility_chars table. this process thus creates
    #dead records. a more sophisticated approach using sophisticated lambda function in unqTests would fix this
    importCSV.importCSV('Input_Files\\facility_risks.csv', unqTests)

    # #IMPORT FACILITY SAMPLING DATA
     #!!!IMPORTANT!!!! For now, we make none detects = 0 BUT this must be changed to detection limit, per DOH guidance.
    print ('\nImporting Facilty Sampling data:')
    importCSV.importCSV('Input_Files\\sample_data.csv', unqTests)


    # for now, since we're developing, delete out all except 1st 2 facilities.
    # n = 5
    # session.query(ExPollConcs).filter(ExPollConcs.facility_id >n).delete(synchronize_session = False) #http://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.delete
    # session.query(Facility_Chars).filter(Facility_Chars.id >n).delete(synchronize_session = False) #http://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.delete
    # session.commit #we chose not to sync session so need to commit before proceeding to requery or else you may get unpredictable resutls
    session.commit()
    winsound.Beep(250,1000)
ImportDat()

Reading csv for import to Feasibility Questions

Reading csv record: Feas-1

Reading csv record: Feas-2

Reading csv record: Feas-3

Reading csv record: Feas-4

Reading csv record: Feas-5

Reading csv record: Feas-6

Reading csv record: Feas-7

Reading csv record: Feas-8

Reading csv record: Feas-9

Reading csv record: Feas-10

Reading csv record: Feas-11

Reading csv record: Feas-12

Reading csv record: Feas-13

Reading csv record: Feas-14

Reading csv record: Feas-15

Reading csv record: Feas-16

Reading csv record: Feas-17

Reading csv record: Feas-18

Reading csv record: Feas-19

Reading csv record: Feas-20

Reading csv record: Feas-21

Reading csv record: Feas-22
Reading csv for import to base bmp tables

Reading csv record: Hydrodynamic_Separation
Reading pollutant removal rate info...
Linking feasibility tests w/ base bmp: 1
Removed:  0  old feasibility test defs for the base bmp
Added feasibility test def as record:  1
Added feasibility test def as record:  2

Reading csv recor

# Existing Sampling Data
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [6]:
'''
#############################################################################################################
#              ASSIGN CONCENTRATION DATA FOR FACILITIES WITHOUT SAMPLING RESULTS:
#                      assignment made into database table: ExPollConcs 
#############################################################################################################
Enter estimated pollutant concentrations into database's existing pollutant concentration table for facilities without 
actual sampling data. Use 1 of 2 methods:

Method 1 (sim_MaxType): Use maximum concentration value sampled for period 2013-2017
          This method is for Permit Table 1 facilities only
          Method assumes we have already entered sampling data for into the database's existing pollutant concentration table

Method 2 (sim_EMC): Use data from an EMC study.
          This method is for facilities that are not on Permit Table 1
'''

def WriteSampleDat_simMaxType(pollLS):
    #assign maximum sampled values to Table 1 facilities that have not yet been sampled
    #delete all pollutant concentration table records that are not from infield sampling.
    #     To be sure we're starting fresh, let's remove any records in ExPollConcs that:
    #     1. Were not obtained directly from field samples (i.e. sample_method != 'infield)
    #     2. Were obtained from field samples, but are not Table 1 facilities (i.e. we shouldn't be looking at their  sample results)
    session.query(ExPollConcs).filter(ExPollConcs.sample_method != 'infield').delete(synchronize_session = False)
    #delete all pollutant concentration table records that are not for Table 1 facilities
    #for some reason bulk delete's not working. so let's use a loop to work around it.
    for rec in session.query(ExPollConcs.id).filter(ExPollConcs.facility_id == Facility_Chars.id).filter(Facility_Chars.Permit_Table != 'Table 1'):
        session.query(ExPollConcs).filter(ExPollConcs.id == rec[0]).delete(synchronize_session = False)

    #make a dataframe called pd_Concs to hold existing pollutant concentrations that were sampled in the field (the 'infield' sampling method)'''
    q = session.query(ExPollConcs).filter(ExPollConcs.sample_method == 'infield')
    pd_Concs = pd.read_sql(q.statement,session.bind)         

    #build pd_infieldExtreama by making a dictionary of maximum sample results for each constiuent
    dict_extrema = {'c_' + Constituent: pd_Concs.loc[:,'c_' + Constituent].max() for Constituent in pollLS}
    dict_extrema['c_phmin'] = pd_Concs.loc[:,'c_phmin'].min() #phMin is exception to above. we want min. phMin value
    #use dictionary to build pd_infieldExtrema dataframe
    pd_infieldExtrema = pd.DataFrame([dict_extrema])
    #     display(pd_infieldExtrema)

    #now build query that identifies all Table 1 facilities that are not in ExPollConcs
    subq = session.query(ExPollConcs.facility_id.distinct()).order_by(ExPollConcs.facility_id).all()
    ls_sq = [i[0] for i in subq if i[0] is not None] #list comprehension to produce list of all facility_id in ExPollConcs table
    #get list of Table 1 facilities not in ExPollConcs:
    tpl_q = session.query(Facility_Chars.id).filter(Facility_Chars.Permit_Table == 'Table 1').filter(Facility_Chars.id.notin_(ls_sq)).all()
    ls_FacIDs = [i[0] for i in tpl_q] #write query tuple to list    
    #make a list of Table 1 facs not in ExPollConcs (a list of dicts). also include extrema conc. values.  
    ls_dict_pd = [{**{'facility_id': FacID, 'sample_method': 'sim_MaxType', 'sample_date':'12/31/2016'}, **dict_extrema} for FacID in ls_FacIDs]
    #write list to database:
    ExPollConcs_meta = Base.metadata.tables['existing_pollutant_concentrations']
    ExPollConcs_id_meta = ExPollConcs_meta.c['id']
    for dict_temp in ls_dict_pd:
        SQLA_main.insertRec(ExPollConcs_meta,dict_temp)
    session.commit()
    #for future implementation: write dict -> dataframe -> db(using sqla):
        # pd_temp.to_sql('existing_pollutant_concentrations', engine, if_exists='append', index = False)
        #http://docs.sqlalchemy.org/en/latest/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
        #https://stackoverflow.com/questions/31997859/bulk-insert-a-pandas-dataframe-using-sqlalchemy

WriteSampleDat_simMaxType(pollLS) #call function defined above   

In [7]:
#############################################################################################################
#                                 Write all sampling data from database to pd_ExConcs
#                                       (DEFINE GLOBAL VARIABLE: pd_ExConcs)
#############################################################################################################    
                  
#get all existing sampling data.
q = session.query(ExPollConcs.facility_id.label('Facility_ID'), ExPollConcs.sample_date, 
        ExPollConcs.c_tss,
        ExPollConcs.c_turbidity,
        ExPollConcs.c_p,
        ExPollConcs.c_n,
        ExPollConcs.c_nn,
        ExPollConcs.c_an,
        ExPollConcs.c_og,
        ExPollConcs.c_cu,
        ExPollConcs.c_zn,
        ExPollConcs.c_fe,
        ExPollConcs.c_phmin,
        ExPollConcs.c_phmax  
         ).order_by(ExPollConcs.facility_id) #.filter(ExPollConcs.facility_id == FacID)
pd_ExConcs = pd.read_sql(q.statement,session.bind) 
#tidy up the sampling data
from datetime import datetime
pd_ExConcs['sample_date'] = pd.to_datetime(pd_ExConcs['sample_date'], format="%m/%d/%Y")
#assign NaN values to any None element 
pd_ExConcs = pd_ExConcs.applymap(lambda x: float('nan') if x is None else x) 
print ('a few pieces of data:')
pd_ExConcs

a few pieces of data:


Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,,,7.00,7.00
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,,,8.30,8.30
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,,,8.20,8.20
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,,,6.92,6.92
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,,,8.54,8.54
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,,,6.64,6.64
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,,,8.09,8.09
7,2,2017-04-20,,13.00,,0.780,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,,,7.20,7.20
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,,,6.83,6.83


In [8]:
#############################################################################################################
#                                 ESTIMATE Numeric Effluent Limits
#                          (DEFINE GLOBAL VARIABLES: pd_FacsNELs_Wet & pd_FacsNELs_Dry)
#############################################################################################################    
'''
Estimate the Numeric Effluent Limits (NELs) for each facility.
Return wet and dry season NELs in 2 separate dataframes:
    pd_FacsNELs_Wet & pd_FacsNELs_Dry
Estimate NELs using the EffLim module's GetNELs function call.
 The GetNELs function call will differentiate between wet and dry season limits
 (if limits are the same between wet & dry season, then the same limit will be placed into the wet and dry
  dataframes.)
 The GetNEls function calculates a pollutant constituent NEL using this formula:
    NEL = fTypeHas_NEL * SampleClass_NEL
    Where:
      fTypeHas_NEL is a [0,1] value from PBP Table 3, based on facility type (stored in SQLA_DB_facility_type_has_nel)
      SampleClass_NEL is pollutant concentration based on facility's sample class, based on PBP Appendix L
'''
pd_FacsNELs_Wet, pd_FacsNELs_Dry = pd.DataFrame(),  pd.DataFrame() #initialize wet and dry season nel dataframes 
for recFac in session.query(Facility_Chars): #do the following for each facility:
    wet,dry = EffLim.GetNELs(recFac,False) #Get Wed & Dry NELs by calculating: NEL = fTypeHas_NEL * SampleClass_NEL
#     if wet is not None:
    pd_FacsNELs_Wet = pd.concat([pd_FacsNELs_Wet, wet]) #write wet NELs to pd_FacsNELs_Wet
#     if dry is not None:
    pd_FacsNELs_Dry = pd.concat([pd_FacsNELs_Dry, dry]) #write dry NELs to pd_FacsNELs_Dry

print('Wet NELs:')
display(pd_FacsNELs_Wet)
print('Dry NELs:')
display(pd_FacsNELs_Dry)

Wet NELs:


Unnamed: 0_level_0,nel_tss,nel_turbidity,nel_p,nel_n,nel_nn,nel_an,nel_og,nel_cu,nel_zn,nel_fe,nel_phmin,nel_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
2,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
3,,,,,,,15.0,,,,5.5,8.0
4,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
5,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
6,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
7,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
8,,,,,,,15.0,,,,5.5,8.0
9,,0.50,0.03,0.18,0.010,0.0050,15.0,,,,7.6,8.6
10,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0


Dry NELs:


Unnamed: 0_level_0,nel_tss,nel_turbidity,nel_p,nel_n,nel_nn,nel_an,nel_og,nel_cu,nel_zn,nel_fe,nel_phmin,nel_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
2,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
3,,,,,,,15.0,,,,5.5,8.0
4,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
5,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
6,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
7,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
8,,,,,,,15.0,,,,5.5,8.0
9,,0.50,0.03,0.18,0.010,0.0050,15.0,,,,7.6,8.6
10,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0


In [9]:
#############################################################################################################
#                        Define Maximum Concentrations for facility on each sample date
#                                    DEFINE GLOBAL VARIABLE: pd_exMaxConcs
############################################################################################################# 
#(using pd_ExConcs, calculate the maximum concentrations observed at a facility on a given sampling date)
pd_exMaxConcs = pd_ExConcs.groupby('Facility_ID').apply(lambda x: x.groupby('sample_date').agg(np.max).sort_index(ascending=False))
pd_exMaxConcs.drop('Facility_ID', axis = 1, inplace = True) #remove duplicate FAcility_ID column
pd_exMaxConcs.reset_index(1, inplace = True) #remove date index
pd_exMaxConcs.reset_index( inplace = True) #remove facility index
display(pd_exMaxConcs)

Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,,,7.00,7.00
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,,,8.30,8.30
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,,,8.20,8.20
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,,,6.92,6.92
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,,,8.54,8.54
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,,,6.64,6.64
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,,,8.09,8.09
7,2,2017-04-20,,13.00,,0.780,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,,,7.20,7.20
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,,,6.83,6.83


In [10]:
#############################################################################################################
#                 Rank each facility's sampled constituent based on sampling date
#                      (earlier sample dates are given lower rank)
############################################################################################################# 
def _HELPER_SampleRank(datetime):
    #return a numeric value for the passed in date format: 2017-02-06 00:00:00
    return int(str(datetime)[:10].replace('-',''))
def _MakeSampleRank(pd_Concs, pollLS):
    #assign sample rank based on date to each constituent type in pollLS.
    #write column of sample dates expressed as numeric value (used later by AF factor. do now b/c only need to setup 1 time)
    #group by facility, then by sample date, then for each facility-sample data pair, use max constituent concentration, 
    #then sort each facility by sample date w/ newest sample first.   
    for Constituent in pollLS:
       #make helper column that expreses date as numeric:
        pd_Concs['c_' + Constituent + '_HelpSR'] = pd_Concs.apply(
            lambda row: _HELPER_SampleRank(row['sample_date']) if not (math.isnan(row['c_'+Constituent])) else np.nan, axis = 1)
        #rank sample dates for each constituent of each facility
        pd_Concs['c_' + Constituent + '_SR'] = pd_Concs.groupby(
            ['Facility_ID'])['c_' + Constituent + '_HelpSR'].rank(ascending = False)-1 #subtract 1 to start ranking at 0. 
        #drop helper column
        pd_Concs = pd_Concs.drop('c_' + Constituent + '_HelpSR', axis = 1)
    return pd_Concs
#############################################################################################################
#                 Rank each facility's sampled constituent based on sampling date
#                      REDEFINE GLOBAL VARIABLE: pd_exMaxConcs
############################################################################################################# 
pd_exMaxConcs = _MakeSampleRank(pd_exMaxConcs, pollLS)
display(pd_exMaxConcs)

Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,...,c_p_SR,c_n_SR,c_nn_SR,c_an_SR,c_og_SR,c_cu_SR,c_zn_SR,c_fe_SR,c_phmin_SR,c_phmax_SR
0,1,2017-04-19,122.0,,,2.430,,,,,...,,0.0,,,,,,,0.0,0.0
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,...,0.0,1.0,,,,,,,1.0,1.0
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,...,1.0,2.0,0.0,0.0,0.0,,,,2.0,2.0
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,...,2.0,3.0,1.0,1.0,1.0,,,,3.0,3.0
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,...,3.0,4.0,2.0,2.0,2.0,,,,4.0,4.0
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,...,4.0,5.0,3.0,3.0,3.0,,,,5.0,5.0
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,...,5.0,6.0,4.0,4.0,4.0,,,,6.0,6.0
7,2,2017-04-20,,13.00,,0.780,,,,,...,,0.0,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,...,0.0,1.0,0.0,0.0,0.0,,,,0.0,0.0
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,...,1.0,2.0,1.0,1.0,1.0,,,,1.0,1.0


In [11]:
#############################################################################################################
#                        Assign NEL for facility sample based on sample date
#                                    REDEFINE  GLOBAL VARIABLE: pd_exMaxConcs
############################################################################################################# 
def _HELPER_GetWetOrDryVal(Constituent, row):
    x = EffLim.Get_pd_NEL_WetOrDry(row['sample_date'], pd_FacsNELs_Wet, pd_FacsNELs_Dry)
    try:
        y = x.loc[row['Facility_ID'], 'nel_'+Constituent]
    except KeyError:
        y = np.nan
    return y
#write nels for each sample based on wet or dry season
for Constituent in pollLS:
    pd_exMaxConcs['nel_'+Constituent] = 0
#     display(pd_exMaxConcs['nel_'+Constituent])
    pd_exMaxConcs['nel_'+Constituent] = pd_exMaxConcs.apply(lambda row: 
      _HELPER_GetWetOrDryVal(Constituent,row), axis = 1)
#     display(pd_exMaxConcs['nel_'+Constituent])
pd_exMaxConcs

Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,...,nel_p,nel_n,nel_nn,nel_an,nel_og,nel_cu,nel_zn,nel_fe,nel_phmin,nel_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,...,0.06,0.38,0.09,,15.0,,,,5.5,8.0
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
7,2,2017-04-20,,13.00,,0.780,,,,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,...,0.10,0.52,0.18,,15.0,,,,5.5,8.0
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,...,0.06,0.38,0.09,,15.0,,,,5.5,8.0


In [12]:
#############################################################################################################
#                               Estimate Exceedances of Faclility Effluent Limits
#                      
#############################################################################################################     
def CalcExceedances(pd_Concs, pollLS):
    '''
    #for each facility in database, calculate exceedance for each pollutant constituent in pollLS list
    #do the Exceedance Calculation = max(0,(Constituent Concentration - NEL))
    # if no exceedance, then report 0. report NaN sample result is NaN
    #INPUT:
        pd_Concs: dataframe of concentrations [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
        pollLS: list of pollutant constituents we want to analyze (constituent list needs to match those in pd_Concs and FacsNELs dataframes)
    #Return dataframe [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
    '''
    pd_FacExceedances = pd_Concs   
    #     calculate exceedances:
    for Constituent in pollLS:
        if Constituent != 'phmin': #exc = concentration - nel
            pd_FacExceedances['exc_' + Constituent] = pd_FacExceedances['c_' + Constituent] - pd_FacExceedances['nel_' + Constituent]
        else: #phmin: exc = nel - phmin
            pd_FacExceedances['exc_' + Constituent] = pd_FacExceedances['nel_' + Constituent] - pd_FacExceedances['c_' + Constituent]          
        #replace <0 values w/ 0 (meaning no exceedance)
        pd_FacExceedances.loc[pd_FacExceedances['exc_' + Constituent]<0, 'exc_' + Constituent] = 0 
    return pd_FacExceedances

#############################################################################################################
#                          
#                                (DEFINE GLOBAL VARIABLE: pd_exFacExceedances)
############################################################################################################# 
start_time = time.time()
pd_exFacExceedances = CalcExceedances (pd_exMaxConcs, pollLS)
print('Concentrations in excess of wet/dry season NELs')
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
display(pd_exFacExceedances)

Concentrations in excess of wet/dry season NELs
--- 0.06799983978271484 execution time in seconds ---


Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,...,exc_p,exc_n,exc_nn,exc_an,exc_og,exc_cu,exc_zn,exc_fe,exc_phmin,exc_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,...,,1.910,,,,,,,0.00,0.00
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,...,0.000,0.060,,,,,,,0.00,0.30
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,...,0.200,0.390,0.000,,0.0,,,,0.00,0.20
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,...,0.190,0.560,0.110,,0.0,,,,0.00,0.00
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,...,0.076,1.310,0.010,,0.0,,,,0.00,0.54
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,...,0.144,1.517,0.067,,0.0,,,,0.00,0.00
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,...,0.055,0.679,0.059,,0.0,,,,0.00,0.09
7,2,2017-04-20,,13.00,,0.780,,,,,...,,0.260,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,...,0.000,1.060,0.000,,0.0,,,,0.00,0.00
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,...,0.006,2.292,0.122,,0.0,,,,0.00,0.00


In [13]:
#############################################################################################################
#       CALCULATE EXISTING AGE FACTOR WEIGHTED AVERAGE FACILITY EXCEEDANCE VALUES FOR EACH CONSTITUENT:
#       
#############################################################################################################   
'''
Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge 
(i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part 
of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

AF = exp(-SampleRank)
SampleRank = Newest sample = 1
              Second Newest sample = 2
              ...
              nth Newest Sample = n (out of n samples)
'''
def AFWFacExceedances(pd_FacExceedances, pollLS):
    '''
    CALCULATE AGE FACTOR WEIGHTED AVERAGE FOR EACH CONSTITUENT:

    Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge
    (i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part
    of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

    AF = exp(-SampleRank)
    SampleRank = Newest sample = 1
                  Second Newest sample = 2
                  ...
                  nth Newest Sample = n (out of n samples)

    INPUTS:
        pd_FacExceedances: dataframe holding exceedances
            FORMAT: ExPollConc.id, Facility_ID, Sample_Date, exceedance concentrations
        pollLS: list of polluant constituents that can be found in the dataframe's exceedance concentrations
        ShowCalculations: True if you want output of calculation summary. false if not

    RETURN:
        DataFrame of age factor weighted averages.
        FORMAT: Facility_ID, AFwtd_c_conc...
    '''
    #calculate age factor weighted averages for each constituent in pollLS FOR each facility IN DATABASE.
    #write these averages into a dataframe called pd_AFWFacExceedances [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
    pd_AFWFacExceedances = pd.DataFrame() #make an empty dataframe.  we will append to it.
    #insert blank columns:
    for Constituent in pollLS:
        #CALC AGE FACTOR
        pd_FacExceedances['c_' + Constituent + '_AF'] =np.exp(-pd_FacExceedances['c_' + Constituent + '_SR'])
        #CALC AGE FACTOR WTD CONCENTRATION        
        pd_FacExceedances['c_' + Constituent + '_AF*c'] = pd_FacExceedances[
            'c_' + Constituent + '_AF'] * pd_FacExceedances[
                'exc_' + Constituent]
    #sum AF and AF*c columns (just do all the columns in pd_FacExceedances for now. make more efficient if need to)
    pd_sums = pd_FacExceedances.groupby(['Facility_ID']).sum() 
#     #setup pd_AFWExceedances to include summed data
    #and do wtd average:
    for Constituent in pollLS:
        pd_AFWFacExceedances['c_' + Constituent + '_AFWtd'] =  pd_sums['c_' + Constituent + '_AF*c']/pd_sums['c_' + Constituent + '_AF']
    pd_AFWFacExceedances.reset_index(inplace = True)
    return pd_AFWFacExceedances

#############################################################################################################
#                        calculate age factor exceedances of existing samples in pd_exFacExceedances
#                                    DEFINE GLOBAL VARIABLE: pd_exAFWFacExceedances
############################################################################################################# 
start_time = time.time()
pd_exAFWFacExceedances = AFWFacExceedances(pd_exFacExceedances, pollLS)
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
print ('Age Factor Weighted Averages:')
display(pd_exAFWFacExceedances)

--- 0.02499985694885254 execution time in seconds ---
Age Factor Weighted Averages:


Unnamed: 0,Facility_ID,c_tss_AFWtd,c_turbidity_AFWtd,c_p_AFWtd,c_n_AFWtd,c_nn_AFWtd,c_an_AFWtd,c_og_AFWtd,c_cu_AFWtd,c_zn_AFWtd,c_fe_AFWtd,c_phmin_AFWtd,c_phmax_AFWtd
0,1,52.064543,19.729711,0.067223,1.296165,0.029425,,0.0,,,,0.0,0.093351
1,2,0.961758,3.745947,0.004876,0.637805,0.031165,,0.0,,,,0.0,0.0
2,3,,,,,,,0.0,,,,0.0,0.118441
3,4,197.0,0.0,0.655,0.829,0.0,,0.0,,,,0.0,0.0
4,5,180.960566,199.986459,1.752511,17.437221,0.004532,,1.421297,,,,0.0,0.0
5,6,1960.0,4166.0,10.2,26.73,3.5,,44.0,,,,0.0,0.7
6,7,0.0,12.8,0.0,0.323,0.043,,0.0,,,,0.0,0.0
7,8,,,,,,,0.0,,,,0.0,0.07
8,9,,4180.5,10.27,27.07,3.67,10.495,44.0,,,,1.59,0.1
9,10,1960.0,4166.0,10.2,26.73,3.5,,44.0,,,,0.0,0.7


In [14]:
#############################################################################################################
#                       Estimate Facility Runoff Volumes
#                       DEFINE GLOBAL VARIABLE: pd_RunoffVols
#############################################################################################################   
#get facility imperviousness and area. order by Facility_ID so it's given in same order as monthly rain data dataframe
q_facDat = session.query(Facility_Chars.id.label('Facility_ID'), 
                         Facility_Chars.Indus_Area, 
                         Facility_Chars.Imperv.label('Imperv')).order_by('Facility_ID')
pd_facDat = pd.read_sql(q_facDat.statement,session.bind)

#get monthly rain data for each facility. order by facility_id so order matches facility data dataframe
q_rain = session.query(Facility_Chars.id.label('Facility_ID'), Facility_Monthly_Rain).filter(
    Facility_Chars.facility_monthly_rain_id == Facility_Monthly_Rain.id).order_by('Facility_ID')
pd_rainDat = pd.read_sql(q_rain.statement,session.bind)

#create a new dataframe to hold rain volumes
pd_RunoffVols = pd_facDat.loc[:,['Facility_ID']] #put facilities into the new dataframe
#now calculate volumes for each month:
for mo in range(1,13):
    pd_RunoffVols[calendar.month_name[mo]] = pd.DataFrame(pd_facDat['Indus_Area'] * pd_facDat['Imperv'] * pd_rainDat[calendar.month_name[mo]]/12)
#add monthlys together to get annual volume
pd_RunoffVols['Annual_Volume'] = pd_RunoffVols[[calendar.month_name[mo] for mo in range (1,13)]].sum(axis = 1)
display(pd_RunoffVols)

Unnamed: 0,Facility_ID,January,February,March,April,May,June,July,August,September,October,November,December,Annual_Volume
0,1,295325.500000,335399.166667,352822.500000,250024.833333,157681.166667,140257.833333,143742.500000,138515.500000,170748.666667,259607.666667,334528.000000,392025.000000,2.970678e+06
1,2,282248.999856,204901.918164,200831.019128,84810.396591,109235.790810,80061.014382,80739.497555,65134.384582,87524.329282,219150.064792,293104.730619,281570.516683,1.989313e+06
2,3,77993.831554,75165.810944,79184.577074,53434.705206,42122.622767,29322.108428,40038.818107,30810.540327,33638.560937,61918.767035,86924.422953,84691.775103,6.952465e+05
3,4,21601.669993,18133.109161,16552.499161,9439.754164,7771.332498,6454.157498,7376.179998,7595.709164,9878.812497,13698.619996,17298.898328,18177.014994,1.539778e+05
4,5,50545.681653,57766.493317,56824.648318,36261.032490,25272.840826,21191.512494,21348.486661,23232.176660,26528.634159,43795.792488,56039.777485,65615.201649,4.844223e+05
5,6,15974.190828,18427.906661,18127.451661,11667.669163,7911.981664,6810.313331,6910.464998,7311.071664,8462.815831,13871.005829,18027.299994,20831.546660,1.543337e+05
6,7,10058.130000,7407.150000,7069.280000,2858.900000,3742.560000,2521.030000,2754.940000,2365.090000,3066.820000,7407.150000,10447.980000,9928.180000,6.962721e+04
7,8,31502.679978,29712.754979,32397.642477,21658.092485,17183.279988,11873.169158,16168.989155,12529.474991,13603.429990,24999.285815,35142.194142,34426.224142,2.811972e+05
8,9,9823.037518,8034.365015,6568.240012,3782.602507,2140.542504,967.642502,1554.092503,3548.022506,3108.185006,6978.755013,7565.205014,9529.812517,6.360050e+04
9,10,7359.825004,4947.100003,3973.900002,2108.600001,1581.450001,648.800000,811.000000,3000.700002,3467.025002,4460.500002,5170.125003,5920.300003,4.344933e+04


In [15]:
#############################################################################################################
#                       Calculate raw pollutant exceedance potential scores (PEP_raw)
#                         PEP_raw = AFWtd Exceedance * Annual Runoff Volume (cu. ft)
############################################################################################################   
def _HELPER_calc_PEP_raw(row, Constituent, pd_RunoffVols):
    #HELPER function to calculate PEP_raw
    AnnRunoffVol = pd_RunoffVols.loc[pd_RunoffVols['Facility_ID']==row.loc['Facility_ID'],'Annual_Volume'].values[0]
    AFWFacExceedVal = row.loc['c_' + Constituent + '_AFWtd']
    return  AFWFacExceedVal * AnnRunoffVol
def CalcPEP_Raw(pd_AFWFacExceedances,pollLS, pd_RunoffVols):
    #use age factor weighted scores to calculate raw PEP scores for each constituent pollutant
    #input: 
        #pd_AFWFacExceedances: [Facility_ID	AFWtd_c_tss	AFWtd_c_turbidity	AFWtd_c_p	AFWtd_c_n	AFWtd_c_nn	AFWtd_c_an	AFWtd_c_og	AFWtd_c_cu	AFWtd_c_zn	AFWtd_c_fe	AFWtd_c_phmin	AFWtd_c_phmax]
        #pollLS: pollutant constituent list
        #pd_RunoffVols: RUNOFF VOLUMES [Facility_ID	January	February	March	April	May	June	July	August	September	October	November	December	Annual_Volume] 
    #output: pd_PEP_raw[	Facility_ID	PEP_raw_tss	PEP_raw_turbidity	PEP_raw_p	PEP_raw_n	PEP_raw_nn	PEP_raw_an	PEP_raw_og	PEP_raw_cu	PEP_raw_zn	PEP_raw_fe	PEP_raw_phmin	PEP_raw_phmax]

    #initialize pd_PEP_raw dataframe w/ Facility_IDs from pd_AFWFacExceedances
    pd_PEP_raw = pd_AFWFacExceedances.loc[:,['Facility_ID']]
#     display(pd_PEP_raw)
#     pd_PEP_raw.reset_index(drop=True)
    #for each facility in pd_exPEP_raw, calculate PEP_Raw SCORE for each pollutant constituent in the pollLS LIST:
    for Constituent in pollLS:
        pd_PEP_raw['PEP_raw_' + Constituent] = pd_AFWFacExceedances.apply(lambda row: 
                                               _HELPER_calc_PEP_raw(row,Constituent, pd_RunoffVols), axis = 1)    
    return pd_PEP_raw

#############################################################################################################
#                              calculate existing PEP_raw scores
#                              DEFINE GLOBAL VARIABLE: pd_exPEP_raw
#############################################################################################################  

pd_exPEP_raw = CalcPEP_Raw(pd_exAFWFacExceedances,pollLS,pd_RunoffVols)
display(pd_exPEP_raw)

Unnamed: 0,Facility_ID,PEP_raw_tss,PEP_raw_turbidity,PEP_raw_p,PEP_raw_n,PEP_raw_nn,PEP_raw_an,PEP_raw_og,PEP_raw_cu,PEP_raw_zn,PEP_raw_fe,PEP_raw_phmin,PEP_raw_phmax
0,1,154667000.0,58610630.0,199697.6,3850488.0,87413.03,,0.0,,,,0.0,277315.755592
1,2,1913238.0,7451859.0,9700.661,1268793.0,61997.84,,0.0,,,,0.0,0.0
2,3,,,,,,,0.0,,,,0.0,82345.979883
3,4,30333620.0,0.0,100855.4,127647.6,0.0,,0.0,,,,0.0,0.0
4,5,87661330.0,96877900.0,848955.5,8446978.0,2195.162,,688507.9,,,,0.0,0.0
5,6,302494100.0,642954300.0,1574204.0,4125340.0,540168.0,,6790684.0,,,,0.0,108033.602799
6,7,0.0,891228.3,0.0,22489.59,2993.97,,0.0,,,,0.0,0.0
7,8,,,,,,,0.0,,,,0.0,19683.805211
8,9,,265881900.0,653177.2,1721666.0,233413.8,667487.3,2798422.0,,,,101124.799158,6360.050261
9,10,85160680.0,181009900.0,443183.1,1161400.0,152072.6,,1911770.0,,,,0.0,30414.527517


In [16]:
#############################################################################################################
#                       Calculate normalized pollutant exceedance potential scores (PEP_norm)
#                         PEP_Norm = (PEP_raw - PEPmin) / (PEPMax - PEPmin)
############################################################################################################   
'''
NORMALIZE the raw Pollutant Exceedance Potential scores held in a pd_PEP_raw dataframe to a new dataframe called pd_PEP_norm.
Use calculation:
PEP_Norm = (PEP_raw - PEPmin) / (PEPMax - PEPmin)

Hold the PEPmax and PEPmin baseline scores used for the normalization in a dataframe called pd_NormBaselinePEP
****NOTE: LATER, we'll need to write the norm. basis to file
          This will allow us to use a common baseline in future (when we get more data, we'll want to have same baseline)         
'''
#############################################################################################################
#                           BUILD BASELINE dataframe pd_NormBaselinePEP
#                              DEFINE GLOBAL VARIABLE: pd_NormBaselinePEP
############################################################################################################  
##Use pd_exPEP_Ras data as our baseline max. Use 0 as min for all:
dict_NormBaselinePEP = {'PEP_Baseline_' + Constituent: [pd_exPEP_raw.loc[:,'PEP_raw_' + Constituent].max(),
                                             0]
                                             for Constituent in pollLS}
dict_NormBaselinePEP['MaxMin'] = ['Max','Min'] #add column identifying if row is max or min
pd_NormBaselinePEP = pd.DataFrame(dict_NormBaselinePEP) #write dict to new dataframe 
print ('This is the pd_NormBaselinePEP dataframe:')
display(pd_NormBaselinePEP)

#############################################################################################################
#                                        CALCULATE PEP_norm
#                         
############################################################################################################ 
def CalcPEP_norm(pd_PEP_raw,pollLS, pd_NormBaselinePEP):
    #calculate PEP_norm for each constituent pollutant of each facility in pd_PEP_raw
    #return pd_PEP_norm [	Facility_ID	PEP_norm_tss	PEP_norm_turbidity	PEP_norm_p	PEP_norm_n	PEP_norm_nn	PEP_norm_an	PEP_norm_og	PEP_norm_cu	PEP_norm_zn	PEP_norm_fe	PEP_norm_phmin	PEP_norm_phmax]
    
    #initialize pd_PEP_norm dataframe w/ Facility_IDs from pd_exPEP_raw
    pd_PEP_norm = pd_PEP_raw.loc[:,['Facility_ID']]
    for Constituent in pollLS:
        BLmax= pd_NormBaselinePEP.loc[pd_NormBaselinePEP['MaxMin']=='Max', 'PEP_Baseline_' + Constituent].values[0] 
        BLmin= pd_NormBaselinePEP.loc[pd_NormBaselinePEP['MaxMin']=='Min', 'PEP_Baseline_' + Constituent].values[0]        
        pd_PEP_norm['PEP_norm_' + Constituent] = (pd_PEP_raw['PEP_raw_' + Constituent] - BLmin) / (BLmax - BLmin)
    return pd_PEP_norm

#############################################################################################################
#                       Normalize existing raw pollutant exceedance potential scores 
#                         (DEFINE GLOBAL VARIABLE: pd_exPEP_norm)
############################################################################################################   
print('This is the pd_exPEP_norm dataframe:')
pd_exPEP_norm = CalcPEP_norm(pd_exPEP_raw,pollLS,pd_NormBaselinePEP)
display(pd_exPEP_norm)

#TO DO:  WRITE existing NORMALIZED PEP SCORES TO DB: 

This is the pd_NormBaselinePEP dataframe:


Unnamed: 0,MaxMin,PEP_Baseline_an,PEP_Baseline_cu,PEP_Baseline_fe,PEP_Baseline_n,PEP_Baseline_nn,PEP_Baseline_og,PEP_Baseline_p,PEP_Baseline_phmax,PEP_Baseline_phmin,PEP_Baseline_tss,PEP_Baseline_turbidity,PEP_Baseline_zn
0,Max,7452262.0,229725000.0,4448060000.0,19115650.0,2600866.0,31267240.0,7283846.0,497346.455645,703512.890691,1392570000.0,2968967000.0,1051438000.0
1,Min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


This is the pd_exPEP_norm dataframe:


Unnamed: 0,Facility_ID,PEP_norm_tss,PEP_norm_turbidity,PEP_norm_p,PEP_norm_n,PEP_norm_nn,PEP_norm_an,PEP_norm_og,PEP_norm_cu,PEP_norm_zn,PEP_norm_fe,PEP_norm_phmin,PEP_norm_phmax
0,1,0.111066,0.019741,0.027417,0.201431,0.033609,,0.0,,,,0.0,0.557591
1,2,0.001374,0.00251,0.001332,0.066375,0.023837,,0.0,,,,0.0,0.0
2,3,,,,,,,0.0,,,,0.0,0.165571
3,4,0.021782,0.0,0.013846,0.006678,0.0,,0.0,,,,0.0,0.0
4,5,0.062949,0.03263,0.116553,0.441888,0.000844,,0.02202,,,,0.0,0.0
5,6,0.21722,0.216558,0.216123,0.21581,0.207688,,0.217182,,,,0.0,0.21722
6,7,0.0,0.0003,0.0,0.001177,0.001151,,0.0,,,,0.0,0.0
7,8,,,,,,,0.0,,,,0.0,0.039578
8,9,,0.089554,0.089675,0.090066,0.089745,0.089568,0.0895,,,,0.143743,0.012788
9,10,0.061154,0.060967,0.060845,0.060757,0.05847,,0.061143,,,,0.0,0.061154


In [17]:
#############################################################################################################
#                                        Sum Normalized PEP Scores
#                                write scores to new dataframe called pd_PEP_sum
############################################################################################################ 
def SumNormPEPs (pd_PEP_norm):
    #general function to sum normalized PEPs
    #CAUTION!!! REVISES passed dataframe to include summing column
    pd_PEP_norm.set_index('Facility_ID', inplace=True) #move FAcility ID to index temporarily
#     display(pd_PEP_norm['PEP_norm_sum'])
    pd_PEP_norm['PEP_norm_sum'] = pd_PEP_norm.sum(axis = 1) #sum norm scores for each facility
    pd_PEP_norm.reset_index(inplace=True) #move facility ID from index
    return(pd_PEP_norm) #return passed dataframe

#############################################################################################################
#                                  Sum existing Normalized PEP Scores
#                              (REDEFINE GLOBAL VARIABLE: pd_exPEP_norm)
############################################################################################################ 

# display(pd_exPEP_norm.loc[:,['Facility_ID', 'PEP_norm_sum']])
pd_exPEP_norm = SumNormPEPs(pd_exPEP_norm) #revise pd_exPEP_Norm to include summing column
display(pd_exPEP_norm)

Unnamed: 0,Facility_ID,PEP_norm_tss,PEP_norm_turbidity,PEP_norm_p,PEP_norm_n,PEP_norm_nn,PEP_norm_an,PEP_norm_og,PEP_norm_cu,PEP_norm_zn,PEP_norm_fe,PEP_norm_phmin,PEP_norm_phmax,PEP_norm_sum
0,1,0.111066,0.019741,0.027417,0.201431,0.033609,,0.0,,,,0.0,0.557591,0.950855
1,2,0.001374,0.00251,0.001332,0.066375,0.023837,,0.0,,,,0.0,0.0,0.095428
2,3,,,,,,,0.0,,,,0.0,0.165571,0.165571
3,4,0.021782,0.0,0.013846,0.006678,0.0,,0.0,,,,0.0,0.0,0.042307
4,5,0.062949,0.03263,0.116553,0.441888,0.000844,,0.02202,,,,0.0,0.0,0.676885
5,6,0.21722,0.216558,0.216123,0.21581,0.207688,,0.217182,,,,0.0,0.21722,1.5078
6,7,0.0,0.0003,0.0,0.001177,0.001151,,0.0,,,,0.0,0.0,0.002628
7,8,,,,,,,0.0,,,,0.0,0.039578,0.039578
8,9,,0.089554,0.089675,0.090066,0.089745,0.089568,0.0895,,,,0.143743,0.012788,0.694638
9,10,0.061154,0.060967,0.060845,0.060757,0.05847,,0.061143,,,,0.0,0.061154,0.424489


In [18]:
'''
#############################################################################################################
#                               CALCULATE WRS PEP BASE SCORES
#                      WRS PEP BASE SCORE = NORM_PEP_SCORE*(SampleUncertainty + 1) 
############################################################################################################ 
'''
def _HELPER_PEPUncertainty(ls_id, dict_unc):
    '''determine the uncertainty level based on sample method
        (retrieve list of sample methods from ExPollConcs table for facilities in ls_id; assign uncertainty level using dict_unc)
       input: 
            ls_id: list of facility ids
            dict_unc: dictionary of uncertainty values for each sample method
        return: 
            pd_unc: dataframe [Facility_ID, UncertaintyValue]
    '''
    #get sample method for each facility in ls_id list
    q = session.query(ExPollConcs.facility_id.label('Facility_ID'), ExPollConcs.sample_method.label('sample_method')).filter(
        ExPollConcs.facility_id.in_(ls_id)).distinct(ExPollConcs.facility_id).order_by(ExPollConcs.facility_id)
    pd_samplemethod = pd.read_sql(q.statement,session.bind)
    #use dict_unc to assign uncertainty value for each facility's sample method
    pd_samplemethod['Uncertainty_Value'] = pd_samplemethod['sample_method'].apply(lambda val: dict_unc[val])
    return pd_samplemethod
    
def CalcWRSPEPBaseScore(pd_PEP_sum, Use_PrevUncertaintyVals):
    #calculate wrs pep base score = NORM_PEP_SCORE*(SampleUncertainty + 1) 
    #input: pd_PEP_sum dataframe containing COLUMNS [Facility_ID, PEP_norm_sum]
    #       Use_PrevUncertaintyVals: TRUE if want to use uncertainty_values prev. retrieved and stored in pd_exWRSPEPBaseScore
    #                                FALSE if want to retrieve uncertainty vals
    #return: dataframe of WRS PEP Base Scores
    
    #make a list of Facility IDs in pd_PEP_sum
    ls_id = [np.asscalar(id) for id in pd_PEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
    #make the pd_WRSPEPBaseScore dataframe:
    #write uncertainty information into pd_WRSPEPBaseScore
    if Use_PrevUncertaintyVals:
        pd_WRSPEPBaseScore = pd.DataFrame() #make new df
        pd_WRSPEPBaseScore['Facility_ID'] = pd_exWRSPEPBaseScore['Facility_ID']
        pd_WRSPEPBaseScore['Uncertainty_Value'] = pd_exWRSPEPBaseScore['Uncertainty_Value']
    else:
        pd_WRSPEPBaseScore = _HELPER_PEPUncertainty(ls_id, {'infield':0.25, 'sim_MaxType':1.0, 'sim_EMC':0.0})
    #copy in PEP_norm_sum values
    pd_WRSPEPBaseScore['PEP_norm_sum'] = pd_PEP_sum['PEP_norm_sum']
    #calculate PEP wrs and then write result into column
    pd_WRSPEPBaseScore['PEP_BaseRisk'] = pd_WRSPEPBaseScore['PEP_norm_sum'] * (pd_WRSPEPBaseScore['Uncertainty_Value'] + 1)
    return pd_WRSPEPBaseScore

#############################################################################################################
#                            CALCULATE existing WRS PEP BASE SCORES & (TO DO: WRITE SCORES TO database) 
#                              (DEFINE GLOBAL VARIABLE: pd_exWRSPEPBaseScore)
############################################################################################################ 
#calc WRS PEP Base Scores for existing normalized PEP sums (pd_exPEP_norm)
pd_exWRSPEPBaseScore = CalcWRSPEPBaseScore(pd_exPEP_norm, False)
display(pd_exWRSPEPBaseScore)

Unnamed: 0,Facility_ID,sample_method,Uncertainty_Value,PEP_norm_sum,PEP_BaseRisk
0,1,infield,0.25,0.950855,1.188568
1,2,infield,0.25,0.095428,0.119284
2,3,infield,0.25,0.165571,0.206963
3,4,infield,0.25,0.042307,0.052883
4,5,infield,0.25,0.676885,0.846106
5,6,sim_MaxType,1.0,1.5078,3.015601
6,7,infield,0.25,0.002628,0.003285
7,8,infield,0.25,0.039578,0.049472
8,9,sim_MaxType,1.0,0.694638,1.389276
9,10,sim_MaxType,1.0,0.424489,0.848977


In [19]:
'''
#############################################################################################################
#                                    CALCULATE WRS BASE SCORES  
#
############################################################################################################ 
CALCULATE WRS BASE SCORE:
    TABLE 1 Facilities: WRS_BASE = WRS_INHERENT + WRS_CONTROLLABLE
        WRS_CONTROLLABLE = WRS_BMP + WRS_PEP
        WRS_BMP = WRS_HOUSEKEEPING + WRS_PCBMP
    TABLE 1A Facilities: 
    TABLE 2 & non-permitted:

    INPUTS:
        pd_wrsNonPEPScores
        pd_wrsPEPScores
'''
def GET_pd_FacRisks(ls_id):
    '''helper function that takes in list of facility_char ids and returns dataframe of:
        Facility_ID
        Inherent base risk
        housekeeping bmp base risk
        sw plan base risk
        bmp inspection deficiency rate (pc base risk)
    
        these items are obtained by querying database table: Facility Risks
    '''    
    q_facriskIDs =  session.query(Facility_Chars.existing_facility_risk_id).filter(Facility_Chars.id.in_(ls_id)) #for facilities in pd_exPEP_sum, get existing_facility_risk_id records
    #use q_facriskIDs as filter on Facility_Risks table to get associated wrs pollutant base id
    q_facrisks = session.query(
        Facility_Chars.id.label('Facility_ID'),Facility_Risks.Category_RiskFactor, Facility_Risks.Inherent_BaseRisk, Facility_Risks.HousekeepingBMP_BaseRisk, Facility_Risks.SWPlan_BaseRisk, Facility_Risks.BMPInspectionDeficiency_Rate).filter(
            Facility_Risks.id.in_(q_facriskIDs)).filter(
                Facility_Risks.id == Facility_Chars.existing_facility_risk_id).order_by(Facility_Chars.id)
    pd_facrisks = pd.read_sql(q_facrisks.statement, session.bind)
    return pd_facrisks

def CalcWRSBaseScore(pd_wrsNonPEPScores, pd_wrsPEPScores):
    '''
    CALCULATE WRS BASE SCORE:
        TABLE 1 Facilities: WRS_BASE = WRS_INHERENT + WRS_CONTROLLABLE
            WRS_CONTROLLABLE = WRS_BMP + WRS_PEP
            WRS_BMP = WRS_HOUSEKEEPING + WRS_PCBMP
        TABLE 1A Facilities: 
        TABLE 2 & non-permitted:

        INPUTS:
            pd_wrsNonPEPScores [Facility_ID	Category_RiskFactor	Inherent_BaseRisk	HousekeepingBMP_BaseRisk	SWPlan_BaseRisk	BMPInspectionDeficiency_Rate]
            pd_wrsPEPScores [	Facility_ID	sample_method	Uncertainty_Value	PEP_norm_sum	PEP_BaseRisk ]

    '''
    #initialize pd_exPEP_norm dataframe w/ Facility_IDs from pd_exPEP_raw
    pd_wrsBaseScores = pd.merge(pd_wrsNonPEPScores, pd_wrsPEPScores, on='Facility_ID')
    
    #calculate Table 1 scores (no need to differentiate tables now. all facilities are table 1)
    pd_wrsBaseScores['BMP_BaseRisk'] = pd_wrsBaseScores['HousekeepingBMP_BaseRisk'] + pd_wrsBaseScores['BMPInspectionDeficiency_Rate']
    pd_wrsBaseScores['Controllable_BaseRisk'] = pd_wrsBaseScores['BMP_BaseRisk'] + pd_wrsBaseScores['PEP_BaseRisk']
    #calculate total score:
    pd_wrsBaseScores['Total_BaseRisk'] = pd_wrsBaseScores['Inherent_BaseRisk'] + pd_wrsBaseScores['Controllable_BaseRisk']
    return pd_wrsBaseScores

#############################################################################################################
#                             CALCULATE existing WRS BASE SCORES 
#                       (DEFINE GLOBAL VARIABLE: pd_exwrsNonPEPScores & pd_exwrsBaseScores)
############################################################################################################ 
#make a list of Facility IDs in pd_exWRSPEPBaseScore
ls_id = [np.asscalar(id) for id in pd_exWRSPEPBaseScore['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
#get nonPEP WRS scores for each facility
pd_exwrsNonPEPScores = GET_pd_FacRisks(ls_id)
#make base scores using existing sub-scores.
pd_exwrsBaseScores =  CalcWRSBaseScore(pd_exwrsNonPEPScores,pd_exWRSPEPBaseScore.loc[:,['Facility_ID','PEP_BaseRisk']])
display(pd_exwrsBaseScores)

Unnamed: 0,Facility_ID,Category_RiskFactor,Inherent_BaseRisk,HousekeepingBMP_BaseRisk,SWPlan_BaseRisk,BMPInspectionDeficiency_Rate,PEP_BaseRisk,BMP_BaseRisk,Controllable_BaseRisk,Total_BaseRisk
0,1,150.003019,1.311222,0.96355,0.337498,1.0,1.188568,1.96355,3.152118,4.463339
1,2,150.003019,1.292442,0.98211,0.382344,1.0,0.119284,1.98211,2.101395,3.393837
2,3,150.003019,1.354261,0.394759,0.394759,0.0,0.206963,0.394759,0.601722,1.955983
3,4,150.003019,1.054281,0.359765,0.359765,0.0,0.052883,0.359765,0.412648,1.466929
4,5,150.003019,1.289362,1.091404,0.428664,0.0,0.846106,1.091404,1.93751,3.226872
5,6,150.003019,1.287252,0.351029,0.351029,0.0,3.015601,0.351029,3.36663,4.653882
6,7,150.003019,1.271192,0.452946,0.452946,0.855,0.003285,1.307946,1.311231,2.582423
7,8,150.003019,1.350908,0.339225,0.339225,0.0,0.049472,0.339225,0.388697,1.739606
8,9,150.003019,1.002867,0.323702,0.323702,0.0,1.389276,0.323702,1.712978,2.715845
9,10,150.003019,1.000678,0.35123,0.35123,0.0,0.848977,0.35123,1.200207,2.200885


In [67]:
# pd_ls = pd_exwrsBaseScores['Facility_ID'].tolist()
pd_ls = [np.asscalar(id) for id in pd_exwrsBaseScores['Facility_ID']] #
q = session.query(Facility_Chars.id.label('Facility_ID'), Facility_Chars.Fac_Name).filter(Facility_Chars.id.in_((pd_ls)))
pd_names = pd.read_sql(q.statement,session.bind)

pd_merge = pd.merge(pd_names, pd_exwrsBaseScores, on = 'Facility_ID')
pd_merge




Unnamed: 0,Facility_ID,Fac_Name,Category_RiskFactor,Inherent_BaseRisk,HousekeepingBMP_BaseRisk,SWPlan_BaseRisk,BMPInspectionDeficiency_Rate,PEP_BaseRisk,BMP_BaseRisk,Controllable_BaseRisk,Total_BaseRisk
0,1,Kalihi-Palama Bus & Paratransit Facility,150.003019,1.311222,0.96355,0.337498,1.0,1.188568,1.96355,3.152118,4.463339
1,2,Pearl City Bus Facility,150.003019,1.292442,0.98211,0.382344,1.0,0.119284,1.98211,2.101395,3.393837
2,3,Kapaa Refuse Transfer Station,150.003019,1.354261,0.394759,0.394759,0.0,0.206963,0.394759,0.601722,1.955983
3,4,Kawailoa Refuse Transfer Station,150.003019,1.054281,0.359765,0.359765,0.0,0.052883,0.359765,0.412648,1.466929
4,5,Keehi Refuse Transfer Station,150.003019,1.289362,1.091404,0.428664,0.0,0.846106,1.091404,1.93751,3.226872
5,6,Honolulu Refuse Collection Yard,150.003019,1.287252,0.351029,0.351029,0.0,3.015601,0.351029,3.36663,4.653882
6,7,Pearl City Refuse Collection Yard,150.003019,1.271192,0.452946,0.452946,0.855,0.003285,1.307946,1.311231,2.582423
7,8,Kapaa Refuse Collection Yard,150.003019,1.350908,0.339225,0.339225,0.0,0.049472,0.339225,0.388697,1.739606
8,9,Waianae Refuse Collection Yard,150.003019,1.002867,0.323702,0.323702,0.0,1.389276,0.323702,1.712978,2.715845
9,10,Ewa Refuse Convenience Center,150.003019,1.000678,0.35123,0.35123,0.0,0.848977,0.35123,1.200207,2.200885


# BMP FEASIBILITY EVALUATION
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [20]:
'''
#############################################################################################################
#                    EVALUATE BASE BMP FEASIBILITY at each facility  
#                Write results to the base_bmp_feasibility_test_results table.
############################################################################################################ 

'''

'\n#############################################################################################################\n#                    EVALUATE BASE BMP FEASIBILITY at each facility  \n#                Write results to the base_bmp_feasibility_test_results table.\n############################################################################################################ \n\n'

In [21]:
# %%capture cap --no-stderr
print('\n******Evaluating Base BMP feasibility at facilities.******')
ShowCalculations = False #flag indicating if steps should be outputted
Expr.ResetEvalErrorCount() #RESET EXPRESION EVALUATOR ERROR COUNT

#Only analyze bmps at facilities we have normalized PEP data for. make list of these facilities.
ls_id = [np.asscalar(id) for id in pd_exPEP_norm['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):    
    if ShowCalculations: print ('\n***Evaluating base bmp feasibiilty tests for facility: ', aFac.Fac_Name), ' ***'
    myBMPs = session.query(Base_BMPs)
    for aBMP in myBMPs:
        if ShowCalculations:print ('\n######Evaluating feasibility of base_bmp: ', aBMP.bmp_name, ' ID: ', aBMP.id, '######')
        BBMP_Eval.Eval_base_bmp_feasibility_tests(aFac.id, aBMP, ShowCalculations)
session.commit
winsound.Beep(250,1000)
print ('*****************************************************************')
print ('* Completed evaluating Base BMP feasibility                     *')
if Expr.CountEvalErrors() >0:
    print (Expr.CountEvalErrors(), ' errors were encountered. Review output to identify location(s)')
    print ('Hint: expression evaluation error lines are prefixed by: FAULT!!!! Error occured while evaluating expression:')
else:
    print ('No errors detected.')
print ('*****************************************************************')

# with open('Output_Files\\output.txt', 'w') as f:
#     f.write(cap.stdout)
# f.close()


******Evaluating Base BMP feasibility at facilities.******
*****************************************************************
* Completed evaluating Base BMP feasibility                     *
52  errors were encountered. Review output to identify location(s)
Hint: expression evaluation error lines are prefixed by: FAULT!!!! Error occured while evaluating expression:
*****************************************************************


In [22]:
'''
#############################################################################################################
#                           Make all combinations of base bmps  
#                     Write results to the combos bmp database table
############################################################################################################ 
#MAXIMUM POLLUTANT REMOVAL RATES ARE DETERMINED BY IDENTIFYING 
#  THE BASE_BMP IN THE COMBO THAT PROVIDES THE HIGHEST REMOVAL RATE FOR A GIVEN POLLUTANT
'''
print ('get a coffee...this one takes a while!')
start_time = time.time()
CBMP_Eval.Make_ALL_bmp_base_option_combos()
session.commit()
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
winsound.Beep(250,1000)

get a coffee...this one takes a while!
 Making BMP Combos of length: 1
 Find max pollutant removal rates for each BMP Combo of length:  1
  Made  14  combos
 Making BMP Combos of length: 2
 Find max pollutant removal rates for each BMP Combo of length:  2
  Made  91  combos
 Making BMP Combos of length: 3
 Find max pollutant removal rates for each BMP Combo of length:  3
  Made  364  combos
 Making BMP Combos of length: 4
 Find max pollutant removal rates for each BMP Combo of length:  4
  Made  1001  combos
 Making BMP Combos of length: 5
 Find max pollutant removal rates for each BMP Combo of length:  5
  Made  2002  combos
 Making BMP Combos of length: 6
 Find max pollutant removal rates for each BMP Combo of length:  6
  Made  3003  combos
 Making BMP Combos of length: 7
 Find max pollutant removal rates for each BMP Combo of length:  7
  Made  3432  combos
 Making BMP Combos of length: 8
 Find max pollutant removal rates for each BMP Combo of length:  8
  Made  3003  combos
 Makin

In [23]:
# '''
# #############################################################################################################
# #                         BUILD FEASIBLE BMP COMBOS FOR EACH FACILITY 
# #            insert/update combo data to Combo_BMP_Feasibility_Test_Results table & pd_BaseBMPCombos 
# ############################################################################################################ 
#PREPARE DATAFRAMES THAT WILL BE USED TO 

from sqlalchemy import and_

def _Make_bmp_fingerprint(base_BMP_components):
    #create fingerprint of the passed list of base_bmp_ids
    #fingerprint is just a | separated list of ids of the base bmps that make up the combo bmp
    #corresponds to bmp_options table's bmp_fingerprint field
    #FORMAT: |bmp_option_base_component_id||bmp_option_base_component_id| w/ id's given in ascending order
    fingerprint = '|' + '|'.join(str(id) + '|' for id in base_BMP_components)
    return fingerprint

def dictAppend(Append2Dict, DictVals):
    #helper function to append into existing dictionary
    if len(Append2Dict) == 0:
        return DictVals
    else:
        for k,v in DictVals.items():
            Append2Dict[k] = Append2Dict[k] + v
        return Append2Dict
    
def GetBMPPollRedRates(Facility_ID, ls_bmp_fingerprint):
    #get combo bmp pollutant removal rates for the list of bmp combos
    q = session.query(Combo_BMPs.bmp_fingerprint.label('BMP_Fingerprint'), Combo_BMPs.id.label('combos_bmp_id'), PRR.id.label('PRR_id'),
          PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
          PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax
        ).filter(Combo_BMPs.bmp_fingerprint.in_(ls_bmp_fingerprint) ).filter(
        Combo_BMPs.bmp_option_removal_rate_id == PRR.id)     
    pd_rr = pd.read_sql(q.statement,session.bind).applymap(lambda el: 0.00 if el is None else el) #el = 0. if nonetype to represent no removal rate change
    dict_ret = {**{'Facility_ID':[Facility_ID]*len(ls_bmp_fingerprint), 'BMP_Fingerprint': pd_rr['BMP_Fingerprint'].tolist() } ,**{'r_'+Constituent : pd_rr['r_' + Constituent].tolist() for Constituent in pollLS }}
    return dict_ret

def Make_Fsbl_FacBMPCombos(aFac, ShowCalculations):
    #a wrapper around Eval_FacBMPCombo
    print('\n***Making feasible bmp combos for facility: ', aFac.Fac_Name, '***')
    print ('****Evaluating feasibile base bmps****')
    df = pd.DataFrame(BBMP_Eval.evalFacility_BaseBMP(aFac.id, ShowCalculations)).set_index('base_bmp_id')
    if ShowCalculations: display (df)   
    df = df.loc[df['is_feasible'] == 1]
    if ShowCalculations:
        print ('****These are the feasible base bmps. I\'ll use them to make combos:****')
        display (df)
    feas_ls = df.index#send feasible base bmp ids to list
    print ('****Completed base bmp feasibility evaluation.****')
    print ('****Evaluating combinations of feasible base bmps...****')    
    #make fingerprint for each bmp combo. use itertools.combinations to generate all combos of feasible BMP list(feas_ls)
    ls_fingerprints = [_Make_bmp_fingerprint(combo)
            for CBOLen in range (1, len(feas_ls)+1) #+1 so it's inclusive of last count
                 for combo in itertools.combinations(feas_ls,CBOLen)
            ]
    ls_fac = [aFac.id] * len(ls_fingerprints) #make a corresponding list of facility_id for each bmp combo
    #get poll red rates.  
    dict_RedRates={}
    #do in small increments and append dictionary b/c SQLA fails if too many items are passed to it
    ls_sub_fingerprints = np.array_split(np.array(ls_fingerprints),5)      
    for ls_sub_fingerprint_el in ls_sub_fingerprints:
        dict_tmp = GetBMPPollRedRates(aFac.id, ls_sub_fingerprint_el.tolist())
        dict_RedRates = dictAppend(dict_RedRates, dict_tmp)
        
    print ('      There are ', len(ls_fingerprints), ' combinations of feasible Base BMPs.')

    return ls_fingerprints, ls_fac, dict_RedRates #return the fingerprint and facility id lists and reduction rates
    
def Make_Fsbl_AllFacBMPCombos(ShowCalculations):
    print ('Making feasibile BMP Options for each facility:')
    #Only analyze bmps at facilities we have data for. make list of these facilities.
    ls_id = [np.asscalar(id) for id in pd_exPEP_norm['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types

    #make lists of each facility and bmp options:
    ls_fingerprints = []
    ls_fac = []
    dict_RedRates = {}
    for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):
        ls_fingerprints_tmp, ls_fac_tmp, dict_RedRates_tmp =  Make_Fsbl_FacBMPCombos(aFac, ShowCalculations) #get the fingerprint and facility id lists and reduction rates
        ls_fingerprints.extend(ls_fingerprints_tmp)
        ls_fac.extend(ls_fac_tmp)
        dict_RedRates = dictAppend(dict_RedRates, dict_RedRates_tmp)

    #combine lists into a dict:
    dict_FacBMPCombos = {
        'idxFacBMPAssignment': [str(x[0])+x[1] for x in zip(ls_fac, ls_fingerprints)], #concat ID & fingerprint to make uniqe index
        'Facility_ID': ls_fac,
        'BMP_Fingerprint': ls_fingerprints
    }
    #use dict to make a dataframe
    pd_FacBMPComboData = pd.DataFrame.from_dict(dict_FacBMPCombos)
    #make redrates tmp dataframe using dictionary and then join red rates dataframe w/ pd_FacBMPComboData:
    pd_RedRates_tmp = pd.DataFrame.from_dict(dict_RedRates)
#     print (dict_RedRates)
    pd_FacBMPComboData = pd.merge(pd_FacBMPComboData, pd_RedRates_tmp.loc[:,['Facility_ID', 'BMP_Fingerprint'] +\
                              ['r_' + Constituent for Constituent in pollLS]],
                                  on = ['Facility_ID','BMP_Fingerprint'])
#     #join existing wrs dataframe:
    pd_FacBMPComboData = pd.merge(pd_FacBMPComboData, pd_exwrsBaseScores.loc[:,['Facility_ID','PEP_BaseRisk', 'Total_BaseRisk']], on = 'Facility_ID')
#     #add in columns that we'll calculate
    pd_FacBMPComboData['is_calculated'] = False #flag indicating if this row was previously calculated 
    pd_FacBMPComboData['RedPEP_BaseRisk'] = np.nan #reduced PEP risks
    pd_FacBMPComboData['RedTotal_BaseRisk'] = np.nan #reduced total risk
    pd_FacBMPComboData['CIP_Cost'] = np.nan #cip costs
    pd_FacBMPComboData['OM_Cost'] = np.nan #om costs

    return pd_FacBMPComboData
    
#############################################################################################################
#                          BUILD calculation dataframe for the feasible BMP combos
#                   (DEFINE GLOBAL VARIABLE: pd_FacBMPComboData)
############################################################################################################ 
ShowCalculations = False
start_time = time.time()
print ('Evaluating feasibile BMP Options for each facility:')
pd_FacBMPComboData = Make_Fsbl_AllFacBMPCombos(ShowCalculations)
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
display(pd_FacBMPComboData)
session.commit()


Evaluating feasibile BMP Options for each facility:
Making feasibile BMP Options for each facility:

***Making feasible bmp combos for facility:  Kalihi-Palama Bus & Paratransit Facility ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  511  combinations of feasible Base BMPs.

***Making feasible bmp combos for facility:  Pearl City Bus Facility ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  1023  combinations of feasible Base BMPs.

***Making feasible bmp combos for facility:  Kapaa Refuse Transfer Station ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  15  combinations of feasible Base BMPs.

***Making feasible bmp combos for facil

  'strategies for improved performance.' % expr)


****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  1  combinations of feasible Base BMPs.

***Making feasible bmp combos for facility:  Waianae Closed Sanitary Landfill ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  1  combinations of feasible Base BMPs.

***Making feasible bmp combos for facility:  Kapaa Closed Sanitary Landfill ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  3  combinations of feasible Base BMPs.

***Making feasible bmp combos for facility:  Kailua Corporation Yard ***
****Evaluating feasibile base bmps****
****Completed base bmp feasibility evaluation.****
****Evaluating combinations of feasible base bmps...****
      There are  127  combinations of feasibl

Unnamed: 0,BMP_Fingerprint,Facility_ID,idxFacBMPAssignment,r_tss,r_turbidity,r_p,r_n,r_nn,r_an,r_og,...,r_fe,r_phmin,r_phmax,PEP_BaseRisk,Total_BaseRisk,is_calculated,RedPEP_BaseRisk,RedTotal_BaseRisk,CIP_Cost,OM_Cost
0,|1|,1,1|1|,0.39,0.000,0.000,0.000,0.0,0.0,0.63,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
1,|2|,1,1|2|,0.80,0.650,0.590,0.500,0.0,0.0,0.75,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
2,|3|,1,1|3|,0.80,0.270,0.640,0.340,0.0,0.0,0.90,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
3,|6|,1,1|6|,0.80,0.000,0.000,0.000,0.0,0.0,0.80,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
4,|7|,1,1|7|,0.70,0.780,0.330,0.210,0.0,0.0,0.00,...,0.45,0.0,0.0,1.188568,4.463339,False,,,,
5,|8|,1,1|8|,0.95,0.955,0.000,0.360,0.0,0.0,0.93,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
6,|9|,1,1|9|,1.00,1.000,1.000,1.000,1.0,1.0,1.00,...,1.00,0.0,1.0,1.188568,4.463339,False,,,,
7,|13|,1,1|13|,0.98,0.778,0.902,0.415,0.0,0.0,0.96,...,0.95,0.0,0.0,1.188568,4.463339,False,,,,
8,|14|,1,1|14|,0.00,0.000,0.000,0.000,0.0,0.0,0.00,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,
9,|1||2|,1,1|1||2|,0.80,0.650,0.590,0.500,0.0,0.0,0.75,...,0.00,0.0,0.0,1.188568,4.463339,False,,,,


In [24]:
pdFaccp = pd_FacBMPComboData.copy(deep=True) #copy pd_FacBMPCombo so we don't mess it up
# WRITE COMBOS RESULTS TO EXCEL FILE
xlsFile = 'Output_Files\\Combos.xls'
print ('writing to excel file: ', xlsFile)
writer = pd.ExcelWriter(xlsFile)
pdFaccp.to_excel(writer,'Output')
writer.save()

writing to excel file:  Output_Files\Combos.xls


# BMP OPTION SIMULATOR
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [25]:
'''
ASSIGNMENT EVALUATOR
DEFINE FUNCTIONS USED TO EVALUATE BMP OPTION ASSIGNMENTS FOR EACH FACILITY
EVALUATION WILL INCLUDE RISK REDUCTION AND CIP & OM COSTS

'''

def CalcPollReduction(Facility_ID, pd_rr, pd_ExConcs, pollLS):
     #calculate reduced pollutant concentrations for the passed in removal rate dataframe and facility id 
#         return dataframe slcice of pollutant reductions
    #load in ex sample data:
#     start_time = time.time()
    pd_RedConcs = pd_ExConcs.loc[pd_ExConcs['Facility_ID']==Facility_ID].copy(deep = True)    
    #calculate reduced concentrations (red = c * (1-rr)) 
    for Constituent in pollLS:
        pd_RedConcs['c_' + Constituent] = pd_RedConcs['c_' + Constituent].apply(lambda row: row * (1- pd_rr['r_' + Constituent]))
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 
    
    #calculate wrs reduction:
#     start_time = time.time()
    pd_RedFacExceedances = CalcExceedances(pd_RedConcs, pollLS)
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 
    
    #calculate exceedances
#     start_time = time.time()
    pd_RedAFWExceedances = AFWFacExceedances(pd_RedFacExceedances, pollLS)
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 

    
    #CALCULATE raw polution exceedance potential risk
#     start_time = time.time()
    pd_RedPEP_raw = CalcPEP_Raw(pd_RedAFWExceedances, pollLS, pd_RunoffVols)
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 

    
    #normalize pep score
#     start_time = time.time()    
    pd_RedPEP_norm = CalcPEP_norm(pd_RedPEP_raw, pollLS, pd_NormBaselinePEP)
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 

    
    #add up each facility's norm pep score
#     start_time = time.time()
    pd_RedPEP_sum = SumNormPEPs(pd_RedPEP_norm)
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time))        
    
    #calculate the WRS PEP risk score
#     display(pd_RedPEP_sum)
#     start_time = time.time()
    pd_RedWRSPEPBaseScore = CalcWRSPEPBaseScore(pd_RedPEP_sum, True) #true b/c we want to reuse already gotten uncertainty vals
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 

    
    #calculate total wrs risk scores
#     start_time = time.time()    
    pd_RedWRSBaseScore = CalcWRSBaseScore(pd_exwrsNonPEPScores, pd_RedWRSPEPBaseScore.loc[:,['Facility_ID','PEP_BaseRisk']])
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time)) 
    
    #return the pep risk score and wrs total score
    return np.asscalar(pd_RedWRSBaseScore['PEP_BaseRisk'][0]), np.asscalar(pd_RedWRSBaseScore['Total_BaseRisk'][0])

def CalcCosts(Facility_ID, BMPFingerprint, ShowCalculations):
    #calculate cip and om costs. return as floats
    #get fingerprint list:
    BMPFingerprint = BMPFingerprint.replace('||','|')
    ls_baseBMPs = BMPFingerprint.split('|')
    ls_baseBMPs = ls_baseBMPs[1:len(ls_baseBMPs)-1]
    ls_baseBMPs = [int(i) for i in ls_baseBMPs]
    df = pd.DataFrame(BBMP_Eval.evalFacility_BaseBMP(Facility_ID, ShowCalculations)).set_index('base_bmp_id')
    if ShowCalculations: display (df)   
    sumCIP = sum(df.loc[bmp_id,'calc_cip_cost'] for bmp_id in ls_baseBMPs)
    sumOM = sum(df.loc[bmp_id,'calc_om_cost'] for bmp_id in ls_baseBMPs)
    return sumCIP, sumOM

def EvalFacilityCalcs(pd_BMPOpt, pd_FacBMPComboData, ShowCalculations):
    '''calculate costs and risk reductions for the assignments in pd_BMPOpt. 
        write results to the pd_FacBMPComboData calc dataframe
           inputs:
                Facility_ID: facility to be evaluated
                pd_FacBMPComboData: dataframe to write results to
                ShowCalculations: true/false show calculation steps
            return:
                pd_FacBMPComboData
    '''
    #Evaluate the combo solutions in pd_BMPOpt:
    #I SUCK AND CAN'T FIGURE OUT THE ELEGANT WAY TO DO THIS. OKAY - BRUTE FORCE IT
    #this next line will extract assignments in pd_BMPOpt by
    #extracing the facility_id and bmp_fingerprint for each row into the ls_Assignments list
    #each assignment is a list. inner list 1st element is Facility_ID, 2nd elelment is BMP fingerprint
    ls_Assignments = [[row.Facility_ID, row.BMP_Fingerprint]  for row in pd_BMPOpt.itertuples()] #define list of Fac IDs and BMP Fingerprint assignments
    ls_r = ['r_'+Constituent for Constituent in pollLS] #list of removal constituent column headers in pd_FacBMPComboData
    #calculate using assignments in ls_Assignments:
    for assignment in ls_Assignments:
        #calculate removals
#         start_time = time.time()
        PEP_BaseRisk, Total_BaseRisk = CalcPollReduction(assignment[0], 
            pd_BMPOpt.loc[pd_BMPOpt['Facility_ID'] == assignment[0], ls_r],
            pd_exMaxConcs.copy(deep=True), pollLS)
#         print ('---Calc RR %s execution time in seconds ---' % (time.time() - start_time))
        #calculate costs:
#         start_time = time.time()
#         print (assignment[0])
        sumCIP, sumOM = CalcCosts(np.asscalar(assignment[0]),assignment[1], ShowCalculations)    
#         print ('--- %s execution time in seconds ---' % (time.time() - start_time))
        #write remval results
        pdFaccp.loc[(pdFaccp['idxFacBMPAssignment'] == str(assignment[0]) + assignment[1]), 
                   ['RedPEP_BaseRisk','RedTotal_BaseRisk','is_calculated' , 'CIP_Cost', 'OM_Cost' ]] = \
                    [PEP_BaseRisk, Total_BaseRisk, True , sumCIP, sumOM]
    return pdFaccp, ls_Assignments #return calculated dataframe and assignents list



In [26]:
'''
DEFINE ASSIGNMENT GENERATOR
Function that assigns BMP options for a simulation run (typ. containing multiple facilities)

We will need to identify the BMP options that yield the lowest cost for CIP (or OM, such as we choose).
I am thinking of using a monte carlo type approach. Perhaps using a simulated annealing approach.
For now, just do a pure random walk
'''
def AssignBMPs_RndWlk(ShowCalculations):
    #generate a BMP combo solution for each facility 
    #select bmp option randomly
    #Evaluate the combo solutions in assignments dataframe:
    #each assignment is a list. inner list 1st element is Facility_ID, 2nd elelment is BMP fingerprint
    FacGroup = pdFaccp.groupby('Facility_ID') #group combo options by facility_id
    return FacGroup.apply(lambda aFac:  aFac.iloc[random.randint(0,aFac.shape[0]-1)]) #randomly select a combo option for each facilityp

# def AssignBMPs_SA(WalkNo, WalkMax, ShowCalculations):
#     #generate a BMP combo solution for each facility
#     #select option using a simulated annealing approach
#     #Evaluate the combo solutions in assignments dataframe:
#     #each assignment is a list. inner list 1st element is Facility_ID, 2nd elelment is BMP fingerprint    
#     if WalkNo == 0: #initial assignment is random
#         return AssignBMPs_RndWlk(ShowCalculations)        
#     else: #assign using SA approach
        
        
# http://katrinaeg.com/simulated-annealing.html

In [27]:
'''
ASSIGNMENT RESULTS ANALYZER
'''
# pdFaccp.loc[pdFaccp['is_calculated'] == True]
# FacGroup = pdFaccp.groupby('Facility_ID')
# # FacGroup.groups.keys()
# g =FacGroup.get_group(6)
# display(g)
# print (g.index[g['BMP_Fingerprint'] == '|10|'].tolist())
# # g.loc[g['is_calculated'] == True]

def make_pd_tgt_RR_tplt():
    #reduction rate summary dataframe template
    dict_def = [{'walkno':np.nan,'idxFacBMPAssignment':np.nan,'tgtRR':np.nan, 'actRR':np.nan,
                 'objFunVal':1e+12, 'best_walkno': 0, 'best_objFunVal': 1e+12,
                 'exPEP_BaseRisk':np.nan, 'RedPEP_BaseRisk':np.nan, 
                 'exTotal_BaseRisk':np.nan, 'RedTotal_BaseRisk':np.nan, 
                    'CIP_Cost':np.nan,'CIP_NormCost': np.nan ,'OM_Cost':np.nan}]
    pd_tgtRR_tplt = pd.DataFrame(dict_def)
    pd_tgtRR_tplt = pd_tgtRR_tplt[['walkno','idxFacBMPAssignment', 'tgtRR', 'actRR',
                                   'objFunVal','best_walkno', 'best_objFunVal',
                                   'exPEP_BaseRisk', 'RedPEP_BaseRisk', 'exTotal_BaseRisk',
                                   'RedTotal_BaseRisk', 'CIP_Cost', 'CIP_NormCost', 'OM_Cost']]
    return pd_tgtRR_tplt



def setup_pd_tgtRR(tgtRR):
    #setup a target results dataframe to track how close we are to reaching target reduction rate (tgtRR)
    pd_tgtRR = pd_tgtRR_tplt.copy(deep=True)
    pd_tgtRR['tgtRR'] = tgtRR 
    return pd_tgtRR

from collections import deque

def analyzeAssignmentResults_RW(WalkNo, pd_tgtRR, dict_best, dict_hist , ls_Assignments):
    #FOR RANDOM WALK
    #calculate results, write to pd_tgtRR and return it
    #for now, assume we're targeting PEP risk vs CIP cost
    #but the 2 dimensions are many many orders of magnitude apart.
    #to make a workable objective function they need to be closer. but we don't know max CIP cost!
    #okay - assume it's 1e+9...norm CIP cost to it
    CIP_CostNormalizer = 1e+9
    #get some setup stuff:
    ls_fid = [np.asscalar(Assignment[0]) for Assignment in ls_Assignments] #list of facility ids we analyzed
    ls_bmpf = [Assignment[1] for Assignment in ls_Assignments] #list of bmp fingerprints we analyzed
    ls_idxFacBMPAssignment = [str(x[0]) + x[1] for x in zip(ls_fid,ls_bmpf)] #unique facility id-bmpfingerprint 
    pd_results = pdFaccp.loc[pdFaccp['idxFacBMPAssignment'].isin(ls_idxFacBMPAssignment)] #get results from calc. dataframe
    #now calculate stuff:
    pd_tgtRR['exPEP_BaseRisk'] = pd_results['PEP_BaseRisk'].sum(axis=0)
    pd_tgtRR['exTotal_BaseRisk'] = pd_results['Total_BaseRisk'].sum(axis=0)
    pd_tgtRR['RedPEP_BaseRisk'] = pd_results['RedPEP_BaseRisk'].sum(axis=0)
    pd_tgtRR['RedTotal_BaseRisk'] = pd_results['RedTotal_BaseRisk'].sum(axis=0)
    pd_tgtRR['CIP_Cost' ] = pd_results['CIP_Cost'].sum(axis=0)
    pd_tgtRR['CIP_NormCost' ] = pd_tgtRR['CIP_Cost']/CIP_CostNormalizer
    pd_tgtRR['OM_Cost' ] = pd_results['OM_Cost'].sum(axis=0)
    pd_tgtRR['actRR'] = (pd_tgtRR['exPEP_BaseRisk'] - pd_tgtRR['RedPEP_BaseRisk'])/pd_tgtRR['exPEP_BaseRisk']
    pd_tgtRR['objFunVal'] = math.pow(pd_tgtRR['tgtRR'] - pd_tgtRR['actRR'],2) + math.pow(pd_tgtRR['CIP_NormCost'],2) #obj fun = sum of the squares
    pd_tgtRR['walkno']  = WalkNo
    pd_tgtRR['idxFacBMPAssignment'] = ','.join(ls_idxFacBMPAssignment) #write index list to comma sep. string
    #assess objfun results:
    if pd_tgtRR['best_objFunVal'].iloc[0] > pd_tgtRR['objFunVal'].iloc[0]:  # pd_tgtRR['objFunVal'] > pd_tgtRR['best_objFunVal']
        pd_tgtRR['best_objFunVal'] = pd_tgtRR['objFunVal']
        pd_tgtRR['best_walkno'] = pd_tgtRR['walkno']
        dict_best[str(pd_tgtRR['tgtRR'].iloc[0])] = [pd_tgtRR.copy(deep=True), ls_Assignments] #put best record into best_dictionary @ reduction rate element 
    #append iteration history. for now, just accept. later, it will be acceptance based on some logic
    dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])].appendleft(pd_tgtRR.copy(deep=True))
    return pd_tgtRR, dict_best, dict_hist

def prAccept(T,ObjFunDelta):
    return math.exp(ObjFunDelta/T) #http://katrinaeg.com/simulated-annealing.html

def analyzeAssignmentResults_SA(pd_tgtRR, dict_best, dict_hist, dq_Assignments ,ls_Assignments, T, WalkNo):
    #FOR SIMULATED ANNEALING
    #calculate results, write to pd_tgtRR and return it
    #for now, assume we're targeting PEP risk vs CIP cost
    #but the 2 dimensions are many many orders of magnitude apart.
    #to make a workable objective function they need to be closer. but we don't know max CIP cost!
    #okay - assume it's 1e+9...norm CIP cost to it
    CIP_CostNormalizer = 1e+9
    #get some setup stuff:
    ls_fid = [Assignment[0] for Assignment in ls_Assignments] #list of facility ids we analyzed
    ls_bmpf = [Assignment[1] for Assignment in ls_Assignments] #list of bmp fingerprints we analyzed
    ls_idxFacBMPAssignment = [str(x[0]) + x[1] for x in zip(ls_fid,ls_bmpf)] #unique facility id-bmpfingerprint 
    pd_results = pdFaccp.loc[pdFaccp['idxFacBMPAssignment'].isin(ls_idxFacBMPAssignment)] #get results from calc. dataframe
    #now calculate stuff about current iteration:
    pd_tgtRR['exPEP_BaseRisk'] = pd_results['PEP_BaseRisk'].sum(axis=0)
    pd_tgtRR['exTotal_BaseRisk'] = pd_results['Total_BaseRisk'].sum(axis=0)
    pd_tgtRR['RedPEP_BaseRisk'] = pd_results['RedPEP_BaseRisk'].sum(axis=0)
    pd_tgtRR['RedTotal_BaseRisk'] = pd_results['RedTotal_BaseRisk'].sum(axis=0)
    pd_tgtRR['CIP_Cost' ] = pd_results['CIP_Cost'].sum(axis=0)
    pd_tgtRR['CIP_NormCost' ] = pd_tgtRR['CIP_Cost']/CIP_CostNormalizer
    pd_tgtRR['OM_Cost' ] = pd_results['OM_Cost'].sum(axis=0)
    pd_tgtRR['actRR'] = (pd_tgtRR['exPEP_BaseRisk'] - pd_tgtRR['RedPEP_BaseRisk'])/pd_tgtRR['exPEP_BaseRisk']
    pd_tgtRR['objFunVal'] = math.pow(pd_tgtRR['tgtRR'] - pd_tgtRR['actRR'],2) + math.pow(pd_tgtRR['CIP_NormCost'],2) #obj fun = sum of the squares
    pd_tgtRR['walkno']  = WalkNo
    pd_tgtRR['idxFacBMPAssignment'] = ','.join(ls_idxFacBMPAssignment) #write index list to comma sep. string

    #calc objective function delta
        #get prev. obj fun val:
    if len(dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])]) > 0:
        pd_hist = dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])][0]
        prevObjFunVal = pd_hist['objFunVal'].iloc[0]
        ObjFunDelta = pd_tgtRR['objFunVal'] - prevObjFunVal
    else:
        ObjFunDelta = 1
    
    #assess objfun results:
    #record new best if new best was found:
    newBestFound = False
    if pd_tgtRR['best_objFunVal'].iloc[0] > pd_tgtRR['objFunVal'].iloc[0]:  # pd_tgtRR['objFunVal'] > pd_tgtRR['best_objFunVal']
        pd_tgtRR['best_objFunVal'] = pd_tgtRR['objFunVal']
        pd_tgtRR['best_walkno'] = pd_tgtRR['walkno']
        dict_best[str(pd_tgtRR['tgtRR'].iloc[0])] = [pd_tgtRR.copy(deep=True), ls_Assignments] #put best record into best_dictionary @ reduction rate element 
        newBestFound = True
    #append history w/  based on acceptance probability. (last in history record is used to assign next round)
    #if deque is empty, accept
    if len(dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])]) > 0:
        dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])].appendleft(pd_tgtRR.copy(deep=True))
        dq_Assignments.appendleft(pd_results)
    elif random.uniform(0, 1) < prAccept(T,ObjFunDelta):
        dict_hist[str(pd_tgtRR['tgtRR'].iloc[0])].appendleft(pd_tgtRR.copy(deep=True))
        dq_Assignments.appendleft(pd_results)
    return pd_tgtRR, dict_best, dict_hist

##  THIS IS THE ASSIGN-EVAL-ANALYZE LOOP
At first I tried a pure random walk. this ended up being too slow. So, I then tried a simulated annealing approach.

In [28]:
'''
THIS IS MY RANDOM WALK APPROACH (FULLY COMMENTED OUT)
'''
#get ready to enter loop:
pd_tgtRR_tplt = make_pd_tgt_RR_tplt() #make the template results dataframe
pd_tgtRR50 =setup_pd_tgtRR(0.50) #50% PEP reduction rate
#dictionary holding best records for each reduction rate point
#FORM: {key=reduction rt pt: [pd_tgtRR, lsAssignment]}
dict_best = {'0.5': [pd_tgtRR50,[]]}
dict_hist = {'0.5': deque(maxlen=5)}
MaxWalks = 2
for aWalk in range(0,MaxWalks):
    #Evaluate the combo solutions in assignments dataframe:
    #each assignment is a list. inner list 1st element is Facility_ID, 2nd elelment is BMP fingerprint
    ShowCalculations = False
    start_time = time.time()
    pd_Assignments = AssignBMPs_RndWlk(ShowCalculations) #assign bmps randomly
    pdFaccp, ls_Assignments = EvalFacilityCalcs(pd_Assignments, pd_FacBMPComboData, ShowCalculations) #eval assignments
    pd_tgtRR50, dict_best, dict_hist = analyzeAssignmentResults_RW(aWalk, pd_tgtRR50, dict_best, dict_hist, ls_Assignments) #analyze results
    print ('--- Completed Random Evaluation Walk ', aWalk)    
    print ('--- %s execution time in seconds ---' % (time.time() - start_time))
    if aWalk % 10 == 0:
        display(pd_tgtRR50)


--- Completed Random Evaluation Walk  0
--- 6.815000057220459 execution time in seconds ---


Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,0,"1|6||7|,2|2||7||9||13||14|,3|10|,4|10||14|,5|1...",0.5,0.753737,0.792401,0,0.792401,104.13822,25.645399,186.784907,166.460568,853240200.0,0.85324,3633875.0


--- Completed Random Evaluation Walk  1
--- 7.005999803543091 execution time in seconds ---


In [29]:
#AFTER SIM RUN HERE IS THE BEST SOLUTION:
dict_best['0.5'][0]

Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,0,"1|6||7|,2|2||7||9||13||14|,3|10|,4|10||14|,5|1...",0.5,0.753737,0.792401,0,0.792401,104.13822,25.645399,186.784907,166.460568,853240200.0,0.85324,3633875.0


In [30]:
ls_leftORright = [-1,1]
def leftORRight():
    return ls_leftORright[random.randint(0,1)]

def HopToIdx(HopFactor, df, prevAssignment):
    #determine index (idx1) of df to hop to given HopFactor and current index (idx0)
    #formula: idx1 = (idx0 + +/-HopFactor* randomfloat[0,1] * dfSpan) mod dfSpan.
    hopSz = random.uniform(0,1)
    idx0= getIdx(df,prevAssignment['idxFacBMPAssignment'])
    #assign next index:
    if df.shape[0]-1 >0: #change index if theres another to change to
        idx1 =  int(round((idx0 + leftORRight() * HopFactor * random.uniform(0,1) * (df.shape[0]-1)) % (df.shape[0]-1),0))
    else: #this is the only index. stay at it
        idx1 = idx0
    return idx1

def getIdx(df,BMPFingerprint):
    val =  list(np.where(df['idxFacBMPAssignment'] == BMPFingerprint)[0])[0]
    return val

def AssignBMPs_SA(WalkNo, HopFactor, pd_PrevAssignment, ShowCalculations):
    # http://katrinaeg.com/simulated-annealing.html
    #generate a BMP combo solution for each facility
    #select option using a simulated annealing approach
    #Evaluate the combo solutions in assignments dataframe:
    #each assignment is a list. inner list 1st element is Facility_ID, 2nd elelment is BMP fingerprint    
    if WalkNo == 0: #initial assignment is random
        return AssignBMPs_RndWlk(ShowCalculations)        
    else: #assign using SA approach
        FacGroup = pdFaccp.groupby('Facility_ID') #group combo options by facility_id
        return FacGroup.apply(lambda aFac: print (aFac['idxFacBMPAssignment']))#aFac.iloc[HopToIdx(HopFactor, aFac, pd_PrevAssignment.loc[aFac['Facility_ID'].iloc[0]])]) 


In [31]:
'''
THIS IS MY SIMULATED ANNEALING ATTEMPT:
'''

import random
import time
random.seed(time.time())

def AssignEvalAssess_SA(ShowCalculations, pd_tgtRR50, dict_best, dict_hist, dq_Assignments, MaxWalks):
    # http://katrinaeg.com/simulated-annealing.html
    #initial random assignment, eval, and assessment:
    start_time = time.time()
    pd_Assignments = AssignBMPs_RndWlk(ShowCalculations) #assign bmps randomly
    pdFaccp, ls_Assignments = EvalFacilityCalcs(pd_Assignments, pd_FacBMPComboData, ShowCalculations) #eval assignments
    pd_tgtRR50, dict_best, dict_hist = analyzeAssignmentResults_SA(pd_tgtRR50, dict_best, dict_hist, dq_Assignments,
                                                                   ls_Assignments,1,0) #analyze results
    dq_Assignments.appendleft(pd_Assignments)
    print ('--- Completed Random Evaluation Walk ', 0)    
    print ('--- %s execution time in seconds ---' % (time.time() - start_time))
    display(pd_tgtRR50)      
    #gfet ready to enter SA loop:
    T=1.0
    T_Min = 0.001
    alpha = 0.9    
    #enter SA loop:
    WalkNo = 1
    while T > T_Min:
        print (prAccept(T,1))
        print (T)
        for iterAtT in range (0,25):
            start_time = time.time()
            pd_Assignments = AssignBMPs_SA(WalkNo, T, dq_Assignments[0], ShowCalculations) #assign bmps
            pdFaccp, ls_Assignments = EvalFacilityCalcs(pd_Assignments, pd_FacBMPComboData, ShowCalculations) #eval assignments
            pd_tgtRR50, dict_best, dict_hist = analyzeAssignmentResults_SA(pd_tgtRR50, dict_best, dict_hist, dq_Assignments,
                                                                           ls_Assignments, T, WalkNo) #analyze results
            print ('--- Completed Random Evaluation Walk ', WalkNo)    
            print ('--- %s execution time in seconds ---' % (time.time() - start_time))
            display(pd_tgtRR50)        
            #advance iterators:
            WalkNo = WalkNo + 1
            if WalkNo == MaxWalks:
                break
        #advance iterators:
        T = T * alpha
        if WalkNo == MaxWalks: 
            break
    return dict_best, dict_hist

#get ready to enter loop:
ShowCalculations = False
pd_tgtRR_tplt = make_pd_tgt_RR_tplt() #make the template results dataframe
pd_tgtRR50 =setup_pd_tgtRR(0.50) #50% PEP reduction rate
#dictionary holding best records for each reduction rate point
#FORM: {key=reduction rt pt: [pd_tgtRR, lsAssignment]}
dict_best = {'0.5': [pd_tgtRR50,[]]}
dict_hist = {'0.5': deque(maxlen=5)}
dq_Assignments = deque(maxlen=3)
MaxWalks = 5
    
AssignEvalAssess_SA(ShowCalculations, pd_tgtRR50, dict_best, dict_hist, dq_Assignments, MaxWalks)
    

--- Completed Random Evaluation Walk  0
--- 7.081000328063965 execution time in seconds ---


Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,0,"1|1||2||6||7||8||9||13||14|,2|2||3||8||13|,3|6...",0.5,0.847285,1.287915,0,1.287915,104.13822,15.903424,186.784907,156.718593,1080420000.0,1.08042,3878737.0


2.718281828459045
1.0
0                                1|1|
1                                1|2|
2                                1|3|
3                                1|6|
4                                1|7|
5                                1|8|
6                                1|9|
7                               1|13|
8                               1|14|
9                             1|1||2|
10                            1|1||3|
11                            1|1||6|
12                            1|1||7|
13                            1|1||8|
14                            1|1||9|
15                           1|1||13|
16                           1|1||14|
17                            1|2||3|
18                            1|2||6|
19                            1|2||7|
20                            1|2||8|
21                            1|2||9|
22                           1|2||13|
23                           1|2||14|
24                            1|3||6|
25                          

Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,1,,0.5,,,0,1.287915,0,0,0,0,0,0.0,0


0                                1|1|
1                                1|2|
2                                1|3|
3                                1|6|
4                                1|7|
5                                1|8|
6                                1|9|
7                               1|13|
8                               1|14|
9                             1|1||2|
10                            1|1||3|
11                            1|1||6|
12                            1|1||7|
13                            1|1||8|
14                            1|1||9|
15                           1|1||13|
16                           1|1||14|
17                            1|2||3|
18                            1|2||6|
19                            1|2||7|
20                            1|2||8|
21                            1|2||9|
22                           1|2||13|
23                           1|2||14|
24                            1|3||6|
25                            1|3||7|
26          

Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,2,,0.5,,,0,1.287915,0,0,0,0,0,0.0,0


0                                1|1|
1                                1|2|
2                                1|3|
3                                1|6|
4                                1|7|
5                                1|8|
6                                1|9|
7                               1|13|
8                               1|14|
9                             1|1||2|
10                            1|1||3|
11                            1|1||6|
12                            1|1||7|
13                            1|1||8|
14                            1|1||9|
15                           1|1||13|
16                           1|1||14|
17                            1|2||3|
18                            1|2||6|
19                            1|2||7|
20                            1|2||8|
21                            1|2||9|
22                           1|2||13|
23                           1|2||14|
24                            1|3||6|
25                            1|3||7|
26          

4006            11|9|
4007           11|10|
4008           11|14|
4009        11|9||10|
4010        11|9||14|
4011       11|10||14|
4012    11|9||10||14|
Name: idxFacBMPAssignment, dtype: object
4013            12|9|
4014           12|10|
4015           12|14|
4016        12|9||10|
4017        12|9||14|
4018       12|10||14|
4019    12|9||10||14|
Name: idxFacBMPAssignment, dtype: object
4020            13|9|
4021           13|10|
4022           13|14|
4023        13|9||10|
4024        13|9||14|
4025       13|10||14|
4026    13|9||10||14|
Name: idxFacBMPAssignment, dtype: object
4027                        14|1|
4028                        14|2|
4029                        14|3|
4030                        14|7|
4031                        14|9|
4032                       14|10|
4033                       14|14|
4034                     14|1||2|
4035                     14|1||3|
4036                     14|1||7|
4037                     14|1||9|
4038                    14|1||10|
4039   

Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,3,,0.5,,,0,1.287915,0,0,0,0,0,0.0,0


0                                1|1|
1                                1|2|
2                                1|3|
3                                1|6|
4                                1|7|
5                                1|8|
6                                1|9|
7                               1|13|
8                               1|14|
9                             1|1||2|
10                            1|1||3|
11                            1|1||6|
12                            1|1||7|
13                            1|1||8|
14                            1|1||9|
15                           1|1||13|
16                           1|1||14|
17                            1|2||3|
18                            1|2||6|
19                            1|2||7|
20                            1|2||8|
21                            1|2||9|
22                           1|2||13|
23                           1|2||14|
24                            1|3||6|
25                            1|3||7|
26          

4013            12|9|
4014           12|10|
4015           12|14|
4016        12|9||10|
4017        12|9||14|
4018       12|10||14|
4019    12|9||10||14|
Name: idxFacBMPAssignment, dtype: object
4020            13|9|
4021           13|10|
4022           13|14|
4023        13|9||10|
4024        13|9||14|
4025       13|10||14|
4026    13|9||10||14|
Name: idxFacBMPAssignment, dtype: object
4027                        14|1|
4028                        14|2|
4029                        14|3|
4030                        14|7|
4031                        14|9|
4032                       14|10|
4033                       14|14|
4034                     14|1||2|
4035                     14|1||3|
4036                     14|1||7|
4037                     14|1||9|
4038                    14|1||10|
4039                    14|1||14|
4040                     14|2||3|
4041                     14|2||7|
4042                     14|2||9|
4043                    14|2||10|
4044                    14|2||14

Unnamed: 0,walkno,idxFacBMPAssignment,tgtRR,actRR,objFunVal,best_walkno,best_objFunVal,exPEP_BaseRisk,RedPEP_BaseRisk,exTotal_BaseRisk,RedTotal_BaseRisk,CIP_Cost,CIP_NormCost,OM_Cost
0,4,,0.5,,,0,1.287915,0,0,0,0,0,0.0,0


({'0.5': [   walkno                                idxFacBMPAssignment  tgtRR     actRR  \
   0       0  1|1||2||6||7||8||9||13||14|,2|2||3||8||13|,3|6...    0.5  0.847285   
   
      objFunVal  best_walkno  best_objFunVal  exPEP_BaseRisk  RedPEP_BaseRisk  \
   0   1.287915            0        1.287915       104.13822        15.903424   
   
      exTotal_BaseRisk  RedTotal_BaseRisk      CIP_Cost  CIP_NormCost  \
   0        186.784907         156.718593  1.080420e+09       1.08042   
   
           OM_Cost  
   0  3.878737e+06  ,
   [[1, '|1||2||6||7||8||9||13||14|'],
    [2, '|2||3||8||13|'],
    [3, '|6||9||10||14|'],
    [4, '|9||10|'],
    [5, '|1||2||3||4||6||7||9||13|'],
    [6, '|10|'],
    [7, '|1||2||3||10|'],
    [8, '|7||9||10||14|'],
    [9, '|14|'],
    [10, '|10|'],
    [11, '|9||10||14|'],
    [12, '|10|'],
    [13, '|10||14|'],
    [14, '|3||7|'],
    [15, '|10||14|'],
    [34, '|1||3||13||14|'],
    [41, '|3||6||9||14|'],
    [78, '|14|'],
    [79, '|14|'],
    [80, 

In [32]:
# display(dict_best['0.5'][0])
df = dict_hist['0.5']
df
session.rollback()

# UH OH! CONVENIENCE CTR PEP NORM SCORES ARNET CALCULATING! WHY?!
*are there others?!!! might be a good idea to scan for pep norms == 0.

In [33]:
# session.close()
# engine.dispose()