'''
OBJECTIVES:
1. Build WRS system
2. Build Structural BMP Solution evaluator
3. Identify minimum BMP solution front for:
   individual facilities
   facilities w/in departments
   facilities w/in city
   
PYTHON VERSION: 3.6.3  
SQLALCHEMY VERSION: 1.1.13

'''

In [1]:
import winsound
import pandas as pd
'''
Define basic SQLAlchemy items:
    declarative base object
    connection object
    session object
    DB tables
'''
#SQLAlchemy library items:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String
from sqlalchemy import update, insert
from sqlalchemy import and_ #used in query.filter() to joing multiple where clauses
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship #http://docs.sqlalchemy.org/en/latest/orm/basic_relationships.html#relationship-patterns
from sqlalchemy import inspect

from SQLA_Base import Base #module containing declarative_base
from SQLA_conn_man import session, engine #module handling db and connection creation 

#Table definitions as SQLA classes:
from SQLA_DB_base_bmp_feasibility_test_results import Base_BMP_Feasibility_Test_Results as BBFTR
from SQLA_DB_base_bmp_feasibility_test_definitions import Base_BMP_Feasibility_Test_Definitions as BBFTD
from SQLA_DB_base_bmps import Base_BMPs
from SQLA_DB_combo_bmps import Combo_BMPs
from SQLA_DB_combo_bmp_feasibility_test_results import Combo_BMP_Feasibility_Test_Results as CBFTR
from SQLA_DB_expressions import Expressions
from SQLA_DB_facility_chars import Facility_Chars
from SQLA_DB_facility_monthly_rain import Facility_Monthly_Rain
from SQLA_DB_facility_risks import Facility_Risks
from SQLA_DB_facility_type_has_nel import Facility_Type_Has_NEL
from SQLA_DB_facility_types import Facility_Types
from SQLA_DB_feasibility_test_questions import Feasibility_Test_Questions as FTQ
from SQLA_DB_nel_sample_classes import NEL_Sample_Classes
from SQLA_DB_existing_pollutant_concentrations import Existing_Pollutant_Concentrations as ExPollConcs
from SQLA_DB_pollutant_removal_rates import Pollutant_Removal_Rates as PRR
from SQLA_DB_wrs_pollutant_risks import WRS_Pollutant_Risks
Base.metadata.create_all(engine, checkfirst=True) #create SQLA classes

'''
Dictionary of "SQLAlchemy where clause lambda functions" that importCSV uses to test record uniqueness.
used as the where clause in sqlalchemy queries, updates and deletes 
Form:
    {TableName:Lambda Function, TableName:Lambda Function, ...}
    
    TableName is the table name we want to define uniqueness test for
    Lambda Function is a SQLAlchemy query used to test record uniqueness. The function can take on any form 
        but must be made to evaluate the CSV row passed as a dictionary (CSVRowDict in this explanation):
        CSVRowDict: {FieldName:CSVColValue, DBTableFieldName:CSVColValue...} 
            Where: DBTableFieldName is the name of the field associated with the value at CSVColValue on the current row
                   CSVColValue: a value in the CSV's current row+column corresponding to the DBTableFieldName 
        *this assumes that field names are unique across table. if not, then method fails (maybe need to extend method?)
    FALSE: indicates that db table doesn't impose uniqueness on its records (other than its record id being unique)
        
e.g.: lambda CSVRowDict: Base.metadata.tables['people'].c['name'] == CSVRowDict['name']
        using this lambda function in a query will search the 'name' field of the 'people' table for CSVRowDict's value for 'name'
if table has no record uniqueness requirement, then enter: TableName:False
'''
def _unique_on(table_name, field_name):
    '''
    Build the uniqueness where-clause function for one table.

    The returned function takes the current CSV row as a dictionary (CSVRowDict)
    and compares the table's `field_name` column against the row's value
    stored under the same key.
    '''
    def where_clause(CSVRowDict):
        return Base.metadata.tables[table_name].c[field_name] == CSVRowDict[field_name]
    return where_clause


# TableName -> where-clause function, or False when the DB schema does not
# impose uniqueness on the table's records.
unqTests = {
    'facility_chars': _unique_on('facility_chars', 'Fac_Name'),
    'facility_monthly_rain': False,
    'facility_type_has_nel': False,
    'facility_risks': False,
    'facility_types': _unique_on('facility_types', 'Fac_Type'),
    'nel_sample_classes': _unique_on('nel_sample_classes', 'nel_column'),
    'existing_pollutant_concentrations': False,
    'wrs_pollutant_risks': False,
}

import SQLA_main as SQLA_main #import main SQLAlchemy functions



Clearing old DB


In [2]:
'''
Define other custom modules

'''
import mod_Base_BMP_Eval as BBMP_Eval
import mod_Combo_BMP_Eval as CBMP_Eval
import mod_EffluentLimit as EffLim
import mod_expression as Expr
import mod_importSpecial as importSpecial #special import functions are defined here
import mod_importCSV as importCSV #generic CSV importer ****IMPORTANT NOTE: function assumes csv in the utf-8-sig file format. weird things happen if its not in this format!!!


In [3]:
# Feasibility questions first: importing them also builds the feasibility expressions.
importSpecial.importFeasibilityQuestionsCSV('Input_Files\\feasibility_test_questions.csv')

# Base BMP information. Per the import's design this covers:
#   1. cip cost, o&m cost and BMP sizing definitions -> expressions table
#   2. pollutant removal rates -> pollutant_removal_rates table
#   3. a base_bmps record built from (1) and (2)
#   4. feasibility tests
importSpecial.importBaseBMPsCSV('Input_Files\\bmp_lego_piece.csv')

# Generic CSV imports, run strictly in the order listed: (banner to print, csv file).
generic_csv_imports = (
    # !!!!IMPORTANT!!!! facility characteristics must be imported before any
    # other facility specific data.
    ('\nImporting facility characteristics:', 'Input_Files\\facility_chars.csv'),
    # PBP Appendix A1 data
    ('\nImporting PBP Appendix A1 data:', 'Input_Files\\pbp_appxa1.csv'),
    # Facility rainfall extracted from http://rainfall.geography.hawaii.edu/downloads.html
    ('\nImporting Facility Rainfall Data:', 'Input_Files\\FacilityRainfallData.csv'),
    # Effluent limit existence per facility type (Priority Based Plan Table 3, or a
    # City operational assignment). With correctly set up CSV headers this inserts
    # NEL existence data (0 or 1) into the wrs_pollutant_risks table and uses
    # facility_type_has_nel to associate each record with its facility type.
    ('\nImporting Facility Type Has Effluent Limits:', 'Input_Files\\nel_exists_facility_types.csv'),
    # NEL classification data (from PBP Appendix L)
    ('\nImporting NEL Classes', 'Input_Files\\nel_pbp_appxl.csv'),
    # Facility risks. For future implementation: the current process inserts the
    # facility risk and updates existing_fac_char_id in the facility_chars table,
    # which creates dead records. A more sophisticated lambda function in unqTests
    # would fix this.
    ('\nImporting Facility Risks', 'Input_Files\\facility_risks.csv'),
    # Facility sampling data. !!!IMPORTANT!!!! For now non-detects are entered as 0,
    # BUT this must be changed to the detection limit, per DOH guidance.
    ('\nImporting Facilty Sampling data:', 'Input_Files\\sample_data.csv'),
)
for banner, csv_path in generic_csv_imports:
    print(banner)
    importCSV.importCSV(csv_path, unqTests)


# Development-only trim: keep just the first two facilities (ids 1 and 2) and
# drop the sampling data of every other facility.
# synchronize_session=False skips reconciling objects already loaded in the session, see
# http://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.delete
session.query(ExPollConcs).filter(ExPollConcs.facility_id > 2).delete(synchronize_session=False)
session.query(Facility_Chars).filter(Facility_Chars.id > 2).delete(synchronize_session=False)

# Because the session was not synchronised, commit before anything re-queries,
# or results may be unpredictable. commit must be *called*: a bare
# `session.commit` only references the method and does nothing.
session.commit()
winsound.Beep(250, 1000)

Reading csv for import to Feasibility Questions

Reading csv record: Feas-1
Adding to variable dictionary: OFFSITE_SD_Exist

Reading csv record: Feas-2
Adding to variable dictionary: GW_Risk

Reading csv record: Feas-3
Adding to variable dictionary: GW_Risk

Reading csv record: Feas-4
Adding to variable dictionary: Soil_Type

Reading csv record: Feas-5
Adding to variable dictionary: Soil_Type

Reading csv record: Feas-6
Adding to variable dictionary: Soil_Type

Reading csv record: Feas-7
Adding to variable dictionary: Count_CB

Reading csv record: Feas-8
Adding to variable dictionary: Runoff_Type

Reading csv record: Feas-9
Adding to variable dictionary: TFMR_Exist

Reading csv record: Feas-10
Adding to variable dictionary: DS_SS_Exist

Reading csv record: Feas-11
Adding to variable dictionary: Fac_Slope

Reading csv record: Feas-12
Adding to variable dictionary: Can_Add_SD

Reading csv record: Feas-13
Adding to variable dictionary: Pave_Area
Adding to variable dictionary: BMP_Size(bas

imported records in  238  rows
associating records...

Importing Facilty Sampling data:
importing data in CSV rows...
imported records in  110  rows
associating records...


In [4]:
# EVALUATE BASE BMP FEASIBILITY at each facility
# Write results to the base_bmp_feasibility_test_results table.

print('\n******Evaluating Base BMP feasibility at facilities.******')
Expr.ResetEvalErrorCount()  # reset the expression evaluator's error count before this run
for aFac in session.query(Facility_Chars):
    # The trailing ' ***' has to be inside the print call, otherwise it is
    # silently discarded as the second element of a throwaway tuple.
    print('\n***Evaluating base bmp feasibiilty tests for facility: ', aFac.Fac_Name, ' ***')
    myBMPs = session.query(Base_BMPs)
    for aBMP in myBMPs:
        print('\nEvaluating feasibility of base_bmp: ', aBMP.bmp_name, ' ID: ', aBMP.id)
        BBMP_Eval.Eval_base_bmp_feasibility_tests(aFac, aBMP)
# Call commit (a bare `session.commit` is a no-op) so this cell's work is
# committed here rather than by whichever later cell happens to commit.
session.commit()
winsound.Beep(250, 1000)
print('*****************************************************************')
print('* Completed evaluating Base BMP feasibility                     *')
if Expr.CountEvalErrors() > 0:
    print(Expr.CountEvalErrors(), ' errors were encountered. Review output to identify location(s)')
    print('Hint: expression evaluation error lines are prefixed by: FAULT!!!! Error occured while evaluating expression:')
else:
    print('No errors detected.')
print('*****************************************************************')


******Evaluating Base BMP feasibility at facilities.******

***Evaluating base bmp feasibiilty tests for facility:  Kalihi-Palama Bus & Paratransit Facility

Evaluating feasibility of base_bmp:  Hydrodynamic Separation  ID:  1

  Attempting eval of feasibility_test ID:  1
proccessing expression: Feas-1=OFFSITE_SD_Exist=='Yes'
    attempting to retrieve value for:  ('OFFSITE_SD_Exist', ['OFFSITE_SD_Exist', 'val', 'facility_chars', 'OFFSITE_SD_Exist', 'id', 'FLOAT'])
       QUERY RESULT: OFFSITE_SD_Exist='Yes'
  eval('Yes'=='Yes')=True
  Writing to DB Feasibility Test Result: True(1)
  Wrote to base_bmp_feasibility_test_results as recordID: 1

  Attempting eval of feasibility_test ID:  3
proccessing expression: Feas-3=GW_Risk!='High'
    attempting to retrieve value for:  ('GW_Risk', ['GW_Risk', 'val', 'facility_chars', 'GW_Risk', 'id', 'FLOAT'])
       QUERY RESULT: GW_Risk='High'
  eval('High'!='High')=False
  Writing to DB Feasibility Test Result: False(0)
  Wrote to base_bmp_feasibi

     This is a dynamic expression. Query for static expression using provided unique identifiers
       dynamic expression: BMP_Size(base_bmps~bmp_size_expression_id~bmp_name) =  static expression: bmp_size_expr_Media Filtration (Pressure)
       Reentering EvalExpr...
proccessing expression: bmp_size_expr_Media Filtration (Pressure)=586.06*WQFR - 41.868
    attempting to retrieve value for:  ('WQFR', ['WQFR', 'val', 'facility_chars', 'WQFR', 'id', 'FLOAT'])
       QUERY RESULT: WQFR=13.32024793
  eval(586.06*13.32024793 - 41.868)=7764.596501855799
  eval(867962.0>7764.596501855799)=True
  Writing to DB Feasibility Test Result: True(1)
  Wrote to base_bmp_feasibility_test_results as recordID: 15

  Attempting eval of feasibility_test ID:  15
proccessing expression: Feas-15=Pave_Area>Det_Size(base_bmps~bmp_size_expression_id~bmp_name)
    attempting to retrieve value for:  ('Pave_Area', ['Pave_Area', 'val', 'facility_chars', 'Pave_Area', 'id', 'FLOAT'])
       QUERY RESULT: Pave_Area=86

    attempting to retrieve value for:  ('OFFSITE_SD_Exist', ['OFFSITE_SD_Exist', 'val', 'facility_chars', 'OFFSITE_SD_Exist', 'id', 'FLOAT'])
       QUERY RESULT: OFFSITE_SD_Exist='Yes'
  eval('Yes'=='Yes')=True
  Writing to DB Feasibility Test Result: True(1)
  Wrote to base_bmp_feasibility_test_results as recordID: 36

  Attempting eval of feasibility_test ID:  3
proccessing expression: Feas-3=GW_Risk!='High'
    attempting to retrieve value for:  ('GW_Risk', ['GW_Risk', 'val', 'facility_chars', 'GW_Risk', 'id', 'FLOAT'])
       QUERY RESULT: GW_Risk='High'
  eval('High'!='High')=False
  Writing to DB Feasibility Test Result: False(0)
  Wrote to base_bmp_feasibility_test_results as recordID: 37

  Attempting eval of feasibility_test ID:  4
proccessing expression: Feas-4=Soil_Type!='Rock'
    attempting to retrieve value for:  ('Soil_Type', ['Soil_Type', 'val', 'facility_chars', 'Soil_Type', 'id', 'FLOAT'])
       QUERY RESULT: Soil_Type='Quarry'
  eval('Quarry'!='Rock')=True
  Writin

*****************************************************************
* Completed evaluating Base BMP feasibility                     *
No errors detected.
*****************************************************************


In [5]:
#Estimate Pollutant Effluent Limits
'''
Estimate the Numeric Effluent Limits (NELs) for each facility.
Return wet and dry season NELs in 2 separate dataframes:
    pd_FacsNELs_Wet & pd_FacsNELs_Dry
Estimate NELs using the EffLim module's GetNELs function call.
 The GetNELs function call will differentiate between wet and dry season limits
 (if limits are the same between wet & dry season, then the same limit will be placed into the wet and dry
  dataframes.)
 The GetNEls function calculates a pollutant constituent NEL using this formula:
    NEL = fTypeHas_NEL * SampleClass_NEL
    Where:
      fTypeHas_NEL is a [0,1] value from PBP Table 3, based on facility type (stored in SQLA_DB_facility_type_has_nel)
      SampleClass_NEL is pollutant concentration based on facility's sample class, based on PBP Appendix L
'''
# Start from empty wet / dry season NEL frames and append one facility at a time.
pd_FacsNELs_Wet = pd.DataFrame()
pd_FacsNELs_Dry = pd.DataFrame()
for recFac in session.query(Facility_Chars):
    # GetNELs returns the facility's (wet, dry) NELs; per the notes above:
    # NEL = fTypeHas_NEL * SampleClass_NEL.
    # NOTE(review): the meaning of the second argument (True) is not visible here - confirm in EffLim.
    wet, dry = EffLim.GetNELs(recFac, True)
    pd_FacsNELs_Wet = pd.concat([pd_FacsNELs_Wet, wet])
    pd_FacsNELs_Dry = pd.concat([pd_FacsNELs_Dry, dry])

# Show both accumulated frames, wet season first.
for title, frame in (('Wet NELs:', pd_FacsNELs_Wet), ('Dry NELs:', pd_FacsNELs_Dry)):
    print(title)
    display(frame)



Summary of Wet & Dry Season NEL Determination for:  Kalihi-Palama Bus & Paratransit Facility


Unnamed: 0_level_0,wrs_tss,wrs_turbidity,wrs_p,wrs_n,wrs_nn,wrs_an,wrs_og,wrs_cu,wrs_zn,wrs_fe,wrs_phmin,wrs_phmax
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Dry Season NELs* (Col. I):,30.0,5.5,0.06,0.38,0.09,,15.0,6.0,22.0,1000.0,5.5,8.0
Wet Season NELs* (Col. II):,50.0,15.0,0.1,0.52,0.18,,15.0,6.0,22.0,1000.0,5.5,8.0
NEL Exists**:,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
Facility Dry Season NELs:,30.0,5.5,0.06,0.38,0.09,,15.0,,,,5.5,8.0
Facility Wet Season NELs:,50.0,15.0,0.1,0.52,0.18,,15.0,,,,5.5,8.0


   Notes: *Per PBP Appendix L; **Facility Type Requires this NEL (0: No; 1: Yes)

Summary of Wet & Dry Season NEL Determination for:  Pearl City Bus Facility


Unnamed: 0_level_0,wrs_tss,wrs_turbidity,wrs_p,wrs_n,wrs_nn,wrs_an,wrs_og,wrs_cu,wrs_zn,wrs_fe,wrs_phmin,wrs_phmax
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Dry Season NELs* (Col. I):,30.0,5.5,0.06,0.38,0.09,,15.0,6.0,22.0,1000.0,5.5,8.0
Wet Season NELs* (Col. II):,50.0,15.0,0.1,0.52,0.18,,15.0,6.0,22.0,1000.0,5.5,8.0
NEL Exists**:,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
Facility Dry Season NELs:,30.0,5.5,0.06,0.38,0.09,,15.0,,,,5.5,8.0
Facility Wet Season NELs:,50.0,15.0,0.1,0.52,0.18,,15.0,,,,5.5,8.0


   Notes: *Per PBP Appendix L; **Facility Type Requires this NEL (0: No; 1: Yes)
Wet NELs:


Unnamed: 0_level_0,wrs_tss,wrs_turbidity,wrs_p,wrs_n,wrs_nn,wrs_an,wrs_og,wrs_cu,wrs_zn,wrs_fe,wrs_phmin,wrs_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,50.0,15.0,0.1,0.52,0.18,,15.0,,,,5.5,8.0
2,50.0,15.0,0.1,0.52,0.18,,15.0,,,,5.5,8.0


Dry NELs:


Unnamed: 0_level_0,wrs_tss,wrs_turbidity,wrs_p,wrs_n,wrs_nn,wrs_an,wrs_og,wrs_cu,wrs_zn,wrs_fe,wrs_phmin,wrs_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,30.0,5.5,0.06,0.38,0.09,,15.0,,,,5.5,8.0
2,30.0,5.5,0.06,0.38,0.09,,15.0,,,,5.5,8.0


In [6]:
import pandas as pd
import numpy as np
import math
import datetime

In [7]:
#let's work on calculating exceedance values




###### stuff above this line should go into a module

# Pull every sampling record (all facilities) into a dataframe: record id,
# facility id (relabelled 'Facility_ID'), sample date and one c_* concentration
# column per pollutant constituent.
q = session.query(ExPollConcs.id, ExPollConcs.facility_id.label('Facility_ID'), ExPollConcs.sample_date, 
        ExPollConcs.c_tss,
        ExPollConcs.c_turbidity,
        ExPollConcs.c_p,
        ExPollConcs.c_n,
        ExPollConcs.c_nn,
        ExPollConcs.c_an,
        ExPollConcs.c_og,
        ExPollConcs.c_cu,
        ExPollConcs.c_zn,
        ExPollConcs.c_fe,
        ExPollConcs.c_phmin,
        ExPollConcs.c_phmax  
         )#.filter(ExPollConcs.facility_id == FacID)
pd_Concs = pd.read_sql(q.statement,session.bind) 

#tidy up the sampling data
# NOTE(review): this import rebinds the name `datetime` from the module (imported
# in the previous cell) to the datetime class, and the class is not used in this
# cell - confirm whether the import is still needed.
from datetime import datetime
# sample dates are parsed with the month/day/4-digit-year format
pd_Concs['sample_date'] = pd.to_datetime(pd_Concs['sample_date'], format="%m/%d/%Y")
pd_Concs = pd_Concs.applymap(lambda x: float('nan') if x is None else x) #assign NaN values to any None element
print ('Sampled Concentrations')
display(pd_Concs)

pollLS = ['tss', 'turbidity', 'p', 'n', 'nn', 'an', 'og', 'cu', 'zn', 'fe', 'phmin', 'phmax'] #list of pollutant constituents we're trying to address

#initialize the pd_Exceedances dataframe, which will hold our exceedance calculations
# concLS = the two identifying columns plus one 'c_<constituent>' column per entry in pollLS
concLS = ['Facility_ID','sample_date'] + ['c_' + Constituent for Constituent in pollLS]
pd_Exceedances = pd_Concs.loc[:, concLS ] #copy data into the new delta dataframe

#for each pollutant constituent, do the Exceedance Calculation = max(0,(Constituent Concentration - NEL))
# if no exceedance, then report 0. report NaN if the sample result is NaN
# myRow['c_'+Constituent]
# NOTE(review): every pass of the facility loop recomputes each c_* column over ALL
# rows of pd_Concs and overwrites the whole column, so only the last facility's pass
# survives. If ExceedanceCalc uses FacID to pick the NELs (not visible here), rows of
# other facilities are compared against the last facility's limits - confirm in EffLim.
# FacID also stays defined as a notebook global after this loop.
for Facs in session.query(Facility_Chars.id):
    FacID = Facs[0]
    for Constituent in pollLS:
        pd_Exceedances['c_' + Constituent]  = pd_Concs.apply(lambda x: #for each row in pd_Concs:
                                 EffLim.ExceedanceCalc(x, Constituent, FacID, pd_FacsNELs_Wet, pd_FacsNELs_Dry), axis = 1)
print('Concentrations in excess of wet/dry season NELs')
display(pd_Exceedances)

Sampled Concentrations


Unnamed: 0,id,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax
0,35,1,2017-04-19,122.0,,,2.43,,,,,,,7.0,7.0
1,36,1,2017-02-11,59.0,13.0,0.097,0.58,,,,,,,8.3,8.3
2,37,1,2016-12-04,80.0,71.2,0.3,0.91,0.12,0.141,0.0,,,,8.2,8.2
3,38,1,2016-06-17,83.0,81.4,0.25,0.94,0.2,0.06,0.0,,,,6.92,6.92
4,39,1,2015-02-20,33.5,17.5,0.176,1.83,0.19,1.09,5.7,,,,8.54,8.54
5,40,1,2014-04-13,14.0,8.5,0.244,2.037,0.247,0.556,4.7,,,,6.64,6.64
6,41,1,2013-03-09,163.0,24.4,0.155,1.199,0.239,0.073,5.0,,,,8.09,8.09
7,80,2,2017-04-20,,13.0,,0.78,,,,,,,,
8,81,2,2017-01-21,0.0,31.0,0.1,1.58,0.0,0.418,0.0,,,,7.2,7.2
9,82,2,2016-05-05,7.0,4.9,0.066,2.672,0.212,0.416,0.0,,,,6.83,6.83


Concentrations in excess of wet/dry season NELs


Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax
0,1,2017-04-19,72.0,,,1.91,,,,,,,0.0,
1,1,2017-02-11,9.0,0.0,0.0,0.06,,,,,,,0.0,
2,1,2016-12-04,30.0,56.2,0.2,0.39,0.0,,0.0,,,,0.0,
3,1,2016-06-17,53.0,75.9,0.19,0.56,0.11,,0.0,,,,0.0,
4,1,2015-02-20,0.0,2.5,0.076,1.31,0.01,,0.0,,,,0.0,
5,1,2014-04-13,0.0,0.0,0.144,1.517,0.067,,0.0,,,,0.0,
6,1,2013-03-09,113.0,9.4,0.055,0.679,0.059,,0.0,,,,0.0,
7,2,2017-04-20,,0.0,,0.26,,,,,,,,
8,2,2017-01-21,0.0,16.0,0.0,1.06,0.0,,0.0,,,,0.0,
9,2,2016-05-05,0.0,0.0,0.006,2.292,0.122,,0.0,,,,0.0,


In [None]:
'''
CALCULATE AGE FACTOR WEIGHTED AVERAGE FOR EACH CONSTITUENT:

Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge 
(i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part 
of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

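AF = exp(-SampleRank)
SampleRank = Newest sample = 0
              Second Newest sample = 1
              ...
              nth Newest Sample = n-1 (out of n samples)
(SampleRank is the 0-based row position after sorting newest-first. The weighted
 average is sum(AF*c)/sum(AF), so shifting every rank by the same constant, e.g.
 starting at 1 instead of 0, scales all weights equally and leaves the average unchanged.)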
'''
import numpy as np
import math

def Calc_AFWtdExceedances(pd_FacExceedances):
    '''
    Calculate the age factor (AF) weighted average exceedance of each constituent
    for ONE facility.

    input:
        pd_FacExceedances: exceedance dataframe holding the rows of a single
                           facility. Must have the columns 'Facility_ID',
                           'sample_date' and one 'c_<constituent>' column for
                           every constituent in pollLS.
    returns:
        one-row dataframe with 'Facility_ID' and one 'AFWtd_c_<constituent>'
        column per constituent.
    raises:
        ValueError if the dataframe does not hold exactly one facility. The
        facility id is read from the data itself rather than from a notebook
        global, so the result is always labelled with the facility it was
        computed from.
    '''
    facility_ids = pd_FacExceedances['Facility_ID'].unique()
    if len(facility_ids) != 1:
        raise ValueError('Calc_AFWtdExceedances expects the rows of exactly one facility; '
                         'found Facility_IDs: ' + str(list(facility_ids)))
    facility_id = facility_ids[0]

    #one row per sample date, keeping the max value of each column for that date.
    #sort newest-first and renumber so the row index is the SampleRank (0 = newest)
    pd_AFFacExceedances = pd_FacExceedances.groupby('sample_date').max()
    pd_AFFacExceedances = pd_AFFacExceedances.sort_index(ascending=False).reset_index()

    #AF = exp(-SampleRank). placed as the 3rd column (after sample_date and
    #Facility_ID); the position is purely an aesthetics issue
    age_factors = [math.exp(-SRank) for SRank in pd_AFFacExceedances.index]
    pd_AFFacExceedances.insert(2, 'AF', age_factors)

    #calculate age factored exceedances (c_constituent * AF) for each constituent.
    for Constituent in pollLS:
        pd_AFFacExceedances['AF*c_' + Constituent] = (
            pd_AFFacExceedances['AF'] * pd_AFFacExceedances['c_' + Constituent])
    print ('AF calculation summary:')
    display(pd_AFFacExceedances)

    #calculate age factor weighted average for each constituent.
    #NOTE(review): Series.sum() skips NaN, so a missing sample adds nothing to the
    #numerator while its AF still counts in SumAF, i.e. it is averaged in as a zero
    #exceedance - confirm this is intended.
    pd_AFWFacExceedances = pd.DataFrame({'Facility_ID': [facility_id]})
    SumAF = pd_AFFacExceedances['AF'].sum() #get sum of AF weights
    for Constituent in pollLS:
        pd_AFWFacExceedances['AFWtd_c_' + Constituent] = (
            pd_AFFacExceedances['AF*c_' + Constituent].sum() / SumAF)
    print ('Age Factor Weighted Averages')
    display(pd_AFWFacExceedances)
    return pd_AFWFacExceedances


# Facilities are processed one at a time, so collect the distinct facility ids
# present in the exceedance data.
UnqFacIDs = pd_Exceedances['Facility_ID'].unique()
print(type(UnqFacIDs))
# df.loc[df['column_name'] == some_value]


# FacGroups = pd_Exceedances.groupby('Facility_ID')
# print (FacGroups.get_group(1))

# UnqFacIDs = pd_Exceedances.Facility_ID.unique() #get unique facility IDs
# dict_pd_FacExceedances = {str(FacID): pd.DataFrame for FacID in UnqFacIDs} #create dict of facility exceedances
# display(pd_Exceedances)
# for key, value in dict_pd_FacExceedances.items():
#     display(value)
# #     Calc_AFWtdExceedances(value)
    



<class 'numpy.ndarray'>


In [None]:
# Create combo BMPs from the base BMPs.
# All possible combos are created and added to the combo_bmps table.
# A combo's maximum pollutant removal rate for a given pollutant is that of the
# base BMP in the combo with the highest removal rate for that pollutant.

import time

print('get a coffee...this one takes a while!')
start_time = time.time()
CBMP_Eval.Make_ALL_bmp_base_option_combos()
session.commit()
elapsed_seconds = time.time() - start_time
print('--- %s execution time in seconds ---' % elapsed_seconds)

get a coffee...this one takes a while!
 Making BMP Combos of length: 1
 Find max pollutant removal rates for each BMP Combo of length:  1
  Made  13  combos
 Making BMP Combos of length: 2
 Find max pollutant removal rates for each BMP Combo of length:  2
  Made  78  combos
 Making BMP Combos of length: 3
 Find max pollutant removal rates for each BMP Combo of length:  3
  Made  286  combos
 Making BMP Combos of length: 4
 Find max pollutant removal rates for each BMP Combo of length:  4
  Made  715  combos
 Making BMP Combos of length: 5
 Find max pollutant removal rates for each BMP Combo of length:  5
  Made  1287  combos
 Making BMP Combos of length: 6
 Find max pollutant removal rates for each BMP Combo of length:  6


In [None]:
'''
Identify the feasible bmp combinations for each facility
Use base bmp feasibility results for each facility.
Put results into the combo_bmp_feasibility_test_results table
'''
import itertools     #https://docs.python.org/3/library/itertools.html    
import pandas as pd

from sqlalchemy import and_

def _Make_bmp_fingerprint(base_BMP_components):
    '''
    Build the fingerprint string for a combo of base bmps.

    The fingerprint is each base bmp id wrapped in '|' and concatenated, e.g.
    [1, 2, 3] -> '|1||2||3|'. It corresponds to the bmp_fingerprint field.
    The ids are used in the order given (nothing is sorted here), so callers
    that need the ascending-order form must pass the ids already sorted.
    An empty list gives a lone '|'.
    '''
    wrapped_ids = ['|' + str(bmp_id) + '|' for bmp_id in base_BMP_components]
    return ''.join(wrapped_ids) or '|'

def Eval_FacBMPCombo(pd_basebmps, myFacility, bmpCombo):
    '''
    Summarise one combo bmp at one facility.

    input:
        pd_basebmps: dataframe of base bmp results indexed by base_bmp_id; the
                     'calc_cip_cost' and 'calc_om_cost' columns are read here
        myFacility: SQLA facility_chars record (only its id is used)
        bmpCombo: list of base_bmp_ids that make up this combo

    steps:
        1. read the combo's previously computed pollutant removal rates
        2. insert/update the facility + combo record in
           combo_bmp_feasibility_test_results and print the value returned
        3. sum the cip and o&m costs of the combo's base bmps

    returns:
        dataframe with the removal rate columns and the two cost sums side by side
    '''
    #removal rates of the combo whose fingerprint matches bmpCombo
    combo_fingerprint = _Make_bmp_fingerprint(bmpCombo)
    removal_rate_query = (
        session.query(
            Combo_BMPs.bmp_fingerprint,
            Combo_BMPs.id.label('combos_bmp_id'),
            PRR.id.label('PRR_id'),
            PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
            PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax,
        )
        .filter(Combo_BMPs.bmp_fingerprint == combo_fingerprint)
        .filter(Combo_BMPs.bmp_option_removal_rate_id == PRR.id)
    )
    pd_rr = pd.read_sql(removal_rate_query.statement, session.bind)

    #insert/update the result record for this facility + combo, matched on both ids
    results_table = Base.metadata.tables['combo_bmp_feasibility_test_results']
    combo_id = pd_rr['combos_bmp_id'][0]  #first (expected only) matching combo
    record_values = {'facility_id': myFacility.id, 'combo_bmps_id': combo_id}
    same_facility_and_combo = and_(
        results_table.c['facility_id'] == myFacility.id,
        results_table.c['combo_bmps_id'] == combo_id,
    )
    result_record_id = SQLA_main.insertupdateRec(results_table, record_values, same_facility_and_combo)
    session.flush()

    print (result_record_id)

    #WRS reduction is not calculated yet

    #combo cost = sum of the costs of its base bmps
    cip_total = sum([pd_basebmps.loc[bmp_id, 'calc_cip_cost'] for bmp_id in bmpCombo])
    om_total = sum([pd_basebmps.loc[bmp_id, 'calc_om_cost'] for bmp_id in bmpCombo])
    pd_sums = pd.DataFrame([{'calc_cip_cost': cip_total, 'calc_om_cost': om_total}])

    #removal rates and costs, column-wise, in one dataframe
    return pd.concat([pd_rr, pd_sums], axis=1)
    
    

def Eval_FacBMPOptions(myFacility):
    '''
    Evaluate every combination of feasible base bmps at one facility.

    A wrapper around Eval_FacBMPCombo: the facility's base bmp results are
    loaded, filtered to the feasible ones (is_feasible == 1), and every
    combination of length 1..n of those ids is summarised and displayed.

    input:
        myFacility: SQLA facility_chars record to evaluate. Everything below
                    uses this argument; relying on a notebook global here would
                    evaluate whichever facility an earlier loop left behind.
    '''
    print('\n***Evaluating feasible bmp combos for facility: ', myFacility.Fac_Name, '***')
    print ('****Evaluating feasibile base bmps****')
    #NOTE(review): the meaning of the second argument (False) is not visible here - confirm in BBMP_Eval
    df = pd.DataFrame(BBMP_Eval.evalFacility_BaseBMP(myFacility, False)).set_index('base_bmp_id')
    display (df)
    print ('****These are the feasible base bmps. I\'ll use them to make combos:****')
    df = df.loc[df['is_feasible'] == 1]
    display (df)
    feas_ls = df.index #ids of the feasible base bmps
    for CBOLen in range (1, len(feas_ls)+1): #+1 so it's inclusive of last count
        for combo in itertools.combinations(feas_ls, CBOLen):
            print ('Here is a summary of the combo: ', list(combo))
            display(Eval_FacBMPCombo(df, myFacility, list(combo)))

            
def Eval_All_FacBMPOptions():
    '''
    Run Eval_FacBMPOptions once for every facility_chars record.
    '''
    print ('Evaluating feasibile BMP Options for each facility:')
    all_facilities = session.query(Facility_Chars)
    for facility in all_facilities:
        Eval_FacBMPOptions(facility)


# Evaluate every facility, then commit the session.
Eval_All_FacBMPOptions()
session.commit()

In [None]:
# session.close()
# engine.dispose()

In [None]:
# Scratch cell: pandas label-based indexing, following
# http://pythonhow.com/accessing-dataframe-columns-rows-and-cells/
import pandas as pd

print('#get csv data and read into pandas')
df1 = pd.read_csv('http://pythonhow.com/wp-content/uploads/2016/01/Income_data.csv')
print(df1)

print('#write new dataframe w/ index set to the "State" column in the csv')
df2 = df1.set_index('State').copy()
print(df2)

print('#extract a portion of the dataframe: States = Alaska to Arkansas; and Dates 2005:2007')
print(df2.loc['Alaska':'Arkansas', '2005':'2007'])

print('Get only certain States, using a list of states:')
getStates = ['Alaska', 'Arizona']
print(df2.loc[getStates])

# The two .loc expressions below are evaluated but their results are not printed.
print('#slice a column:')
df2.loc[:, '2005']
print('get a cell:')
df2.loc['Alaska', '2005']

print('#get max of 2005 data')
print(df2.loc[:, '2005'].max())

print('take 2005 column and put into list')
LS = df2['2005'].tolist()  # column selection gives a Series; .tolist() converts it to a list
print(type(LS))


In [None]:
# Scratch cell: element-by-element multiplication of two equally shaped frames.
df = pd.DataFrame(
    {
        'col1': [1.0] * 5,
        'col2': [2.0] * 5,
        'col3': [3.0] * 5,
    },
    index=range(1, 6),
)
display(df)

df2 = pd.DataFrame(
    {
        'col1': [10.0] * 5,
        'col2': [100.0] * 5,
        'col3': [1000.0] * 5,
    },
    index=range(1, 6),
)
display(df2)

df.mul(df2, 0)  # element by element multiplication no problems

In [None]:
# Scratch cell: classify a sample date as wet or dry season, and check NaN handling.
import datetime
import math  # imported here so the cell does not depend on an earlier cell having run

import numpy as np

# xmin = datetime.datetime.strptime('1/1/2018', "%m/%d/%Y").date()
# xmax = datetime.datetime.strptime('5/6/2018', "%m/%d/%Y").date()

# xmin <= datetime.date(2018,1,5) <= xmax

#     Wet Season is from: January 1 through April 30 and November 1 through December 31
#     Dry Season is from: May 1 through October 31

SampleDate = datetime.date(2018, 11, 1)

# Wet season 1 (Jan 1 - Apr 30), dry season (May 1 - Oct 31), otherwise wet season 2.
if datetime.date(SampleDate.year, 1, 1) <= SampleDate <= datetime.date(SampleDate.year, 4, 30):
    print('ws 1')
elif datetime.date(SampleDate.year, 5, 1) <= SampleDate <= datetime.date(SampleDate.year, 10, 31):
    print('dry')
else:
    print('ws 2')

# np.max propagates NaN regardless of where it sits in the list.
# np.max([float('nan'),0])
np.max([0, float('nan')])

# The `if` needs a colon and a body to be valid syntax; 10 is not NaN, so
# nothing is printed.
if math.isnan(10):
    print('value is NaN')

In [None]:
# Scratch cell: pandas groupby basics on a small sample table.
import pandas as pd
import numpy as np

ipl_data = {
    'Team': [
        'Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'kings',
        'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders',
    ],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015, 2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812, 756, 788, 694, 701, 804, 690],
}
df = pd.DataFrame(ipl_data)

display(df)

# Mapping of each team name to the row labels that belong to it.
display(df.groupby('Team').groups)

# Highest 'Points' value within each year.
grouped = df.groupby('Year')
print(grouped['Points'].agg(np.max))