'''
OBJECTIVES:
1. Build WRS system
2. Build Structural BMP Solution evaluator
3. Identify minimum BMP solution front for:
   individual facilities
   facilities w/in departments
   facilities w/in city
   
PYTHON VERSION: 3.6.3  
SQLALCHEMY VERSION: 1.1.13

'''

### Pollutant Constituents
Below are the pollutant constituents we attempt to address through this alternatives analysis

In [1]:
#############################################################################################################
#                   
#                                       DEFINE GLOBAL VARIABLE pollLS
#############################################################################################################     
pollLS = ['tss', 'turbidity', 'p', 'n', 'nn', 'an', 'og', 'cu', 'zn', 'fe', 'phmin', 'phmax'] 

# Program Setup
## (Importing libraries, defining database)

In [2]:
#import standard python libraries:
import winsound
import pandas as pd
import numpy as np
import math
import datetime
import calendar
import time

In [3]:
#IMPORT AND DEFINE sqlalchemy libraries, tables, and session engine
#SQLAlchemy library items:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String
from sqlalchemy import update, insert
from sqlalchemy import and_ #used in query.filter() to joing multiple where clauses
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship #http://docs.sqlalchemy.org/en/latest/orm/basic_relationships.html#relationship-patterns
from sqlalchemy import inspect

from SQLA_Base import Base #module containing declarative_base
from SQLA_conn_man import session, engine #module handling db and connection creation 

#Table definitions as SQLA classes:
from SQLA_DB_base_bmp_feasibility_test_results import Base_BMP_Feasibility_Test_Results as BBFTR
from SQLA_DB_base_bmp_feasibility_test_definitions import Base_BMP_Feasibility_Test_Definitions as BBFTD
from SQLA_DB_base_bmps import Base_BMPs
from SQLA_DB_combo_bmps import Combo_BMPs
from SQLA_DB_combo_bmp_feasibility_test_results import Combo_BMP_Feasibility_Test_Results as CBFTR
from SQLA_DB_expressions import Expressions
from SQLA_DB_facility_chars import Facility_Chars
from SQLA_DB_facility_monthly_rain import Facility_Monthly_Rain
from SQLA_DB_facility_risks import Facility_Risks
from SQLA_DB_facility_type_has_nel import Facility_Type_Has_NEL
from SQLA_DB_facility_types import Facility_Types
from SQLA_DB_feasibility_test_questions import Feasibility_Test_Questions as FTQ
from SQLA_DB_nel_sample_classes import NEL_Sample_Classes
from SQLA_DB_existing_pollutant_concentrations import Existing_Pollutant_Concentrations as ExPollConcs
from SQLA_DB_pollutant_removal_rates import Pollutant_Removal_Rates as PRR
from SQLA_DB_wrs_pollutant_risks import WRS_Pollutant_Risks
Base.metadata.create_all(engine, checkfirst=True) #create SQLA classes

'''
Dictionary of "SQLAlchemy where clause lambda functions" that importCSV uses to test record uniqueness.
used as the where clause in sqlalchemy queries, updates and deletes 
Form:
    {TableName:Lambda Function, TableName:Lambda Function, ...}
    
    TableName is the table name we want to define uniqueness test for
    Lambda Function is a SQLAlchemy query used to test record uniqueness. The function can take on any form 
        but must be made to evaluate the CSV row passed as a dictionary (CSVRowDict in this explanation):
        CSVRowDict: {FieldName:CSVColValue, DBTableFieldName:CSVColValue...} 
            Where: DBTableFieldName is the name of the field associated with the value at CSVColValue on the current row
                   CSVColValue: a value in the CSV's current row+column corresponding to the DBTableFieldName 
        *this assumes that field names are unique across table. if not, then method fails (maybe need to extend method?)
    FALSE: indicates that db table doesn't impose uniqueness on its records (other than its record id being unique)
        
e.g.: lambda myRowVal: Base.metadata.tables['people'].c['name'] == CSVRowDict['name']
        using lambda function in query will search for CSVRowDict's value for 'name' in the table people, field name 
if table has no record uniqueness requirement, then enter: TableName:False
'''
unqTests = {
    'facility_chars': lambda CSVRowDict: Base.metadata.tables['facility_chars'].c['Fac_Name'] == CSVRowDict['Fac_Name'],
    'facility_monthly_rain': False, #DB schema does not impose uniqueness on records in this table
    'facility_type_has_nel': False,
    'facility_risks': False,
    'facility_types': lambda CSVRowDict: Base.metadata.tables['facility_types'].c['Fac_Type'] == CSVRowDict['Fac_Type'],
    'nel_sample_classes': lambda CSVRowDict: Base.metadata.tables['nel_sample_classes'].c['nel_column']==CSVRowDict['nel_column'],
    'existing_pollutant_concentrations': False, #uniqueness not imposed for records in this table.
    'wrs_pollutant_risks': False #DB schema does not impose uniqueness on records in this table
}

import SQLA_main as SQLA_main #import main SQLAlchemy functions


Clearing old DB


In [4]:
'''
Define other custom modules
'''
import mod_Base_BMP_Eval as BBMP_Eval
import mod_Combo_BMP_Eval as CBMP_Eval
import mod_EffluentLimit as EffLim
import mod_expression as Expr
import mod_importSpecial as importSpecial #special import functions are defined here
import mod_importCSV as importCSV #generic CSV importer ****IMPORTANT NOTE: function assumes csv in the utf-8-sig file format. weird things happen if its not in this format!!!


#  Import Data

In [5]:
#import feasibillity questions, build feasibility expressions
importSpecial.importFeasibilityQuestionsCSV('Input_Files\\feasibility_test_questions.csv') 

#import base bmp information including:
  #1. imports definitions for cip costs, o&m costs, and BMP sizing to the expressions table
  #2. imports pollutant removal rates into pollutant_removal_rates table
  #3. creates a record in the base_bmps table using (1) and (2)
  #4. feasibility tests
importSpecial.importBaseBMPsCSV('Input_Files\\bmp_lego_piece.csv') 

#IMPORT BASIC FACILITY CHARS:
    #!!!!IMPORTANT!!!! This import must occur before other facility specific data is imported!
print ('\nImporting facility characteristics:')
importCSV.importCSV('Input_Files\\facility_chars.csv', unqTests)

#IMPORT PBP Appendix A1 data
print ('\nImporting PBP Appendix A1 data:')
importCSV.importCSV('Input_Files\\pbp_appxa1.csv', unqTests)

#IMPORT FACILITY RAINFALL EXTRACTED FROM http://rainfall.geography.hawaii.edu/downloads.html
print ('\nImporting Facility Rainfall Data:')
importCSV.importCSV('Input_Files\\FacilityRainfallData.csv', unqTests)

#IMPORT EFFLUENT LIMITS EXISTANCE FOR FACILITY TYPES: (either by Priority Based Plan, Table 3 or as City operational assignment)
#IF CSV HEADRS SETUP CORRECTLY, THEN THIS INSERTS NEL EXISTANCE DATA (0 OR 1) TO WRS_POLLUTANT TABLE 
#AND USES THE FACILITY_TYPE_HAS_NEL TO ASSOCIATE RECORD WITH FACILITY TYPE
print ('\nImporting Facility Type Has Effluent Limits:') #import into wrs_pollutant_risks table
importCSV.importCSV('Input_Files\\nel_exists_facility_types.csv', unqTests)

#IMPORT NEL CLASSIFICATION DATA (from PBP Appendix L)
print ('\nImporting NEL Classes')
importCSV.importCSV('Input_Files\\nel_pbp_appxl.csv', unqTests)

#IMPORT FACILITY RISKS:
print ('\nImporting Facility Risks')
#for future implementation:
    #The current process inserts fac risk and update existing_fac_char_id in Facility_chars table. this process thus creates
#dead records. a more sophisticated approach using sophisticated lambda function in unqTests would fix this
importCSV.importCSV('Input_Files\\facility_risks.csv', unqTests)

# #IMPORT FACILITY SAMPLING DATA
 #!!!IMPORTANT!!!! For now, we make none detects = 0 BUT this must be changed to detection limit, per DOH guidance.
print ('\nImporting Facilty Sampling data:')
importCSV.importCSV('Input_Files\\sample_data.csv', unqTests)


# for now, since we're developing, delete out all except 1st 2 facilities.
# n = 5
# session.query(ExPollConcs).filter(ExPollConcs.facility_id >n).delete(synchronize_session = False) #http://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.delete
# session.query(Facility_Chars).filter(Facility_Chars.id >n).delete(synchronize_session = False) #http://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.delete
# session.commit #we chose not to sync session so need to commit before proceeding to requery or else you may get unpredictable resutls

session.commit()
winsound.Beep(250,1000)

Reading csv for import to Feasibility Questions

Reading csv record: Feas-1

Reading csv record: Feas-2

Reading csv record: Feas-3

Reading csv record: Feas-4

Reading csv record: Feas-5

Reading csv record: Feas-6

Reading csv record: Feas-7

Reading csv record: Feas-8

Reading csv record: Feas-9

Reading csv record: Feas-10

Reading csv record: Feas-11

Reading csv record: Feas-12

Reading csv record: Feas-13

Reading csv record: Feas-14

Reading csv record: Feas-15

Reading csv record: Feas-16

Reading csv record: Feas-17

Reading csv record: Feas-18

Reading csv record: Feas-19

Reading csv record: Feas-20

Reading csv record: Feas-21
Reading csv for import to base bmp tables

Reading csv record: Hydrodynamic Separation
Reading pollutant removal rate info...
Linking feasibility tests w/ base bmp: 1
Removed:  0  old feasibility test defs for the base bmp
Added feasibility test def as record:  1
Added feasibility test def as record:  2

Reading csv record: Enhanced Media Filtration 

# Existing Sampling Data
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [6]:
'''
#############################################################################################################
#              ASSIGN CONCENTRATION DATA FOR FACILITIES WITHOUT SAMPLING RESULTS:
#                      assignment made into database table: ExPollConcs 
#############################################################################################################
Enter estimated pollutant concentrations into database's existing pollutant concentration table for facilities without 
actual sampling data. Use 1 of 2 methods:

Method 1 (sim_MaxType): Use maximum concentration value sampled for period 2013-2017
          This method is for Permit Table 1 facilities only
          Method assumes we have already entered sampling data for into the database's existing pollutant concentration table

Method 2 (sim_EMC): Use data from an EMC study.
          This method is for facilities that are not on Permit Table 1
'''

def WriteSampleDat_simMaxType(pollLS):
    #assign maximum sampled values to Table 1 facilities that have not yet been sampled
    #delete all pollutant concentration table records that are not from infield sampling.
    #     To be sure we're starting fresh, let's remove any records in ExPollConcs that:
    #     1. Were not obtained directly from field samples (i.e. sample_method != 'infield)
    #     2. Were obtained from field samples, but are not Table 1 facilities (i.e. we shouldn't be looking at their  sample results)
    session.query(ExPollConcs).filter(ExPollConcs.sample_method != 'infield').delete(synchronize_session = False)
    #delete all pollutant concentration table records that are not for Table 1 facilities
    #for some reason bulk delete's not working. so let's use a loop to work around it.
    for rec in session.query(ExPollConcs.id).filter(ExPollConcs.facility_id == Facility_Chars.id).filter(Facility_Chars.Permit_Table != 'Table 1'):
        session.query(ExPollConcs).filter(ExPollConcs.id == rec[0]).delete(synchronize_session = False)

    #make a dataframe called pd_Concs to hold existing pollutant concentrations that were sampled in the field (the 'infield' sampling method)'''
    q = session.query(ExPollConcs).filter(ExPollConcs.sample_method == 'infield')
    pd_Concs = pd.read_sql(q.statement,session.bind)         

    #build pd_infieldExtreama by making a dictionary of maximum sample results for each constiuent
    dict_extrema = {'c_' + Constituent: pd_Concs.loc[:,'c_' + Constituent].max() for Constituent in pollLS}
    dict_extrema['c_phmin'] = pd_Concs.loc[:,'c_phmin'].min() #phMin is exception to above. we want min. phMin value
    #use dictionary to build pd_infieldExtrema dataframe
    pd_infieldExtrema = pd.DataFrame([dict_extrema])
    #     display(pd_infieldExtrema)

    #now build query that identifies all Table 1 facilities that are not in ExPollConcs
    subq = session.query(ExPollConcs.facility_id.distinct()).order_by(ExPollConcs.facility_id).all()
    ls_sq = [i[0] for i in subq if i[0] is not None] #list comprehension to produce list of all facility_id in ExPollConcs table
    #get list of Table 1 facilities not in ExPollConcs:
    tpl_q = session.query(Facility_Chars.id).filter(Facility_Chars.Permit_Table == 'Table 1').filter(Facility_Chars.id.notin_(ls_sq)).all()
    ls_FacIDs = [i[0] for i in tpl_q] #write query tuple to list    
    #make a list of Table 1 facs not in ExPollConcs (a list of dicts). also include extrema conc. values.  
    ls_dict_pd = [{**{'facility_id': FacID, 'sample_method': 'sim_MaxType', 'sample_date':'12/31/2016'}, **dict_extrema} for FacID in ls_FacIDs]
    #write list to database:
    ExPollConcs_meta = Base.metadata.tables['existing_pollutant_concentrations']
    ExPollConcs_id_meta = ExPollConcs_meta.c['id']
    for dict_temp in ls_dict_pd:
        SQLA_main.insertRec(ExPollConcs_meta,dict_temp)
    session.commit()
    #for future implementation: write dict -> dataframe -> db(using sqla):
        # pd_temp.to_sql('existing_pollutant_concentrations', engine, if_exists='append', index = False)
        #http://docs.sqlalchemy.org/en/latest/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
        #https://stackoverflow.com/questions/31997859/bulk-insert-a-pandas-dataframe-using-sqlalchemy

WriteSampleDat_simMaxType(pollLS) #call function defined above   

In [7]:
#############################################################################################################
#                                 Write all sampling data from database to pd_ExConcs
#                                       (DEFINE GLOBAL VARIABLE: pd_ExConcs)
#############################################################################################################    
#get all existing sampling data.
q = session.query(ExPollConcs.facility_id.label('Facility_ID'), ExPollConcs.sample_date, 
        ExPollConcs.c_tss,
        ExPollConcs.c_turbidity,
        ExPollConcs.c_p,
        ExPollConcs.c_n,
        ExPollConcs.c_nn,
        ExPollConcs.c_an,
        ExPollConcs.c_og,
        ExPollConcs.c_cu,
        ExPollConcs.c_zn,
        ExPollConcs.c_fe,
        ExPollConcs.c_phmin,
        ExPollConcs.c_phmax  
         ).order_by(ExPollConcs.facility_id) #.filter(ExPollConcs.facility_id == FacID)
pd_ExConcs = pd.read_sql(q.statement,session.bind) 
#tidy up the sampling data
from datetime import datetime
pd_ExConcs['sample_date'] = pd.to_datetime(pd_ExConcs['sample_date'], format="%m/%d/%Y")
#Write to a dataframe called pd_ExConcs
pd_ExConcs = pd_ExConcs.applymap(lambda x: float('nan') if x is None else x) #assign NaN values to any None element 
print ('a few pieces of data:')
pd_ExConcs

a few pieces of data:


Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,,,7.00,7.00
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,,,8.30,8.30
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,,,8.20,8.20
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,,,6.92,6.92
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,,,8.54,8.54
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,,,6.64,6.64
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,,,8.09,8.09
7,2,2017-04-20,,13.00,,0.780,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,,,7.20,7.20
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,,,6.83,6.83


In [8]:
#############################################################################################################
#                                 ESTIMATE Numeric Effluent Limits
#                          (DEFINE GLOBAL VARIABLES: pd_FacsNELs_Wet & pd_FacsNELs_Dry)
#############################################################################################################    
'''
Estimate the Numeric Effluent Limits (NELs) for each facility.
Return wet and dry season NELs in 2 separate dataframes:
    pd_FacsNELs_Wet & pd_FacsNELs_Dry
Estimate NELs using the EffLim module's GetNELs function call.
 The GetNELs function call will differentiate between wet and dry season limits
 (if limits are the same between wet & dry season, then the same limit will be placed into the wet and dry
  dataframes.)
 The GetNEls function calculates a pollutant constituent NEL using this formula:
    NEL = fTypeHas_NEL * SampleClass_NEL
    Where:
      fTypeHas_NEL is a [0,1] value from PBP Table 3, based on facility type (stored in SQLA_DB_facility_type_has_nel)
      SampleClass_NEL is pollutant concentration based on facility's sample class, based on PBP Appendix L
'''
pd_FacsNELs_Wet, pd_FacsNELs_Dry = pd.DataFrame(),  pd.DataFrame() #initialize wet and dry season nel dataframes 
for recFac in session.query(Facility_Chars): #do the following for each facility:
    wet,dry = EffLim.GetNELs(recFac,False) #Get Wed & Dry NELs by calculating: NEL = fTypeHas_NEL * SampleClass_NEL
#     if wet is not None:
    pd_FacsNELs_Wet = pd.concat([pd_FacsNELs_Wet, wet]) #write wet NELs to pd_FacsNELs_Wet
#     if dry is not None:
    pd_FacsNELs_Dry = pd.concat([pd_FacsNELs_Dry, dry]) #write dry NELs to pd_FacsNELs_Dry

print('Wet NELs:')
display(pd_FacsNELs_Wet)
print('Dry NELs:')
display(pd_FacsNELs_Dry)

Wet NELs:


Unnamed: 0_level_0,nel_tss,nel_turbidity,nel_p,nel_n,nel_nn,nel_an,nel_og,nel_cu,nel_zn,nel_fe,nel_phmin,nel_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
2,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
3,,,,,,,15.0,,,,5.5,8.0
4,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
5,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
6,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
7,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0
8,,,,,,,15.0,,,,5.5,8.0
9,,0.50,0.03,0.18,0.010,0.0050,15.0,,,,7.6,8.6
10,50.0,15.00,0.10,0.52,0.180,,15.0,,,,5.5,8.0


Dry NELs:


Unnamed: 0_level_0,nel_tss,nel_turbidity,nel_p,nel_n,nel_nn,nel_an,nel_og,nel_cu,nel_zn,nel_fe,nel_phmin,nel_phmax
Facility_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
2,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
3,,,,,,,15.0,,,,5.5,8.0
4,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
5,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
6,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
7,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0
8,,,,,,,15.0,,,,5.5,8.0
9,,0.50,0.03,0.18,0.010,0.0050,15.0,,,,7.6,8.6
10,30.0,5.50,0.06,0.38,0.090,,15.0,,,,5.5,8.0


In [64]:
#############################################################################################################
#                               Estimate Exceedances of Faclility Effluent Limits
#                      
#############################################################################################################     
def CalcExceedances0(pd_Concs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry):
    '''    
    #for each facility in database, calculate exceedance for each pollutant constituent in pollLS list
    #do the Exceedance Calculation = max(0,(Constituent Concentration - NEL))
    # if no exceedance, then report 0. report NaN sample result is NaN
    #INPUT:
        pd_Concs: dataframe of concentrations [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
        pollLS: list of pollutant constituents we want to analyze (constituent list needs to match those in pd_Concs and FacsNELs dataframes)
        pd_FacsNELs_Wet & pdFacsNELs_Dry: dataframes holding numerical effluent limits for each pollutant
    #Return dataframe [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
    '''
    start_time = time.time()
    pd_FacExceedances = pd.DataFrame() #make an empty dataframe.  we will append to it.
    for Facs in session.query(Facility_Chars.id).order_by(Facility_Chars.id):
        FacID = Facs[0]
        if (FacID in pd_Concs.Facility_ID.values) and (FacID in pd_FacsNELs_Dry.index) and (FacID in pd_FacsNELs_Wet.index):
            pd_temp = pd_Concs.loc[pd_Concs['Facility_ID'] == FacID] #slice facility id rows into a temp dataframe
            pd_temp.is_copy = False #acknowledge that pd_temp is NOT a copy of pd_ExConcs. but intended to be treated as new dataframe
            for Constituent in pollLS:
                pd_temp['c_' + Constituent]  = pd_Concs.apply(lambda row: #for each row in pd_ExConcs:
                         EffLim.ExceedanceCalc(row, Constituent, FacID, pd_FacsNELs_Wet, pd_FacsNELs_Dry, True), axis = 1)
            pd_FacExceedances = pd.concat([pd_FacExceedances,pd_temp])
    print ('--- %s execution time in seconds ---' % (time.time() - start_time))            
    return (pd_FacExceedances)

def CalcExceedances(pd_Concs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry):
    '''    
    #for each facility in database, calculate exceedance for each pollutant constituent in pollLS list
    #do the Exceedance Calculation = max(0,(Constituent Concentration - NEL))
    # if no exceedance, then report 0. report NaN sample result is NaN
    #INPUT:
        pd_Concs: dataframe of concentrations [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
        pollLS: list of pollutant constituents we want to analyze (constituent list needs to match those in pd_Concs and FacsNELs dataframes)
        pd_FacsNELs_Wet & pdFacsNELs_Dry: dataframes holding numerical effluent limits for each pollutant
    #Return dataframe [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
    '''

    pd_FacExceedances = pd_Concs.copy(deep = True)
    #add NEL columns
    for Constituent in pollLS: #for loop method of adding columns uses same dataframe (a deep copy is not made)
        pd_FacExceedances['nel_' + Constituent] = 0
    #add exceedance columns
    for Constituent in pollLS: #for loop method of adding columns uses same dataframe (a deep copy is not made)
        pd_FacExceedances['exc_' + Constituent] = 0        
    ls_NELNames = list(pd_FacsNELs_Dry.columns) #get list of NEL column names, in order that they appear in the wet/dry pd_FacsNELs 
    ls_v = [1 for a in ls_NELNames]
    
    #write nels for each sample
    for Constituent in pollLS:
        pd_FacExceedances['nel_' + Constituent] = pd_FacExceedances.apply(lambda row: 
                          EffLim.Get_pd_NEL_WetOrDry(row['sample_date'], pd_FacsNELs_Wet, pd_FacsNELs_Dry)['nel_'+Constituent], axis = 1)

    #     calculate exceedances:
    for Constituent in pollLS:
        pd_FacExceedances['exc_' + Constituent] = pd_FacExceedances.apply(lambda row: #for each row in pd_ExConcs:
                         EffLim.ExceedanceCalc(row, Constituent, row['Facility_ID'], pd_FacsNELs_Wet, pd_FacsNELs_Dry, True), axis = 1)
        
#     for Constituent in pollLS:
#         pd_FacExceedances['exc_' + Constituent] = pd_FacExceedances.apply(lambda row:
#                         max(0,row['c_' + Constituent] - row['nel_' + Constituent]) if Constituent != 'phmin'
#                         else max(0,row['nel_' + Constituent] - row['c_' + Constituent])
#                                                                           , axis=1)
#     for Constituent in pollLS:
#         pd_FacExceedances['exc_' + Constituent] = pd_FacExceedances['c_' + Constituent] - pd_FacExceedances['nel_' + Constituent]        

    return pd_FacExceedances


#############################################################################################################
#                          
#                                (DEFINE GLOBAL VARIABLE: pd_exFacExceedances)
############################################################################################################# 
# start_time = time.time()
pd_exFacExceedances = CalcExceedances (pd_ExConcs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry)
# display(pd_exFacExceedances)
# pd_exFacExceedances = CalcExceedances0 (pd_ExConcs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry)
print('Concentrations in excess of wet/dry season NELs')
#     print ('--- %s execution time in seconds ---' % (time.time() - start_time))
display(pd_exFacExceedances)

Concentrations in excess of wet/dry season NELs


Unnamed: 0,Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,...,exc_p,exc_n,exc_nn,exc_an,exc_og,exc_cu,exc_zn,exc_fe,exc_phmin,exc_phmax
0,1,2017-04-19,122.0,,,2.430,,,,,...,,1.910,,,,,,,0.00,0.00
1,1,2017-02-11,59.0,13.00,0.097,0.580,,,,,...,0.000,0.060,,,,,,,0.00,0.30
2,1,2016-12-04,80.0,71.20,0.300,0.910,0.120,0.141,0.0,,...,0.200,0.390,0.000,,0.0,,,,0.00,0.20
3,1,2016-06-17,83.0,81.40,0.250,0.940,0.200,0.060,0.0,,...,0.190,0.560,0.110,,0.0,,,,0.00,0.00
4,1,2015-02-20,33.5,17.50,0.176,1.830,0.190,1.090,5.7,,...,0.076,1.310,0.010,,0.0,,,,0.00,0.54
5,1,2014-04-13,14.0,8.50,0.244,2.037,0.247,0.556,4.7,,...,0.144,1.517,0.067,,0.0,,,,0.00,0.00
6,1,2013-03-09,163.0,24.40,0.155,1.199,0.239,0.073,5.0,,...,0.055,0.679,0.059,,0.0,,,,0.00,0.09
7,2,2017-04-20,,13.00,,0.780,,,,,...,,0.260,,,,,,,,
8,2,2017-01-21,0.0,31.00,0.100,1.580,0.000,0.418,0.0,,...,0.000,1.060,0.000,,0.0,,,,0.00,0.00
9,2,2016-05-05,7.0,4.90,0.066,2.672,0.212,0.416,0.0,,...,0.006,2.292,0.122,,0.0,,,,0.00,0.00


In [82]:
#############################################################################################################
#       CALCULATE EXISTING AGE FACTOR WEIGHTED AVERAGE FACILITY EXCEEDANCE VALUES FOR EACH CONSTITUENT:
#       
#############################################################################################################   
'''
Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge 
(i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part 
of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

AF = exp(-SampleRank)
SampleRank = Newest sample = 1
              Second Newest sample = 2
              ...
              nth Newest Sample = n (out of n samples)
'''
def Calc_AFWtdExceedances0(pd_FacExceedances, pollLS, ShowCalculations):
    '''
    CALCULATE AGE FACTOR WEIGHTED AVERAGE FOR EACH CONSTITUENT:

    Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge
    (i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part
    of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

    AF = exp(-SampleRank)
    SampleRank = Newest sample = 1
                  Second Newest sample = 2
                  ...
                  nth Newest Sample = n (out of n samples)

    INPUTS:
        pd_FacExceedances: dataframe holding exceedances
            FORMAT: ExPollConc.id, Facility_ID, Sample_Date, exceedance concentrations
        pollLS: list of polluant constituents that can be found in the dataframe's exceedance concentrations
        ShowCalculations: True if you want output of calculation summary. false if not

    RETURN:
        DataFrame of age factor weighted averages.
        FORMAT: Facility_ID, AFwtd_c_conc...
    '''
def _HELPER_SampleRank(ser_exc):
    #return a numeric value for the series of dates
    return int (str(ser_exc[0])[:10].replace('-',''))


def AFWFacExceedances(pd_FacExceedances, pollLS):
    '''
    CALCULATE AGE FACTOR WEIGHTED AVERAGE FOR EACH CONSTITUENT:

    Age factor acknowledges fact that more recent samples are a better representation of facility pollutant discharge
    (i.e. sampling data) and housekeeping-operations (i.e. inspections) realities. But, historic data as a whole also tells part
    of story (i.e. we want to dampen whipsaw effects that may occur if we only considered most recent data).

    AF = exp(-SampleRank)
    SampleRank = Newest sample = 1
                  Second Newest sample = 2
                  ...
                  nth Newest Sample = n (out of n samples)

    INPUTS:
        pd_FacExceedances: dataframe holding exceedances
            FORMAT: ExPollConc.id, Facility_ID, Sample_Date, exceedance concentrations
        pollLS: list of polluant constituents that can be found in the dataframe's exceedance concentrations
        ShowCalculations: True if you want output of calculation summary. false if not

    RETURN:
        DataFrame of age factor weighted averages.
        FORMAT: Facility_ID, AFwtd_c_conc...
    '''
    #calculate age factor weighted averages for each constituent in pollLS FOR each facility IN DATABASE.
    #write these averages into a dataframe called pd_AFWFacExceedances [Facility_ID,sample_date,c_tss,c_turbidity,c_p,c_n,c_nn,c_an,c_og,c_cu,c_zn,c_fe,c_phmin,c_phmax]
    pd_AFWFacExceedances = pd.DataFrame() #make an empty dataframe.  we will append to it.
    #group by facility, then by sample date, then for each facility-sample data pair, use max constituent concentration, 
    #then sort each facility by sample date w/ newest sample first.   
    pd_FacExceedances = pd_FacExceedances.groupby('Facility_ID').apply(lambda x: x.groupby('sample_date').agg(np.max).sort_index(ascending=False))
    #cleanup indexes
    pd_FacExceedances.drop('Facility_ID', axis = 1, inplace = True) #remove duplicate FAcility_ID column
    pd_FacExceedances.reset_index(1, inplace = True) #remove date groupby
    #insert blank columns:
    for Constituent in pollLS:
        #fill out SR helper column
        pd_FacExceedances['c_' + Constituent + '_HelpSR'] = pd_FacExceedances.apply(
            lambda row: _HELPER_SampleRank(row) if not (math.isnan(row['exc_'+Constituent])) else np.nan, axis = 1)
        #rank sample dates for each constituent of each facility
        pd_FacExceedances['c_' + Constituent + '_SR'] = pd_FacExceedances.groupby(
            ['Facility_ID'])['c_' + Constituent + '_HelpSR'].rank(ascending = False)-1
        #CALC AGE FACTOR
        pd_FacExceedances['c_' + Constituent + '_AF'] = pd_FacExceedances.apply(
            lambda row: math.exp(-row['c_' + Constituent + '_SR']), axis = 1)

        #CALC AGE FACTOR WTD CONCENTRATION
        pd_FacExceedances['c_' + Constituent + '_AF*c'] = pd_FacExceedances.apply(
            lambda row: row['c_' + Constituent + '_AF'] * row['exc_' + Constituent], axis =1 )

    #sum AF and AF*c columns (just do all the columns in pd_FacExceedances for now. make more efficient if need to)
    pd_sums = pd_FacExceedances.groupby(['Facility_ID']).sum() 
    #setup pd_AFWExceedances to include summed data
    pd_AFWFacExceedances = pd_sums.loc[:,['c_' + Constituent + '_AF'] + ['c_' + Constituent + '_AF*c']]
    #and do wtd average:
    for Constituent in pollLS:
        pd_AFWFacExceedances['c_' + Constituent + '_AFWtd'] =  pd_sums['c_' + Constituent + '_AF*c']/pd_sums['c_' + Constituent + '_AF']
    pd_AFWFacExceedances.reset_index(inplace = True)
    return pd_AFWFacExceedances

#############################################################################################################
#                        calculate age factor exceedances of existing samples in pd_exFacExceedances
#                                    DEFINE GLOBAL VARIABLE: pd_exAFWFacExceedances
############################################################################################################# 
start_time = time.time()
pd_exAFWFacExceedances = AFWFacExceedances(pd_exFacExceedances, pollLS)
print ('Age Factor Weighted Averages:')
display(pd_exAFWFacExceedances)
print ('--- %s execution time in seconds ---' % (time.time() - start_time))

Age Factor Weighted Averages:


Unnamed: 0,Facility_ID,c_phmax_AF,c_phmax_AF*c,c_tss_AFWtd,c_turbidity_AFWtd,c_p_AFWtd,c_n_AFWtd,c_nn_AFWtd,c_an_AFWtd,c_og_AFWtd,c_cu_AFWtd,c_zn_AFWtd,c_fe_AFWtd,c_phmin_AFWtd,c_phmax_AFWtd
0,1,1.580534,0.147544,52.064543,19.729711,0.067223,1.296165,0.029425,,0.0,,,,0.0,0.093351
1,2,1.553002,0.0,0.961758,3.745947,0.004876,0.637805,0.031165,,0.0,,,,0.0,0.0
2,3,1.553002,0.18394,,,,,,,0.0,,,,0.0,0.118441
3,4,1.0,0.0,197.0,0.0,0.655,0.829,0.0,,0.0,,,,0.0,0.0
4,5,1.503215,0.0,180.960566,199.986459,1.752511,17.437221,0.004532,,1.421297,,,,0.0,0.0
5,6,1.0,0.7,1960.0,4166.0,10.2,26.73,3.5,,44.0,,,,0.0,0.7
6,7,1.0,0.0,0.0,12.8,0.0,0.323,0.043,,0.0,,,,0.0,0.0
7,8,1.0,0.07,,,,,,,0.0,,,,0.0,0.07
8,9,1.0,0.1,,4180.5,10.27,27.07,3.67,10.495,44.0,,,,1.59,0.1
9,10,1.0,0.7,1960.0,4166.0,10.2,26.73,3.5,,44.0,,,,0.0,0.7


--- 0.8035717010498047 execution time in seconds ---


In [11]:
#############################################################################################################
#                       Estimate Facility Runoff Volumes
#                       DEFINE GLOBAL VARIABLE: pd_RunoffVols
#############################################################################################################   
#get facility imperviousness and area. order by Facility_ID so it's given in same order as monthly rain data dataframe
q_facDat = session.query(Facility_Chars.id.label('Facility_ID'), 
                         Facility_Chars.Indus_Area, 
                         Facility_Chars.Imperv.label('Imperv')).order_by('Facility_ID')
pd_facDat = pd.read_sql(q_facDat.statement,session.bind)

#get monthly rain data for each facility. order by facility_id so order matches facility data dataframe
q_rain = session.query(Facility_Chars.id.label('Facility_ID'), Facility_Monthly_Rain).filter(
    Facility_Chars.facility_monthly_rain_id == Facility_Monthly_Rain.id).order_by('Facility_ID')
pd_rainDat = pd.read_sql(q_rain.statement,session.bind)

#create a new dataframe to hold rain volumes
pd_RunoffVols = pd_facDat.loc[:,['Facility_ID']] #put facilities into the new dataframe
#now calculate volumes for each month:
for mo in range(1,13):
    pd_RunoffVols[calendar.month_name[mo]] = pd.DataFrame(pd_facDat['Indus_Area'] * pd_facDat['Imperv'] * pd_rainDat[calendar.month_name[mo]]/12)
#add monthlys together to get annual volume
pd_RunoffVols['Annual_Volume'] = pd_RunoffVols[[calendar.month_name[mo] for mo in range (1,13)]].sum(axis = 1)
display(pd_RunoffVols)

Unnamed: 0,Facility_ID,January,February,March,April,May,June,July,August,September,October,November,December,Annual_Volume
0,1,295325.500000,335399.166667,352822.500000,250024.833333,157681.166667,140257.833333,143742.500000,138515.500000,170748.666667,259607.666667,334528.000000,392025.000000,2.970678e+06
1,2,338916.586546,246040.406579,241152.186581,101837.916631,131167.236620,96134.993299,96949.696632,78211.519972,105096.729963,263149.176573,351951.839875,338101.883213,2.388710e+06
2,3,77993.831554,75165.810944,79184.577074,53434.705206,42122.622767,29322.108428,40038.818107,30810.540327,33638.560937,61918.767035,86924.422953,84691.775103,6.952465e+05
3,4,21601.669993,18133.109161,16552.499161,9439.754164,7771.332498,6454.157498,7376.179998,7595.709164,9878.812497,13698.619996,17298.898328,18177.014994,1.539778e+05
4,5,50545.681653,57766.493317,56824.648318,36261.032490,25272.840826,21191.512494,21348.486661,23232.176660,26528.634159,43795.792488,56039.777485,65615.201649,4.844223e+05
5,6,15974.190828,18427.906661,18127.451661,11667.669163,7911.981664,6810.313331,6910.464998,7311.071664,8462.815831,13871.005829,18027.299994,20831.546660,1.543337e+05
6,7,10058.130000,7407.150000,7069.280000,2858.900000,3742.560000,2521.030000,2754.940000,2365.090000,3066.820000,7407.150000,10447.980000,9928.180000,6.962721e+04
7,8,31502.679978,29712.754979,32397.642477,21658.092485,17183.279988,11873.169158,16168.989155,12529.474991,13603.429990,24999.285815,35142.194142,34426.224142,2.811972e+05
8,9,9823.037518,8034.365015,6568.240012,3782.602507,2140.542504,967.642502,1554.092503,3548.022506,3108.185006,6978.755013,7565.205014,9529.812517,6.360050e+04
9,10,7359.825004,4947.100003,3973.900002,2108.600001,1581.450001,648.800000,811.000000,3000.700002,3467.025002,4460.500002,5170.125003,5920.300003,4.344933e+04


In [83]:
#############################################################################################################
#                       Calculate raw pollutant exceedance potential scores (PEP_raw)
#                         PEP_raw = AFWtd Exceedance * Annual Runoff Volume (cu. ft)
############################################################################################################   
def _HELPER_calc_PEP_raw(row, Constituent, pd_RunoffVols):
    #HELPER function to calculate PEP_raw
    AnnRunoffVol = pd_RunoffVols.loc[pd_RunoffVols['Facility_ID']==row.loc['Facility_ID'],'Annual_Volume'].values[0]
    AFWFacExceedVal = row.loc['c_' + Constituent + '_AFWtd']
    return  AFWFacExceedVal * AnnRunoffVol
def CalcPEP_Raw(pd_AFWFacExceedances,pollLS, pd_RunoffVols):
    #use age factor weighted scores to calculate raw PEP scores for each constituent pollutant
    #input: 
        #pd_AFWFacExceedances: [Facility_ID	AFWtd_c_tss	AFWtd_c_turbidity	AFWtd_c_p	AFWtd_c_n	AFWtd_c_nn	AFWtd_c_an	AFWtd_c_og	AFWtd_c_cu	AFWtd_c_zn	AFWtd_c_fe	AFWtd_c_phmin	AFWtd_c_phmax]
        #pollLS: pollutant constituent list
        #pd_RunoffVols: RUNOFF VOLUMES [Facility_ID	January	February	March	April	May	June	July	August	September	October	November	December	Annual_Volume] 
    #output: pd_PEP_raw[	Facility_ID	PEP_raw_tss	PEP_raw_turbidity	PEP_raw_p	PEP_raw_n	PEP_raw_nn	PEP_raw_an	PEP_raw_og	PEP_raw_cu	PEP_raw_zn	PEP_raw_fe	PEP_raw_phmin	PEP_raw_phmax]

    #initialize pd_PEP_raw dataframe w/ Facility_IDs from pd_AFWFacExceedances
    pd_PEP_raw = pd_AFWFacExceedances.loc[:,['Facility_ID']]
#     display(pd_PEP_raw)
#     pd_PEP_raw.reset_index(drop=True)
    #for each facility in pd_exPEP_raw, calculate PEP_Raw SCORE for each pollutant constituent in the pollLS LIST:
    for Constituent in pollLS:
        pd_PEP_raw['PEP_raw_' + Constituent] = pd_AFWFacExceedances.apply(lambda row: 
                                               _HELPER_calc_PEP_raw(row,Constituent, pd_RunoffVols), axis = 1)    
    return pd_PEP_raw

#############################################################################################################
#                              calculate existing PEP_raw scores
#                              DEFINE GLOBAL VARIABLE: pd_exPEP_raw
#############################################################################################################  

pd_exPEP_raw = CalcPEP_Raw(pd_exAFWFacExceedances,pollLS,pd_RunoffVols)
display(pd_exPEP_raw)

Unnamed: 0,Facility_ID,PEP_raw_tss,PEP_raw_turbidity,PEP_raw_p,PEP_raw_n,PEP_raw_nn,PEP_raw_an,PEP_raw_og,PEP_raw_cu,PEP_raw_zn,PEP_raw_fe,PEP_raw_phmin,PEP_raw_phmax
0,1,154667000.0,58610630.0,199697.6,3850488.0,87413.03,,0.0,,,,0.0,277315.755592
1,2,2297361.0,8947981.0,11648.28,1523531.0,74445.24,,0.0,,,,0.0,0.0
2,3,,,,,,,0.0,,,,0.0,82345.979883
3,4,30333620.0,0.0,100855.4,127647.6,0.0,,0.0,,,,0.0,0.0
4,5,87661330.0,96877900.0,848955.5,8446978.0,2195.162,,688507.9,,,,0.0,0.0
5,6,302494100.0,642954300.0,1574204.0,4125340.0,540168.0,,6790684.0,,,,0.0,108033.602799
6,7,0.0,891228.3,0.0,22489.59,2993.97,,0.0,,,,0.0,0.0
7,8,,,,,,,0.0,,,,0.0,19683.805211
8,9,,265881900.0,653177.2,1721666.0,233413.8,667487.3,2798422.0,,,,101124.799158,6360.050261
9,10,85160680.0,181009900.0,443183.1,1161400.0,152072.6,,1911770.0,,,,0.0,30414.527517


In [84]:
#############################################################################################################
#                       Calculate normalized pollutant exceedance potential scores (PEP_norm)
#                         PEP_Norm = (PEP_raw - PEPmin) / (PEPMax - PEPmin)
############################################################################################################   
'''
NORMALIZE the raw Pollutant Exceedance Potential scores held in a pd_PEP_raw dataframe to a new dataframe called pd_PEP_norm.
Use calculation:
PEP_Norm = (PEP_raw - PEPmin) / (PEPMax - PEPmin)

Hold the PEPmax and PEPmin baseline scores used for the normalization in a dataframe called pd_NormBaselinePEP
****NOTE: LATER, we'll need to write the norm. basis to file
          This will allow us to use a common baseline in future (when we get more data, we'll want to have same baseline)         
'''
#############################################################################################################
#                           BUILD BASELINE dataframe pd_NormBaselinePEP
#                              DEFINE GLOBAL VARIABLE: pd_NormBaselinePEP
############################################################################################################  
##Use pd_exPEP_Ras data as our baseline max. Use 0 as min for all:
dict_NormBaselinePEP = {'PEP_Baseline_' + Constituent: [pd_exPEP_raw.loc[:,'PEP_raw_' + Constituent].max(),
                                             0]
                                             for Constituent in pollLS}
dict_NormBaselinePEP['MaxMin'] = ['Max','Min'] #add column identifying if row is max or min
pd_NormBaselinePEP = pd.DataFrame(dict_NormBaselinePEP) #write dict to new dataframe 
print ('This is the pd_NormBaselinePEP dataframe:')
display(pd_NormBaselinePEP)

#############################################################################################################
#                                        CALCULATE PEP_norm
#                         
############################################################################################################ 
def _HELPER_calc_PEP_norm(row, Constituent, pd_NormBaselinePEP):
    #HELPER function to calculate norm of PEP_raw for a given constituent
    #get max and min baseline values from pd_NormBAselinePEP
    BLmax= pd_NormBaselinePEP.loc[pd_NormBaselinePEP['MaxMin']=='Max', 'PEP_Baseline_' + Constituent].values[0] 
    BLmin= pd_NormBaselinePEP.loc[pd_NormBaselinePEP['MaxMin']=='Min', 'PEP_Baseline_' + Constituent].values[0]
    #i dont know how to catch div by zero errors using numpy :-/
    dv = row['PEP_raw_' + Constituent] - BLmin
    if dv != 0:
        v = (dv - BLmin)/(BLmax - BLmin)
    else:
        v = 0 #force to 0
    return v
def CalcPEP_norm(pd_PEP_raw,pollLS, pd_NormBaselinePEP):
    #calculate PEP_norm for each constituent pollutant of each facility in pd_PEP_raw
    #return pd_PEP_norm [	Facility_ID	PEP_norm_tss	PEP_norm_turbidity	PEP_norm_p	PEP_norm_n	PEP_norm_nn	PEP_norm_an	PEP_norm_og	PEP_norm_cu	PEP_norm_zn	PEP_norm_fe	PEP_norm_phmin	PEP_norm_phmax]
    
    #initialize pd_PEP_norm dataframe w/ Facility_IDs from pd_exPEP_raw
    pd_PEP_norm = pd_PEP_raw.loc[:,['Facility_ID']]
#     pd_PEP_norm.reset_index(drop=True,inplace=True)
    #loop through each constituent; calculating norms as we go:   
    for Constituent in pollLS:
            pd_PEP_norm['PEP_norm_' + Constituent] = pd_PEP_raw.apply(lambda row: 
                                               _HELPER_calc_PEP_norm(row,Constituent,pd_NormBaselinePEP), axis = 1)
    return pd_PEP_norm

#############################################################################################################
#                       Normalize existing raw pollutant exceedance potential scores 
#                         (DEFINE GLOBAL VARIABLE: pd_exPEP_norm)
############################################################################################################   
print('This is the pd_exPEP_norm dataframe:')
pd_exPEP_norm = CalcPEP_norm(pd_exPEP_raw,pollLS,pd_NormBaselinePEP)
display(pd_exPEP_norm)

#TO DO:  WRITE existing NORMALIZED PEP SCORES TO DB: 

This is the pd_NormBaselinePEP dataframe:


Unnamed: 0,MaxMin,PEP_Baseline_an,PEP_Baseline_cu,PEP_Baseline_fe,PEP_Baseline_n,PEP_Baseline_nn,PEP_Baseline_og,PEP_Baseline_p,PEP_Baseline_phmax,PEP_Baseline_phmin,PEP_Baseline_tss,PEP_Baseline_turbidity,PEP_Baseline_zn
0,Max,7452262.0,229725000.0,4448060000.0,19115650.0,2600866.0,31267240.0,7283846.0,497346.455645,703512.890691,1392570000.0,2968967000.0,1051438000.0
1,Min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


This is the pd_exPEP_norm dataframe:


Unnamed: 0,Facility_ID,PEP_norm_tss,PEP_norm_turbidity,PEP_norm_p,PEP_norm_n,PEP_norm_nn,PEP_norm_an,PEP_norm_og,PEP_norm_cu,PEP_norm_zn,PEP_norm_fe,PEP_norm_phmin,PEP_norm_phmax
0,1,0.111066,0.019741,0.027417,0.201431,0.033609,,0.0,,,,0.0,0.557591
1,2,0.00165,0.003014,0.001599,0.079701,0.028623,,0.0,,,,0.0,0.0
2,3,,,,,,,0.0,,,,0.0,0.165571
3,4,0.021782,0.0,0.013846,0.006678,0.0,,0.0,,,,0.0,0.0
4,5,0.062949,0.03263,0.116553,0.441888,0.000844,,0.02202,,,,0.0,0.0
5,6,0.21722,0.216558,0.216123,0.21581,0.207688,,0.217182,,,,0.0,0.21722
6,7,0.0,0.0003,0.0,0.001177,0.001151,,0.0,,,,0.0,0.0
7,8,,,,,,,0.0,,,,0.0,0.039578
8,9,,0.089554,0.089675,0.090066,0.089745,0.089568,0.0895,,,,0.143743,0.012788
9,10,0.061154,0.060967,0.060845,0.060757,0.05847,,0.061143,,,,0.0,0.061154


In [None]:
#############################################################################################################
#                                        Sum Normalized PEP Scores
#                                write scores to new dataframe called pd_PEP_sum
############################################################################################################ 
def SumNormPEPs (pd_PEP_norm):
    #general function to sum normalized PEPs
    pd_PEP_norm.set_index('Facility_ID', inplace=True) #move FAcility ID to index temporarily
    pd_PEP_norm['PEP_norm_sum'] = pd_PEP_norm.sum(axis = 1) #sum norm scores for each facility
    pd_PEP_norm.reset_index(inplace=True) #move facility ID from index
    return(pd_PEP_norm.loc[:,['Facility_ID', 'PEP_norm_sum']]) #copy summed norm scores and return dataframe

#############################################################################################################
#                                  Sum existing Normalized PEP Scores
#                              (DEFINE GLOBAL VARIABLE: pd_exPEP_sum)
############################################################################################################ 
pd_exPEP_sum = SumNormPEPs(pd_exPEP_norm)
display(pd_exPEP_sum)

In [None]:
'''
#############################################################################################################
#                               CALCULATE WRS PEP BASE SCORES
#                      WRS PEP BASE SCORE = NORM_PEP_SCORE*(SampleUncertainty + 1) 
############################################################################################################ 
'''
def _HELPER_PEPUncertainty(ls_id, dict_unc):
    '''determine the uncertainty level based on sample method
        (retrieve list of sample methods from ExPollConcs table for facilities in ls_id; assign uncertainty level using dict_unc)
       input: 
            ls_id: list of facility ids
            dict_unc: dictionary of uncertainty values for each sample method
        return: 
            pd_unc: dataframe [Facility_ID, UncertaintyValue]
    '''
    #get sample method for each facility in ls_id list
    q = session.query(ExPollConcs.facility_id.label('Facility_ID'), ExPollConcs.sample_method.label('sample_method')).filter(
        ExPollConcs.facility_id.in_(ls_id)).distinct(ExPollConcs.facility_id).order_by(ExPollConcs.facility_id)
    pd_samplemethod = pd.read_sql(q.statement,session.bind)
    #use dict_unc to assign uncertainty value for each facility's sample method
    pd_samplemethod['Uncertainty_Value'] = pd_samplemethod['sample_method'].apply(lambda val: dict_unc[val])
    return pd_samplemethod
    
def CalcWRSPEPBaseScore(pd_PEP_sum):
    #calculate wrs pep base score = NORM_PEP_SCORE*(SampleUncertainty + 1) 
    #input: pd_PEP_sum dataframe containing COLUMNS [Facility_ID, PEP_norm_sum]
    #return: dataframe of WRS PEP Base Scores
    
    #make a list of Facility IDs in pd_PEP_sum
    ls_id = [np.asscalar(id) for id in pd_PEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
    #make the pd_WRSPEPBaseScore dataframe:
    #write uncertainty information into pd_WRSPEPBaseScore
    pd_WRSPEPBaseScore = _HELPER_PEPUncertainty(ls_id, {'infield':0.25, 'sim_MaxType':1.0, 'sim_EMC':0.0})
    #copy in PEP_norm_sum values
    pd_WRSPEPBaseScore['PEP_norm_sum'] = pd_PEP_sum['PEP_norm_sum']
    #calculate PEP wrs and then write result into column
    pd_WRSPEPBaseScore['PEP_BaseRisk'] = pd_WRSPEPBaseScore['PEP_norm_sum'] * (pd_WRSPEPBaseScore['Uncertainty_Value'] + 1)
    return pd_WRSPEPBaseScore

#############################################################################################################
#                            CALCULATE existing WRS PEP BASE SCORES & (TO DO: WRITE SCORES TO database) 
#                              (DEFINE GLOBAL VARIABLE: pd_exWRSPEPBaseScore)
############################################################################################################ 
#calc WRS PEP Base Scores for existing normalized PEP sums (pd_exPEP_norm)
pd_exWRSPEPBaseScore = CalcWRSPEPBaseScore(pd_exPEP_sum)
display(pd_exWRSPEPBaseScore)

In [None]:
'''
#############################################################################################################
#                                    CALCULATE WRS BASE SCORES  
#
############################################################################################################ 
CALCULATE WRS BASE SCORE:
    TABLE 1 Facilities: WRS_BASE = WRS_INHERENT + WRS_CONTROLLABLE
        WRS_CONTROLLABLE = WRS_BMP + WRS_PEP
        WRS_BMP = WRS_HOUSEKEEPING + WRS_PCBMP
    TABLE 1A Facilities: 
    TABLE 2 & non-permitted:

    INPUTS:
        pd_wrsNonPEPScores
        pd_wrsPEPScores
'''
def GET_pd_FacRisks(ls_id):
    '''helper function that takes in list of facility_char ids and returns dataframe of:
        Facility_ID
        Inherent base risk
        housekeeping bmp base risk
        sw plan base risk
        bmp inspection deficiency rate (pc base risk)
    
        these items are obtained by querying database table: Facility Risks
    '''    
    q_facriskIDs =  session.query(Facility_Chars.existing_facility_risk_id).filter(Facility_Chars.id.in_(ls_id)) #for facilities in pd_exPEP_sum, get existing_facility_risk_id records
    #use q_facriskIDs as filter on Facility_Risks table to get associated wrs pollutant base id
    q_facrisks = session.query(
        Facility_Chars.id.label('Facility_ID'),Facility_Risks.Category_RiskFactor, Facility_Risks.Inherent_BaseRisk, Facility_Risks.HousekeepingBMP_BaseRisk, Facility_Risks.SWPlan_BaseRisk, Facility_Risks.BMPInspectionDeficiency_Rate).filter(
            Facility_Risks.id.in_(q_facriskIDs)).filter(
                Facility_Risks.id == Facility_Chars.existing_facility_risk_id).order_by(Facility_Chars.id)
    pd_facrisks = pd.read_sql(q_facrisks.statement, session.bind)
    return pd_facrisks

def CalcWRSBaseScore(pd_wrsNonPEPScores, pd_wrsPEPScores):
    '''
    CALCULATE WRS BASE SCORE:
        TABLE 1 Facilities: WRS_BASE = WRS_INHERENT + WRS_CONTROLLABLE
            WRS_CONTROLLABLE = WRS_BMP + WRS_PEP
            WRS_BMP = WRS_HOUSEKEEPING + WRS_PCBMP
        TABLE 1A Facilities: 
        TABLE 2 & non-permitted:

        INPUTS:
            pd_wrsNonPEPScores [Facility_ID	Category_RiskFactor	Inherent_BaseRisk	HousekeepingBMP_BaseRisk	SWPlan_BaseRisk	BMPInspectionDeficiency_Rate]
            pd_wrsPEPScores [	Facility_ID	sample_method	Uncertainty_Value	PEP_norm_sum	PEP_BaseRisk ]

    '''
    #initialize pd_exPEP_norm dataframe w/ Facility_IDs from pd_exPEP_raw
    pd_wrsBaseScores = pd.merge(pd_wrsNonPEPScores, pd_wrsPEPScores, on='Facility_ID')
    
    #calculate Table 1 scores (no need to differentiate tables now. all facilities are table 1)
    pd_wrsBaseScores['BMP_BaseRisk'] = pd_wrsBaseScores['HousekeepingBMP_BaseRisk'] + pd_wrsBaseScores['BMPInspectionDeficiency_Rate']
    pd_wrsBaseScores['Controllable_BaseRisk'] = pd_wrsBaseScores['BMP_BaseRisk'] + pd_wrsBaseScores['PEP_BaseRisk']
    #calculate total score:
    pd_wrsBaseScores['Total_BaseRisk'] = pd_wrsBaseScores['Inherent_BaseRisk'] + pd_wrsBaseScores['Controllable_BaseRisk']
    return pd_wrsBaseScores

#############################################################################################################
#                             CALCULATE existing WRS BASE SCORES 
#                       (DEFINE GLOBAL VARIABLE: pd_exwrsNonPEPScores & pd_exwrsBaseScores)
############################################################################################################ 
#make a list of Facility IDs in pd_exWRSPEPBaseScore
ls_id = [np.asscalar(id) for id in pd_exWRSPEPBaseScore['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
#get nonPEP WRS scores for each facility
pd_exwrsNonPEPScores = GET_pd_FacRisks(ls_id)
#make base scores using existing sub-scores.
pd_exwrsBaseScores =  CalcWRSBaseScore(pd_exwrsNonPEPScores,pd_exWRSPEPBaseScore.loc[:,['Facility_ID','PEP_BaseRisk']])
display(pd_exwrsBaseScores)

# BMP FEASIBILITY EVALUATION
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [None]:
# %%capture cap --no-stderr
'''
#############################################################################################################
#                    EVALUATE BASE BMP FEASIBILITY at each facility  
#                Write results to the base_bmp_feasibility_test_results table.
############################################################################################################ 

'''
print('\n******Evaluating Base BMP feasibility at facilities.******')
ShowCalculations = True #flag indicating if steps should be outputted
Expr.ResetEvalErrorCount() #RESET EXPRESION EVALUATOR ERROR COUNT

#Only analyze bmps at facilities we have data for. make list of these facilities.
ls_id = [np.asscalar(id) for id in pd_exPEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):    
    if ShowCalculations: print ('\n***Evaluating base bmp feasibiilty tests for facility: ', aFac.Fac_Name), ' ***'
    myBMPs = session.query(Base_BMPs)
    for aBMP in myBMPs:
        if ShowCalculations:print ('\n######Evaluating feasibility of base_bmp: ', aBMP.bmp_name, ' ID: ', aBMP.id, '######')
        BBMP_Eval.Eval_base_bmp_feasibility_tests(aFac.id, aBMP, ShowCalculations)
session.commit
winsound.Beep(250,1000)
print ('*****************************************************************')
print ('* Completed evaluating Base BMP feasibility                     *')
if Expr.CountEvalErrors() >0:
    print (Expr.CountEvalErrors(), ' errors were encountered. Review output to identify location(s)')
    print ('Hint: expression evaluation error lines are prefixed by: FAULT!!!! Error occured while evaluating expression:')
else:
    print ('No errors detected.')
print ('*****************************************************************')

# with open('C:\\Users\\JonHonda\\Desktop\\Crap\\output.txt', 'w') as f:
#     f.write(cap.stdout)
# f.close()

In [None]:
'''
#############################################################################################################
#                           Make all combinations of base bmps  
#                     Write results to the combos bmp database table
############################################################################################################ 
#MAXIMUM POLLUTANT REMOVAL RATES ARE DETERMINED BY IDENTIFYING 
#  THE BASE_BMP IN THE COMBO THAT PROVIDES THE HIGHEST REMOVAL RATE FOR A GIVEN POLLUTANT
'''
import time
print ('get a coffee...this one takes a while!')
start_time = time.time()
CBMP_Eval.Make_ALL_bmp_base_option_combos()
session.commit()
print ('--- %s execution time in seconds ---' % (time.time() - start_time))

In [None]:
'''
#############################################################################################################
#                           TUNE BMP FEASIBILITY
#           
############################################################################################################ 
CODE TO TUNE BMP FEASIBILITY/.
Use base bmp feasibility results for each facility.
'''
# import itertools     #https://docs.python.org/3/library/itertools.html    
# import pandas as pd

# from sqlalchemy import and_

# def _Make_bmp_fingerprint(base_BMP_components):
#     #create fingerprint of the passed list of base_bmp_ids
#     #fingerprint is just a | separated list of ids of the base bmps that make up the combo bmp
#     #corresponds to bmp_options table's bmp_fingerprint field
#     #FORMAT: |bmp_option_base_component_id||bmp_option_base_component_id| w/ id's given in ascending order
#     fingerprint = '|' + '|'.join(str(id) + '|' for id in base_BMP_components)
#     return fingerprint

# def Eval_FacBMPCombo(pd_basebmps, myFacility, bmpCombo):
#     '''
#     input:
#         pdbasebmps: pandas built from a BBMP_Eval.evalFacility_BaseBMP dictionary list
#                     assme that pandas is passed in w/ index is set as base_bmp_id
#         myFacility: SQLA fac_chars record
#         bmpCombo: list of base_bmp_ids that make up this combo
    
#     #retrieve previously computed combo removal rate
#     #calculate combo cip and om cost, insert/update database
#     #insert/update combo data to Combo_BMP_Feasibility_Test_Results table

#     '''    
#     #get combo bmp pollutant removal rates into pandas 
#     q = session.query(Combo_BMPs.bmp_fingerprint, Combo_BMPs.id.label('combos_bmp_id'), PRR.id.label('PRR_id'),
#           PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
#           PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax
#         ).filter(Combo_BMPs.bmp_fingerprint == _Make_bmp_fingerprint(bmpCombo)).filter(
#         Combo_BMPs.bmp_option_removal_rate_id == PRR.id)  
#     pd_rr = pd.read_sql(q.statement,session.bind).applymap(lambda el: 0.00 if el is None else el) #el = 0. if nonetype to represent no removal rate change

#     #get costs in pandas
#     sumCIP = sum(pd_basebmps.loc[bmp_id,'calc_cip_cost'] for bmp_id in bmpCombo)
#     sumOM = sum(pd_basebmps.loc[bmp_id,'calc_om_cost'] for bmp_id in bmpCombo)
#     pd_sums = pd.DataFrame([{'calc_cip_cost':sumCIP, 'calc_om_cost': sumOM}])

#     #prepare dataframe to return:
#     pd_temp = pd.concat([pd_rr, pd_sums], axis = 1)#make one dataframe containing combo bmp's removal rates and costs:

# #     this one's for testing only (won't work with non-base bmps):
#     pd_temp.insert(0,'Facility_Name',myFacility.Fac_Name) #include facility id column
#     pd_temp.insert(3,'BMP_Name', session.query(Base_BMPs.bmp_name).filter(Base_BMPs.id == q.first().combos_bmp_id).first())    
#     return (pd_temp)
    

# def Eval_FacBMPCombos(aFac, ShowCalculations):
#     #a wrapper around Eval_FacBMPCombo
#     print('\n***Evaluating feasible bmp combos for facility: ', aFac.Fac_Name, '***')
#     print ('****Evaluating feasibile base bmps****')
#     df = pd.DataFrame(BBMP_Eval.evalFacility_BaseBMP(aFac, False)).set_index('base_bmp_id')
#     if ShowCalculations: display (df)   
#     df = df.loc[df['is_feasible'] == 1]
#     if ShowCalculations:
#         print ('****These are the feasible base bmps. I\'ll use them to make combos:****')
#         display (df)
#     feas_ls = df.index#send feasible base bmp ids to list
#     pd_temp = pd.DataFrame()

#     for feas in feas_ls:
#          pd_temp = pd.concat([pd_temp,Eval_FacBMPCombo(df,aFac, [feas])])            

#     print ('      There are ', len(pd_temp), ' combinations of feasible Base BMPs.')
#     return pd_temp
    
# def Eval_All_FacBMPCombos():
#     ShowCalculations = False
#     print ('Evaluating feasibile BMP Options for each facility:')
#     #Only analyze bmps at facilities we have data for. make list of these facilities.
#     ls_id = [np.asscalar(id) for id in pd_exPEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
#     pd_BaseBMPCombos = pd.DataFrame() #create empty dataframe that we'll fill w/ the feasibile combos
#     for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):        
# #         Eval_FacBMPOptions(aFac, ShowCalculations)
#         pd_BaseBMPCombos = pd.concat([pd_BaseBMPCombos, Eval_FacBMPCombos(aFac, ShowCalculations)])
#     pd_BaseBMPCombos.reset_index(drop=True, inplace=True)
#     return pd_BaseBMPCombos


# #############################################################################################################
# #                           EVALUATE FEASIBILITY OF BMP COMBOS 
# #                   (DEFINE GLOBAL VARIABLE: pd_BaseBMPCombos)
# ############################################################################################################ 
# start_time = time.time()
# pd_BaseBMPCombos = Eval_All_FacBMPCombos()
# print ('--- %s execution time in seconds ---' % (time.time() - start_time))
# display(pd_BaseBMPCombos)
# # WRITE COMBOS RESULTS TO EXCEL FILE
# xlsFile = 'C:\\Users\\JonHonda\\Desktop\\Crap\\Combos.xls'
# print ('writing to excel file: ', xlsFile)
# writer = pd.ExcelWriter(xlsFile)
# pd_BaseBMPCombos.to_excel(writer,'Output')
# writer.save()
# session.commit()


In [None]:
# '''
# #############################################################################################################
# #                         EVALUATE FEASIBILITY OF BMP COMBOS 
# #            insert/update combo data to Combo_BMP_Feasibility_Test_Results table & pd_BaseBMPCombos 
# ############################################################################################################ 
# Use base bmp feasibility results for each facility.
# '''
# import itertools     #https://docs.python.org/3/library/itertools.html    
# import pandas as pd

from sqlalchemy import and_

def _Make_bmp_fingerprint(base_BMP_components):
    #create fingerprint of the passed list of base_bmp_ids
    #fingerprint is just a | separated list of ids of the base bmps that make up the combo bmp
    #corresponds to bmp_options table's bmp_fingerprint field
    #FORMAT: |bmp_option_base_component_id||bmp_option_base_component_id| w/ id's given in ascending order
    fingerprint = '|' + '|'.join(str(id) + '|' for id in base_BMP_components)
    return fingerprint

def _CalcPollReduction(FacID, pollLS, pd_rr, pd_ExConcs):
    '''    
        #calculate reduced pollutant concentrations and reduced WRS scores
        #return dataframes:
            pd_RedWRSBaseScore (reduced WRS)
            *others could be returned but don't to save memory

    '''    
    ls_rr = ['r_' + Constituent for Constituent in pollLS]
    ls_c = ['c_' + Constituent for Constituent in pollLS]
    
    pd_rr_slim = pd_rr.loc[:,ls_rr]
    pd_c_slim = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] == FacID, ls_c]

#     display(pd_rr_slim)
#     display (pd_c_slim)
    
    #load in fac id and sampling date:
    pd_RedConcs = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] == FacID, ['Facility_ID', 'sample_date']]
    #calculate reduced concentrations (red = c * (1-rr)) 
    for Constituent in pollLS:
        pd_RedConcs['c_' + Constituent] = pd_c_slim['c_' + Constituent].apply(lambda row: row * (1- pd_rr_slim['r_' + Constituent]))
#     display(pd_RedConcs)
    
    #calculate wrs reduction:
    pd_RedFacExceedances = CalcExceedances(pd_RedConcs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry)
#     display(pd_RedFacExceedances)
    pd_RedAFWExceedances = AFWFacExceedances(pd_RedFacExceedances, pollLS)
#     display(pd_RedAFWExceedances )
    pd_RedPEP_raw = CalcPEP_Raw(pd_RedAFWExceedances, pollLS, pd_RunoffVols)
#     display(pd_RedPEP_raw)
    pd_RedPEP_norm = CalcPEP_norm(pd_RedPEP_raw, pollLS, pd_NormBaselinePEP)
#     display(pd_RedPEP_norm )
    pd_RedPEP_sum = SumNormPEPs(pd_RedPEP_norm)
#     display(pd_RedPEP_sum )
    pd_RedWRSPEPBaseScore = CalcWRSPEPBaseScore(pd_RedPEP_sum)
#     display(pd_RedWRSPEPBaseScore )
    pd_RedWRSBaseScore = CalcWRSBaseScore(pd_exwrsNonPEPScores, pd_RedWRSPEPBaseScore.loc[:,['Facility_ID','PEP_BaseRisk']])
#     display(pd_RedWRSBaseScore )
    return pd_RedWRSBaseScore
    
    
def Eval_FacBMPCombo(pd_basebmps, myFacility, bmpCombo, ShowCalculations):
    '''
    input:
        pdbasebmps: pandas built from a BBMP_Eval.evalFacility_BaseBMP dictionary list
                    assme that pandas is passed in w/ index is set as base_bmp_id
        myFacility: SQLA fac_chars record
        bmpCombo: list of base_bmp_ids that make up this combo
    return:
        pd_rr (reduction rates)
        pd_RedWRSBaseScore (reduced WRS scores)
        pd_sums (o&m and construction costs)
    #retrieve previously computed combo removal rate
    #calculate combo cip and om cost, insert/update database
    #insert/update combo data to Combo_BMP_Feasibility_Test_Results table

    '''    
    #get combo bmp pollutant removal rates into pandas 
    q = session.query(Combo_BMPs.bmp_fingerprint, Combo_BMPs.id.label('combos_bmp_id'), PRR.id.label('PRR_id'),
          PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
          PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax
        ).filter(Combo_BMPs.bmp_fingerprint == _Make_bmp_fingerprint(bmpCombo)).filter(
        Combo_BMPs.bmp_option_removal_rate_id == PRR.id)  
    pd_rr = pd.read_sql(q.statement,session.bind).applymap(lambda el: 0.00 if el is None else el) #el = 0. if nonetype to represent no removal rate change

    #use information in pd_rr to get CBFTR_record - make new record if necessary\n",
    myCBFTR = Base.metadata.tables['combo_bmp_feasibility_test_results']
    myCBFTR_id = SQLA_main.insertupdateRec(myCBFTR,{'facility_id':myFacility.id, 
                                                    'combo_bmps_id':pd_rr['combos_bmp_id'][0],
                                                    'is_feasible':1
                                                   },
                   and_(
                        myCBFTR.c['facility_id'] == myFacility.id,
                        myCBFTR.c['combo_bmps_id'] == pd_rr['combos_bmp_id'][0]
                        ))
    session.flush()
    
    pd_RedWRSBaseScore = _CalcPollReduction(myFacility.id, pollLS, pd_rr, pd_ExConcs)
    
    #get costs in pandas
    sumCIP = sum(pd_basebmps.loc[bmp_id,'calc_cip_cost'] for bmp_id in bmpCombo)
    sumOM = sum(pd_basebmps.loc[bmp_id,'calc_om_cost'] for bmp_id in bmpCombo)
    pd_sums = pd.DataFrame([{'calc_cip_cost':sumCIP, 'calc_om_cost': sumOM}])

    if ShowCalculations:
        print ('Here is a summary of the combo: ', list(bmpCombo))
        display(pd.concat([pd_rr,pd_sums,pd_RedWRSBaseScore], axis = 1))
    #return several dataframes:
    return pd_rr, pd_RedWRSBaseScore, pd_sums
    
def Eval_FacBMPCombos(aFac, ShowCalculations):
    #a wrapper around Eval_FacBMPCombo
    print('\n***Evaluating feasible bmp combos for facility: ', aFac.Fac_Name, '***')
    print ('****Evaluating feasibile base bmps****')
    df = pd.DataFrame(BBMP_Eval.evalFacility_BaseBMP(aFac, False)).set_index('base_bmp_id')
    if ShowCalculations: display (df)   
    df = df.loc[df['is_feasible'] == 1]
    if ShowCalculations:
        print ('****These are the feasible base bmps. I\'ll use them to make combos:****')
        display (df)
    feas_ls = df.index#send feasible base bmp ids to list
    pd_temp = pd.DataFrame()

#     for CBOLen in range (1, len(feas_ls)+1): #+1 so it's inclusive of last count
#         for combo in  itertools.combinations(feas_ls,CBOLen):
#             pd_temp = pd.concat([pd_temp,Eval_FacBMPCombo(df,aFac, list(combo),ShowCalculations)])
    print ('****Completed base bmp feasibility evaluation.****')
    print ('****Evaluating combinations of feasible base bmps...****')
    ls_pd = [Eval_FacBMPCombo(df,aFac, list(combo),ShowCalculations)
            for CBOLen in range (1, len(feas_ls)+1) #+1 so it's inclusive of last count
                 for combo in itertools.combinations(feas_ls,CBOLen)
            ]           

    print ('      There are ', len(ls_pd), ' combinations of feasible Base BMPs.')
    return ls_pd
    
def Eval_All_FacBMPCombos():
    ShowCalculations = False
    session.query().filter(CBFTR).delete(synchronize_session = False)
    
    print ('Evaluating feasibile BMP Options for each facility:')
    #Only analyze bmps at facilities we have data for. make list of these facilities.
    ls_id = [np.asscalar(id) for id in pd_exPEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
    ls_id = [1]
    
    
#     for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):
#         ls_pd =  [Eval_FacBMPCombos(aFac, ShowCalculations) for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id))]
    return ls_pd
    


#############################################################################################################
#                           EVALUATE FEASIBILITY OF BMP COMBOS 
#                   (DEFINE GLOBAL VARIABLE: pd_BaseBMPCombos)
############################################################################################################ 
start_time = time.time()
ls_pd_BaseBMPCombos = Eval_All_FacBMPCombos()
print ('--- %s execution time in seconds ---' % (time.time() - start_time))

session.commit()

In [None]:
'''
#############################################################################################################
#                         EVALUATE FEASIBILITY OF BMP COMBOS 
#            insert/update combo data to Combo_BMP_Feasibility_Test_Results table & pd_BaseBMPCombos 
############################################################################################################ 
Use base bmp feasibility results for each facility.
'''
# import itertools     #https://docs.python.org/3/library/itertools.html    
# import pandas as pd

# from sqlalchemy import and_

# # def _Make_bmp_fingerprint(base_BMP_components):
# #     #create fingerprint of the passed list of base_bmp_ids
# #     #fingerprint is just a | separated list of ids of the base bmps that make up the combo bmp
# #     #corresponds to bmp_options table's bmp_fingerprint field
# #     #FORMAT: |bmp_option_base_component_id||bmp_option_base_component_id| w/ id's given in ascending order
# #     fingerprint = '|' + '|'.join(str(id) + '|' for id in base_BMP_components)
# #     return fingerprint

# # def _CalcPollReduction(FacID, pollLS, pd_rr, pd_ExConcs):
# #     '''    
# #         #calculate reduced pollutant concentrations and reduced WRS scores
# #         #return dataframes:
# #             pd_RedWRSBaseScore (reduced WRS)
# #             *others could be returned but don't to save memory

# #     '''    
# #     ls_rr = ['r_' + Constituent for Constituent in pollLS]
# #     ls_c = ['c_' + Constituent for Constituent in pollLS]
    
# #     pd_rr_slim = pd_rr.loc[:,ls_rr]
# #     pd_c_slim = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] == FacID, ls_c]

# # #     display(pd_rr_slim)
# # #     display (pd_c_slim)
    
# #     #load in fac id and sampling date:
# #     pd_RedConcs = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] == FacID, ['Facility_ID', 'sample_date']]
# #     #calculate reduced concentrations (red = c * (1-rr)) 
# #     for Constituent in pollLS:
# #         pd_RedConcs['c_' + Constituent] = pd_c_slim['c_' + Constituent].apply(lambda row: row * (1- pd_rr_slim['r_' + Constituent]))
# # #     display(pd_RedConcs)
    
# #     #calculate wrs reduction:
# #     pd_RedFacExceedances = CalcExceedances(pd_RedConcs, pollLS, pd_FacsNELs_Wet, pd_FacsNELs_Dry)
# # #     display(pd_RedFacExceedances)
# #     pd_RedAFWExceedances = AFWFacExceedances(pd_RedFacExceedances, pollLS)
# # #     display(pd_RedAFWExceedances )
# #     pd_RedPEP_raw = CalcPEP_Raw(pd_RedAFWExceedances, pollLS, pd_RunoffVols)
# # #     display(pd_RedPEP_raw)
# #     pd_RedPEP_norm = CalcPEP_norm(pd_RedPEP_raw, pollLS, pd_NormBaselinePEP)
# # #     display(pd_RedPEP_norm )
# #     pd_RedPEP_sum = SumNormPEPs(pd_RedPEP_norm)
# # #     display(pd_RedPEP_sum )
# #     pd_RedWRSPEPBaseScore = CalcWRSPEPBaseScore(pd_RedPEP_sum)
# # #     display(pd_RedWRSPEPBaseScore )
# #     pd_RedWRSBaseScore = CalcWRSBaseScore(pd_exwrsNonPEPScores, pd_RedWRSPEPBaseScore.loc[:,['Facility_ID','PEP_BaseRisk']])
# # #     display(pd_RedWRSBaseScore )
# #     return pd_RedWRSBaseScore
    
    
# # def Eval_FacBMPCombo(pd_basebmps, myFacility, bmpCombo, ShowCalculations):
# #     '''
# #     input:
# #         pdbasebmps: pandas built from a BBMP_Eval.evalFacility_BaseBMP dictionary list
# #                     assme that pandas is passed in w/ index is set as base_bmp_id
# #         myFacility: SQLA fac_chars record
# #         bmpCombo: list of base_bmp_ids that make up this combo
# #     return:
# #         pd_rr (reduction rates)
# #         pd_RedWRSBaseScore (reduced WRS scores)
# #         pd_sums (o&m and construction costs)
# #     #retrieve previously computed combo removal rate
# #     #calculate combo cip and om cost, insert/update database
# #     #insert/update combo data to Combo_BMP_Feasibility_Test_Results table

# #     '''    
# #     #get combo bmp pollutant removal rates into pandas 
# #     q = session.query(Combo_BMPs.bmp_fingerprint, Combo_BMPs.id.label('combos_bmp_id'), PRR.id.label('PRR_id'),
# #           PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
# #           PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax
# #         ).filter(Combo_BMPs.bmp_fingerprint == _Make_bmp_fingerprint(bmpCombo)).filter(
# #         Combo_BMPs.bmp_option_removal_rate_id == PRR.id)  
# #     pd_rr = pd.read_sql(q.statement,session.bind).applymap(lambda el: 0.00 if el is None else el) #el = 0. if nonetype to represent no removal rate change

# #     #use information in pd_rr to get CBFTR_record - make new record if necessary\n",
# #     myCBFTR = Base.metadata.tables['combo_bmp_feasibility_test_results']
# #     myCBFTR_id = SQLA_main.insertupdateRec(myCBFTR,{'facility_id':myFacility.id, 
# #                                                     'combo_bmps_id':pd_rr['combos_bmp_id'][0],
# #                                                     'is_feasible':1
# #                                                    },
# #                    and_(
# #                         myCBFTR.c['facility_id'] == myFacility.id,
# #                         myCBFTR.c['combo_bmps_id'] == pd_rr['combos_bmp_id'][0]
# #                         ))
# #     session.flush()
    
# #     pd_RedWRSBaseScore = _CalcPollReduction(myFacility.id, pollLS, pd_rr, pd_ExConcs)
    
# #     #get costs in pandas
# #     sumCIP = sum(pd_basebmps.loc[bmp_id,'calc_cip_cost'] for bmp_id in bmpCombo)
# #     sumOM = sum(pd_basebmps.loc[bmp_id,'calc_om_cost'] for bmp_id in bmpCombo)
# #     pd_sums = pd.DataFrame([{'calc_cip_cost':sumCIP, 'calc_om_cost': sumOM}])

# #     if ShowCalculations:
# #         print ('Here is a summary of the combo: ', list(bmpCombo))
# #         display(pd.concat([pd_rr,pd_sums,pd_RedWRSBaseScore], axis = 1))
# #     #return several dataframes:
# #     return pd_rr, pd_RedWRSBaseScore, pd_sums

# def Eval_FacBMPCombos(ls_FacIDs, ShowCalculations):
#     '''
#     #make a big DF - EMPTY at first. populated as simulation advances:
#     COLUMNS:
#     [FACILITY_ID, COMBO_ID, BMP_FINGERPRINT, [removal_rates], [exConcs], [exPEPScores],exWRSTotal ,[redConcs], [redPEPScores], redWRSTotal
    
    
#     '''
    
    
#     ls_dict = [result for FacID in ls_FacIDs for result in BBMP_Eval.evalFacility_BaseBMP(FacID, ShowCalculations)]
#     pd_FsblBMPs = pd.DataFrame(ls_dict).set_index('base_bmp_id')
#     if ShowCalculations:     display(pd_FsblBMPs) 
#     pd_FsblBMPs = pd_FsblBMPs.loc[pd_FsblBMPs['is_feasible'] == 1]
#     if ShowCalculations:
#         print ('****These are the feasible base bmps. I\'ll use them to make combos:****')
#         display (pd_FsblBMPs)
        
#     feas_ls = df.index#send feasible base bmp ids to list
        
# #     for CBOLen in range (1, len(feas_ls)+1): #+1 so it's inclusive of last count
# #         for combo in  itertools.combinations(feas_ls,CBOLen):    
    


# #     pd_temp = pd.DataFrame()

# # #     for CBOLen in range (1, len(feas_ls)+1): #+1 so it's inclusive of last count
# # #         for combo in  itertools.combinations(feas_ls,CBOLen):
# # #             pd_temp = pd.concat([pd_temp,Eval_FacBMPCombo(df,aFac, list(combo),ShowCalculations)])
# #     print ('****Completed base bmp feasibility evaluation.****')
# #     print ('****Evaluating combinations of feasible base bmps...****')
# #     ls_pd = [Eval_FacBMPCombo(df,aFac, list(combo),ShowCalculations)
# #             for CBOLen in range (1, len(feas_ls)+1) #+1 so it's inclusive of last count
# #                  for combo in itertools.combinations(feas_ls,CBOLen)
# #             ]           

# #     print ('      There are ', len(ls_pd), ' combinations of feasible Base BMPs.')
# #     return ls_pd

    
    
# def Eval_All_FacBMPCombos():
#     #we need 2 dataframes:
#     #1. Ex Concs for each facility
#     #2. A Feasibile bmp option for each facility, including removal rates
    
    
#     ShowCalculations = False
#     session.query(CBFTR).delete(synchronize_session = False)
    
#     print ('Evaluating feasibile BMP Options for each facility:')
#     #Only analyze bmps at facilities we have data for. make list of these facilities.
#     ls_FacIDs = [np.asscalar(id) for id in pd_exPEP_sum['Facility_ID']] #id given as numpy int. cast to python int https://stackoverflow.com/questions/9452775/converting-numpy-dtypes-to-native-python-types
#     ls_FacIDs = [1,2]
#     Eval_FacBMPCombos(ls_FacIDs,ShowCalculations)
    
# #     for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id)):
# #         ls_pd =  [Eval_FacBMPCombos(aFac, ShowCalculations) for aFac in session.query(Facility_Chars).filter(Facility_Chars.id.in_(ls_id))]
# #     return ls_pd
    


# #############################################################################################################
# #                           EVALUATE FEASIBILITY OF BMP COMBOS 
# #                   (DEFINE GLOBAL VARIABLE: pd_BaseBMPCombos)
# ############################################################################################################ 
# start_time = time.time()
# Eval_All_FacBMPCombos()
# print ('--- %s execution time in seconds ---' % (time.time() - start_time))

# session.commit()

In [None]:
print(type(session.query(Facility_Chars.Fac_Name).filter(Facility_Chars.id == 1).first()[0]))

# BMP FEASIBILITY EVALUATION
Talk about it...

Global variables related to existing sampling data include:  
 - 
 - 

Defined several functions that will be used by BMP Option Evaluation. These include:  
 - 
 - 

In [None]:
'''
ESTIMATE FACIITY PEP RISK REDUCTION BY USING CERTAIN BMP COMBOS
'''
import random
def CalWRS_PEP_Reduction(pd_Reduction):
    pd_return = pd_Reduction * pdWRS_PEP

FacGroup = pd_BaseBMPCombos.groupby('Facility_ID') #group combo options by facility_id
pd_BMPOpt = FacGroup.apply(lambda aFac:  aFac.iloc[random.randint(0,aFac.shape[0]-1)]) #randomly select a combo option for each facilityp


In [None]:
# session.close()
# engine.dispose()


# This is scratch paper...

In [None]:
# http://pythonhow.com/accessing-dataframe-columns-rows-and-cells/
import pandas as pd #import in pandas library
print ('#get csv data and read into pandas')
df1=pd.read_csv("http://pythonhow.com/wp-content/uploads/2016/01/Income_data.csv")
print (df1)
print ('#write new dataframe w/ index set to the "State" column in the csv')
df2=df1.set_index("State").copy()
print (df2)
print ('#extract a portion of the dataframe: States = Alaska to Arkansas; and Dates 2005:2007')
print (df2.loc["Alaska":"Arkansas","2005":"2007"])

print ('Get only certain States, using a list of states:')
getStates = ['Alaska', 'Arizona']
print (df2.loc[getStates])

print ('#slice a column:')
df2.loc[: , "2005"]
print ('get a cell:')
df2.loc['Alaska','2005']
print ('#get max of 2005 data')
print (df2.loc[:,'2005'].max())
print ('take 2005 column and put into list')
LS = df2['2005'].tolist() #this is a series. we use the .tolist() to convert from series to list
print (type(LS))


In [None]:
df = pd.DataFrame({'col1' : [1.0] * 5, 
                   'col2' : [2.0] * 5, 
                   'col3' : [3.0] * 5 }, index = range(1,6),)
display(df)
df2 = pd.DataFrame({'col1' : [10.0] * 5, 
                    'col2' : [100.0] * 5, 
                    'col3' : [1000.0] * 5 }, index = range(1,6),)
display(df2)
df.mul(df2, 0) # element by element multiplication no problems

In [None]:
import datetime

# xmin = datetime.datetime.strptime('1/1/2018', "%m/%d/%Y").date()
# xmax = datetime.datetime.strptime('5/6/2018', "%m/%d/%Y").date()

# xmin <= datetime.date(2018,1,5) <= xmax

#     Wet Season is from: January 1 through April 30 and November 1 through December 31
#     Dry Season is from: May 1 through October 31

SampleDate = datetime.date(2018,11,1)

#Wet Season 1:
if datetime.date(SampleDate.year, 1,1) <= SampleDate <= datetime.date(SampleDate.year, 4,30):
    print ('ws 1')
elif datetime.date(SampleDate.year, 5,1) <= SampleDate <= datetime.date(SampleDate.year, 10,31):
    print ('dry')
else:
    print ('ws 2')
    
    
import numpy as np    
# np.max([float('nan'),0])
np.max([0,float('nan')])

if math.isnan(10)

In [None]:
#import the pandas library
import pandas as pd

ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
         'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
         'Rank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2],
         'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],
         'Points':[876,789,863,673,741,812,756,788,694,701,804,690]}
df = pd.DataFrame(ipl_data)

# display (df)

# display (df.groupby('Team').groups)


# import numpy as np


# grouped = df.groupby('Year')
# print (grouped['Points'].agg(np.max))

grp = df.groupby(['Team','Year'])
print (grp)
grp.apply(lambda x: print (x.iloc[0]['Points']))


In [None]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [0.1, 0.2]},
                      index=['a', 'b'])
for idx in df.index:
    print (idx)
    print (df.loc[idx]['col2'])

In [38]:
import pandas as pd
import numpy as np
data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 
        'year': [2012, 2012, 2013, 2014, 2014], 
        'reports': [4, 24, 31, 2, 3],
        'coverage': [25, 94, 57, 62, 70]}
df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])
display(df)

dfx = df.copy()

capitalizer = lambda x: x.upper()

dfx['name'] = dfx['name'].apply(capitalizer)
display(df)
display(dfx)

data.name.unique

Unnamed: 0,coverage,name,reports,year
Cochice,25,Jason,4,2012
Pima,94,Molly,24,2012
Santa Cruz,57,Tina,31,2013
Maricopa,62,Jake,2,2014
Yuma,70,Amy,3,2014


Unnamed: 0,coverage,name,reports,year
Cochice,25,Jason,4,2012
Pima,94,Molly,24,2012
Santa Cruz,57,Tina,31,2013
Maricopa,62,Jake,2,2014
Yuma,70,Amy,3,2014


Unnamed: 0,coverage,name,reports,year
Cochice,25,JASON,4,2012
Pima,94,MOLLY,24,2012
Santa Cruz,57,TINA,31,2013
Maricopa,62,JAKE,2,2014
Yuma,70,AMY,3,2014


AttributeError: 'dict' object has no attribute 'name'

In [None]:
def div0( a, b ):
    """ ignore / 0, div0( [-1, 0, 1], 0 ) -> [0, 0, 0] """
    with np.errstate(divide='ignore', invalid='ignore'):
        c = np.true_divide( a, b )
        c[ ~ np.isfinite( c )] = 0  # -inf inf NaN
    return c

div0( [-1, 0, 1], 0 )


In [None]:
afile = open("testfile.txt","a") 
# file.write("Hello World") 
# file.write("This is our new text file") 
# file.write("and this is another line.") 
# file.write("Why? Because we can.") 
print ("test", file=afile)
afile.close() 

In [None]:
# import pandas as pd
# x = pd.DataFrame({0: [1,2,float('nan')], 1: [4,5,6], 2: [7,8,9] })
# y = pd.Series([-1, float('nan'), -1])
# print (y)
# # display(x.apply(lambda col: y*col))

# q = session.query(
#       PRR.r_tss, PRR.r_turbidity, PRR.r_p, PRR.r_n, PRR.r_nn, PRR.r_an,
#       PRR.r_og, PRR.r_cu, PRR.r_zn, PRR.r_fe, PRR.r_phmin, PRR.r_phmax
# ).limit(1)
# pd_rr = pd.read_sql(q.statement,session.bind).applymap(lambda el: 0 if el is None else el) #change nonetypes to 0 rem
# display(pd_rr)
# ls_c = ['c_' + Constituent for Constituent in pollLS] #make list of concentration headers 
# pd_c = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] == 1, ls_c]
# display(pd_c)

# print (pd_rr['r_tss'][0])

# # pd_ConcsRed = pd_ExConcs.loc[pd_ExConcs['Facility_ID'] ==155, ['Facility_ID', 'sample_date']]
# # for Constituent in pollLS:
# #     pd_ConcsRed['c_' + Constituent] = pd_c['c_' + Constituent] * pd_rr['r_' + Constituent][0]



# display(pd_ConcsRed)

In [None]:
#WHICH METHOD IS FASTER? APPLY OR LIST COMPREHENSION?
num = 10000
start_time = time.time()
def fun1a(x):
    return pd.DataFrame([{'fun1':x.loc['id']}])
pd_maina = pd.DataFrame([{'id': i} for i in range(0,num)])
pd_maina['fun1'] = pd_maina.apply(lambda x: fun1a(x), axis=1)
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
# display(pd_maina)

start_time = time.time()
def fun1b(x):
    return pd.DataFrame([{'fun1':x}])
pd_mainb = pd.DataFrame([{'id': i} for i in range(0,num)])
ls = [fun1b(x) for x in range(0,num)]
pd_tmp = pd.concat(ls)
pd_tmp.reset_index(drop=True, inplace=True)
# display(pd_mainb)
# display(pd_tmp)
g = pd.concat([pd_mainb,pd_tmp], axis=1)
print ('--- %s execution time in seconds ---' % (time.time() - start_time))
# display(g)

# display(pd_main)

In [None]:
#nested list comprehension to concat dataframes:


def fun(x):
    return pd.DataFrame([{'1':x*1}]), pd.DataFrame([{'2':x*2}]), pd.DataFrame([{'3':x*3}])
#this LC stores 3 dataframes per fun call into a single element of list LS
ls = [fun(x) for x in range(0,10)]

#this concats the stored dataframe lists into 3 separate dataframes, outputting each dataframe as we go
for i in range(0,3):
    col = [x[i] for x in ls]
    dfcol = pd.concat(col)
    display(dfcol)

#this LC captures the dataframes into a list and then concatenates to a single dataframe
lsall = [pd.concat([x[i] for i in range(0,3)], axis=1)  for x in ls]
df = pd.concat(lsall)
df

In [None]:
def xx(g):
    return g*1, 2, 4


ls_pd = [
            xx(i) for CBOLen in range (1,4) for i in range(0,CBOLen)
        ]
print (len(ls_pd))
print (ls_pd)

for CBOLen in range (1,4):
    for i in range(0,CBOLen):
        print (xx(i))
        

In [None]:
def fun(i):
    return [{'a':1, 'b':2}]
# ls = [{}]
# ls = ls + fun()
ls = [a for i in range(4) for a in fun(i) ]

print (ls)

In [None]:
#SHALLOW COPY PANDAS
import pandas as pd



ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
         'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
         'Rank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2],
         'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],
         'Points':[876,789,863,673,741,812,756,788,694,701,804,690]}
df = pd.DataFrame(ipl_data)
# display (df)

def fun(pd_me):
    print ('df id in fun: ', id(pd_me))
    pd_me = pd.concat([pd_me,pd_me]) #<--this causes a new item to be made
    return pd_me

print ('df id before fun pass ', id(df))
df_new = fun(df)
print ('df id after fun pass ', id(df_new))

df_new['g'] = 0
print (id(df_new))


def fun2(aslice):
    aslice['a'] = 100
    return aslice
    
df.loc[:,['Year']] = fun2(df.loc[:,['Year']])
display (df)

In [None]:
#RENAME PANDAS COLUMNS
ipl_data = {'gTeam': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
         'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
         'gRank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2],
         'gYear': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],
         'gPoints':[876,789,863,673,741,812,756,788,694,701,804,690]}
df = pd.DataFrame(ipl_data)


ls = list(df.columns)
dict_repl = {col: col.replace('g','',1) for col in ls}
df = df.rename(columns = dict_repl)
print (dict_repl)
print (df.shape[0])
df


In [34]:
df = pd.DataFrame({'rep':[0,1,1,1,2,3,2,3,4,5,1]})
df['rep_f'] = df['rep']!=df['rep']
display(df)
df['rep_f'] = (df['rep']!=df['rep'].shift())
df['rep_f'] = (df['rep']!=df['rep'].shift()).cumsum()-1
df

Unnamed: 0,rep,rep_f
0,0,False
1,1,False
2,1,False
3,1,False
4,2,False
5,3,False
6,2,False
7,3,False
8,4,False
9,5,False


Unnamed: 0,rep,rep_f
0,0,0
1,1,1
2,1,1
3,1,1
4,2,2
5,3,3
6,2,4
7,3,5
8,4,6
9,5,7
