In [16]:

#Import Libraries
#***************
from datetime import datetime  as dt
import pandas as pd
import re
import math

#Named variables Definition
#**************************
#Rates for call and form referrals.
#NOTE(review): presumably the amount billed per accepted referral; currency/units are not shown here - confirm.
call_rate = 125
form_rate = 125
#Minimum call length threshold.
#NOTE(review): presumably expressed in the same unit as the CA report's phone_duration column - confirm.
minimum_call_duration = 60
#Input files (relative paths)
brand_report_file = "endurance/endurance_brand_report_201908.csv"
ca_report_file = "endurance/endurance_ca_report_201908.csv"
#The brand report is exported from Excel, whose default CSV separator is ;
brand_report_separator = ";"

#Field names in Brand Report
brand_report_phone_field_name = 'Phone'
brand_report_uuid_field_name = 'Vendor Id2'
brand_report_email_field_name = 'Email'

#The following are field names for the CA report.
#They are not expected to change, but keeping them as named variables
#avoids a massive modification if they ever do.
ca_report_originating_number_field_name = 'originating_number'
ca_report_phone_number_field_name = 'phone_number'
ca_report_phone_form_field_name = 'form_phone'
ca_report_uuid_field_name = 'uuid'
ca_report_email_field_name = 'form_email'
ca_report_referral_path_field_name = 'referral_path'
#Expected values of the referral_path field
ca_report_form_referral_path_field_value = 'form'
ca_report_call_referral_path_field_value = 'call'
ca_report_click_referral_path_field_value = 'click'
ca_report_raw_referral_id_field_name = 'raw_referral_id'
ca_report_reason_why_not_billed_field_name = 'reason_why_not_billed'
ca_report_phone_duration_field_name = 'phone_duration'


#Transformation functions
#------------------------

#The clean_US_phone_number function makes the following assumptions:
#- All numbers belong to the US. The first digit is removed only when it is the US country code: 1
#- Extension numbers and other additional info may appear at the end of the number, never at the beginning
#This function contains a lot of defensive code
def clean_US_phone_number(text_phone_number):
    """Normalize a US phone number to a string of at most 10 digits.

    Every non-digit character is dropped, a leading ``1`` (US country code)
    is removed, and anything after the first 10 remaining digits (for
    example an extension) is discarded.

    Parameters
    ----------
    text_phone_number : str, int, float or None
        Raw phone number. Null values (``None``, ``NaN``, ``pd.NA``) are
        accepted.

    Returns
    -------
    str
        The cleaned digits. ``''`` when the input is null or contains no
        digits; fewer than 10 digits when the input itself is too short.
    """
    if pd.isnull(text_phone_number):
        return ''

    # pandas loads numeric columns that contain NaN as floats, so a number can
    # arrive as 6145600872.0. Convert whole floats to int first; otherwise the
    # ".0" suffix would contribute a spurious trailing "0" digit.
    if isinstance(text_phone_number, float) and text_phone_number.is_integer():
        text_phone_number = int(text_phone_number)

    # Raw string: "\D" in a normal string literal is an invalid escape sequence.
    modified = re.sub(r"\D+", "", str(text_phone_number))

    if modified == '':
        return ''

    # Remove the country code
    if modified[0] == '1':
        modified = modified[1:]

    # Keep only the first 10 digits: drops extensions or other trailing info.
    # Slicing is a no-op when fewer than 10 digits are left (too-short input).
    return modified[:10]
    
#The following function is more forgiving in the country code but is unable to remove extension info
def clean_phone_number_no_ext(text_phone_number):
    """Return the last 10 digits of a phone number as a string.

    Whatever precedes the last 10 digits is treated as the country code and
    dropped, so any country-code format is tolerated. Trailing information
    such as an extension cannot be removed by this function.

    Parameters
    ----------
    text_phone_number : str, int, float or None
        Raw phone number.

    Returns
    -------
    str
        Up to 10 digits; ``''`` when the input contains no digits
        (including ``None`` and ``NaN``).
    """
    # Whole-number floats (e.g. 18557070872.0 from a numeric column containing
    # NaN) must be converted to int first: otherwise the ".0" suffix adds a
    # trailing "0" digit and the last-10 slice returns the wrong number.
    if isinstance(text_phone_number, float) and text_phone_number.is_integer():
        text_phone_number = int(text_phone_number)

    # Raw string: "\D" in a normal string literal is an invalid escape sequence.
    digits = re.sub(r"\D+", "", str(text_phone_number))

    # Remove the country code by keeping only the last 10 digits
    return digits[-10:]

def clean_phone_duration(phone_duration):
    """Convert a phone duration to an integer, mapping missing values to 0.

    Parameters
    ----------
    phone_duration : float, int or None
        Duration value; may be ``NaN``, ``None`` or ``pd.NA`` when missing.

    Returns
    -------
    int
        ``0`` for a missing value, otherwise ``int(phone_duration)``
        (fractional parts are truncated).
    """
    # pd.isna also recognizes None and pd.NA, on which math.isnan raises TypeError.
    if pd.isna(phone_duration):
        return 0
    return int(phone_duration)

def is_blank(myString):
    """Return True when the string form of ``myString`` is empty or only whitespace."""
    text = str(myString)
    return len(text.strip()) == 0

def is_not_blank(myString):
    """Return True when the string form of ``myString`` has a non-whitespace character."""
    text = str(myString)
    return text.strip() != ""

#Valid for Pandas DataFrames
def is_blank_or_nan(myString):
    """Return True when ``myString`` is blank (empty/whitespace as text) or a pandas null."""
    # Blank text short-circuits: the null check is only needed for non-blank text.
    if not str(myString).strip():
        return True
    return bool(pd.isna(myString))

#Valid for Pandas DataFrames
def is_not_blank_or_nan(myString):
    """Return True when ``myString`` is neither blank (as text) nor a pandas null."""
    # Blank text short-circuits: the null check is only needed for non-blank text.
    if not str(myString).strip():
        return False
    return not pd.isna(myString)


#Data Gathering and Wrangling
#****************************

#Load the files. The brand report comes from an Excel file, whose default separator when exporting to CSV is ;
#The CA report always comes from a Mode query
#NOTE: only the CA report is loaded in this cell
ca_report = pd.read_csv(ca_report_file)
#Take an independent copy of the rows at positions 100-119 as a small sample,
#so that columns added to the sample do not touch ca_report
ca_report_small = ca_report.iloc[100:120].copy()
print(ca_report_small)



      submitted_cst_date                 campaign_name  raw_referral_id  \
100  2019-08-01 00:00:00       Endurance Auto Warranty          7614028   
101  2019-08-01 00:00:00       Endurance Auto Warranty          7610543   
102  2019-08-01 00:00:00       Endurance Auto Warranty          7614534   
103  2019-08-01 00:00:00       Endurance Auto Warranty          7610869   
104  2019-08-01 00:00:00                  Endurance TV          7615631   
105  2019-08-01 00:00:00  Endurance Auto Warranty - RL          7610973   
106  2019-08-01 00:00:00                  Endurance TV          7615637   
107  2019-08-01 00:00:00       Endurance Auto Warranty          7611091   
108  2019-08-01 00:00:00       Endurance Auto Warranty          7610858   
109  2019-08-01 00:00:00       Endurance Auto Warranty          7611500   
110  2019-08-01 00:00:00  Endurance Auto Warranty - RL          7611886   
111  2019-08-01 00:00:00       Endurance Auto Warranty          7611529   
112  2019-08-01 00:00:00 

In [22]:
# A referral counts as accepted when no "reason why not billed" was recorded
# (the field is blank or NaN).
ca_report_small['accepted_referral'] = (
    ca_report_small[ca_report_reason_why_not_billed_field_name].apply(is_blank_or_nan))

# Keep accepted referrals whose phone duration reaches the configured minimum.
# Rows with a NaN phone duration compare False against the threshold and are
# therefore excluded as well.
ca_report_small_accepted = ca_report_small.loc[
    ca_report_small['accepted_referral']
    & (ca_report_small[ca_report_phone_duration_field_name] >= minimum_call_duration)
].copy()
print(ca_report_small_accepted)


nan
nan
nan
nan
Short Call
nan
Short Call
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
      submitted_cst_date            campaign_name  raw_referral_id  \
100  2019-08-01 00:00:00  Endurance Auto Warranty          7614028   
102  2019-08-01 00:00:00  Endurance Auto Warranty          7614534   

           object uuid originating_number  phone_number form_phone  \
100         My CA  NaN       +16145600872  1.855707e+10        NaN   
102  Buyers Guide  NaN       +18086887650  1.877370e+10        NaN   

    reason_why_not_billed_duplicate_column_name_1 referral_path  \
100                                           NaN          call   
102                                           NaN          call   

    conaffid_medium form_email  phone_duration form_city form_state  \
100             PPC        NaN           506.0       NaN        NaN   
102         Organic        NaN          1746.0       NaN        NaN   

    crm_status reason_why_not_billed  cpl_revenue  accepted_referral