In [1]:
import pandas as pd
import numpy as np
pd.options.display.max_rows = 6

import argparse
import httplib2
import pprint
import time
import datetime
from StringIO import StringIO

from apiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials(client_id=None, client_secret=None, refresh_token=None):
    """Create Google OAuth2 credentials.

    Every value is resolved in this order: the explicit argument, then the
    matching environment variable, then the literal this notebook has always
    used (kept only so that a bare ``create_credentials()`` keeps working).

    Args:
        client_id: Client id of a Google Cloud console project. Falls back to
          the DS_CLIENT_ID environment variable.
        client_secret: Client secret of a Google Cloud console project. Falls
          back to the DS_CLIENT_SECRET environment variable.
        refresh_token: A refresh token authorizing the Google Cloud console project
          to access the DS data of some Google user. Falls back to the
          DS_REFRESH_TOKEN environment variable.
    Returns:
        OAuth2Credentials
    """
    import os  # local import: only this function needs it

    # NOTE(security): these literals are secrets committed to the notebook.
    # They should be rotated and deleted once every caller passes arguments
    # or sets the environment variables above.
    legacy_client_id = '549790627766-qnth4m8qvuimg87pnsp4b82lhte7dk5a.apps.googleusercontent.com'
    legacy_client_secret = 'Vta4lQLOL49vVYvktkcPGRNb'
    legacy_refresh_token = '1/ab7pCGMu3K5AveG0UOUpQ0J08vCp6uM357O8qmoPDMs'

    if client_id is None:
        client_id = os.environ.get('DS_CLIENT_ID', legacy_client_id)
    if client_secret is None:
        client_secret = os.environ.get('DS_CLIENT_SECRET', legacy_client_secret)
    if refresh_token is None:
        refresh_token = os.environ.get('DS_REFRESH_TOKEN', legacy_refresh_token)

    # access_token=None / token_expiry=None: no access token is supplied here,
    # only the refresh token and the token endpoint.
    return OAuth2Credentials(access_token=None,
                             client_id=client_id,
                             client_secret=client_secret,
                             refresh_token=refresh_token,
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search client.

    Args:
        credentials: An OAuth2Credentials object, e.g. from create_credentials()
          or from one of the flows in the oauth2client.client package.
    Returns:
        The service object built for the 'doubleclicksearch' API, version 'v2'.
    """
    # authorize() wraps the transport so that the credential headers are
    # applied to every request sent through it.
    authorized_http = credentials.authorize(http=httplib2.Http())

    # Every later API call in this notebook goes through this service object.
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, wait_seconds=10):
    """Poll the API with the reportId until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: How many times to ask whether the report is ready.
        wait_seconds: Pause, in seconds, after a "not ready" answer.
    Returns:
        pd.DataFrame holding the rows of every file fragment (an empty frame
        when the report lists no files), or None when the report never became
        ready within max_attempts or the API answered with an HTTP error.
    """
    import json  # the error handler used `simplejson`, which is never imported

    for _ in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if not json_data['isReportReady']:
                pprint.pprint('Report is not ready. I will try again.')
                time.sleep(wait_seconds)
                continue

            pprint.pprint('The report is ready.')

            # For large reports, DS automatically fragments the report into multiple
            # files. The 'files' property in the JSON object that DS returns contains
            # the list of URLs for file fragment. To download a report, DS needs to
            # know the report ID and the index of a file fragment.
            fragments = []
            for i in range(len(json_data['files'])):
                pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                fragments.append(download_files(service, report_id, str(i)))

            if not fragments:
                # pd.concat() refuses an empty list; keep returning an empty frame.
                return pd.DataFrame()
            # One concat instead of growing a frame fragment by fragment.
            return pd.concat(fragments, ignore_index=True)
        except HttpError as e:
            # The error body is expected to be JSON, but do not let a malformed
            # body hide the HTTP status behind a second exception.
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                reason = 'unknown'

            # See Response Codes
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            break

    # Either the attempts ran out or an HTTP error stopped the polling.
    return None
        
def download_files(service, report_id, report_fragment):
    """Fetch one file fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame with the parsed contents of that fragment.
    """
    fragment_request = service.reports().getFile(reportId=report_id,
                                                 reportFragment=report_fragment)
    # The response body is wrapped in a file-like object so read_csv can parse it.
    csv_text = fragment_request.execute()
    csv_buffer = StringIO(csv_text)
    return pd.read_csv(csv_buffer)

def request_report(service, start_date, end_date, columns):
    """Ask DS to generate a keyword report and return the ID it assigns.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: Value sent as timeRange.startDate.
        end_date: Value sent as timeRange.endDate.
        columns: list of column names that will be in the report.
    Returns:
        The report id.
    """
    report_scope = {
        "agencyId": "20100000000000932",
        "advertiserId": "21700000001406447",  # Callaway Apparel - Perry Ellis International
    }
    # Other scope values that were used with this request and are currently disabled:
    #   "engineAccountId": "700000001564770"   # Google - Callaway Apparel
    #   "advertiserId": "21700000001131725"    # Celebrity Cruise
    #   "engineAccountId": "700000001217833"   # Celebrity Cruise
    #   "engineAccountId": "700000001561242"   # Celebrity Cruise - Juba Plus

    # The API takes one {'columnName': ...} object per requested column.
    column_specs = []
    for column_name in columns:
        column_specs.append({'columnName': column_name})

    time_range = {
        "startDate": start_date,
        "endDate": end_date,
    }

    # A label filter that was used before and is currently disabled:
    #   "filters": [{
    #       "column": {"columnName": "keywordLabels"},
    #       "operator": "containsElement",
    #       "values": ["JubaNovTest"],
    #   }]
    report_body = {
        "reportScope": report_scope,
        "reportType": "keyword",
        "columns": column_specs,
        "timeRange": time_range,
        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false",
    }

    response = service.reports().request(body=report_body).execute()
    return response['id']



In [14]:
# Authorize against the DoubleClick Search API and build the service object
# that every later request in this notebook goes through.
creds = create_credentials()

service = get_service(creds)

# Reporting window; both values are sent to the API as timeRange startDate/endDate.
end_date = "2017-06-15"
start_date = "2017-04-02"

print start_date, end_date
# Two reports are requested over the same window. Only the report IDs come
# back here; the data is fetched later with poll_report().
# 1) keyword metrics (bids, impressions, clicks, cost, revenue, ...)
REPORTID_nonHVA = request_report(service, start_date, end_date, 
                                 ['campaign', 'adGroup', 'keywordText', 'keywordMatchType', 'engineStatus', 
                                  'effectiveKeywordMaxCpc', 'keywordMaxCpc', 'impr', 'clicks', 'cost', 
                                  'ctr', 'avgCpc', 'avgPos', 'dfaRevenue', 'dfaTransactions'])
# 2) the same keyword keys, with the figures broken down by floodlightActivity
REPORTID_HVA = request_report(service, start_date, end_date, 
                              ['campaign', 'adGroup', 'keywordText', 'keywordMatchType', 
                               'floodlightActivity', 'dfaActions', 'dfaRevenue', 'dfaTransactions'])

2017-04-02 2017-06-15


In [15]:
# Wait for each requested report and load it into a DataFrame.
# poll_report() yields None instead of a frame when the report never becomes
# ready or the API answers with an HTTP error, so check before using these.
non_hva = poll_report(service, REPORTID_nonHVA)
hva = poll_report(service, REPORTID_HVA)

'Report is not ready. I will try again.'
'The report is ready.'
u'Downloading fragment 0 for report AAAn3BM_GDEsQqAq'
'The report is ready.'
u'Downloading fragment 0 for report AAAnNBwe9wY_9xrz'


In [5]:
# Floodlight activity names grouped into three lists (presumably HVA tiers
# 1, 2 and 3 -- TODO confirm). merge_hva_and_non_hva() does not read these
# variables; it carries its own copy of the column_hva2 + column_hva3 names.
column_hva1 = ['Callaway - About Us (Who We Are)', 'Callaway - Create Account',
               'Callaway - Subscription (Join Our Email List)', 'Callaway - Subscription Success']
# NOTE(review): 'Callaway - Clearance,Callaway - Unsubscribe - HVA 2' is a
# single string; it looks like two activity names joined by a comma inside
# the quotes -- confirm against the activity names in the report.
column_hva2 = ['Callaway - Big & Tall', 'Callaway - Clearance,Callaway - Unsubscribe - HVA 2', 
               'Callaway - Search - HVA2', 'Callaway - Big & Tall_Belts', 'Callaway - Big & Tall_Outerwear', 
               'Callaway - Big & Tall_Pants-Shorts', 'Callaway - Big & Tall_Polos', 
               'Callaway - Clearance_Big & Tall Clearance', 'Callaway - Clearance_Mens Clearance', 
               'Callaway - Clearance_Womens Clearance', 'Callaway - Features_Best-Sellers', 
               'Callaway - Features_Callaway X', 'Callaway - Features_New Arrivals', 
               'Callaway - Features_Opti-Series', 'Callaway - Men Golf Shoes', 'Callaway - Men_Belts', 
               'Callaway - Men_Outerwear', 'Callaway - Men_Pants', 'Callaway - Men_Polos', 
               'Callaway - Men_Shorts', 'Callaway - Men_Standard Collection', 'Callaway - Products - Belts', 
               'Callaway - Products - Jacket', 'Callaway - Products - Jackets', 
               'Callaway - Products - Men Golf Shoes', 'Callaway - Products - Pants', 
               'Callaway - Products - Polo', 'Callaway - Products - Polos', 'Callaway - Products - Short', 
               'Callaway - Products - Shorts', 'Callaway - Products - Skorts', 'Callaway - Products - Sweaters', 
               'Callaway - Products - Vests', 'Callaway - Products - Women Golf Shoes', 
               'Callaway - Women Golf Shoes', 'Callaway - Women_Belts', 'Callaway - Women_Outerwear', 
               'Callaway - Women_Pants', 'Callaway - Women_Polos', 'Callaway - Women_Skorts & Shorts', 
               'Callaway - Women_Standard Collection']
column_hva3 = ['Callaway - Add to Cart - HVA 3', 'Callaway - Order Status (Orders)', 'Callaway - Billing_Payment',
               'Callaway - Check Out', 'Callaway - Order Review', 'Callaway - Paypal', 'Callaway - Secure Checkout',
               'Callaway - Shipping']

In [33]:
def merge_hva_and_non_hva(hva, non_hva, columns_hva=None):
    '''Attach per-keyword HVA counts to the keyword metrics report.

    The HVA report has one row per keyword and floodlight activity. It is
    turned into one row per keyword with one count column per activity, then
    left-joined onto the metrics report.

    Args:
        hva: pd.DataFrame with the columns campaign, adGroup, keywordText,
            keywordMatchType, floodlightActivity and dfaActions.
        non_hva: pd.DataFrame with the same four key columns plus the metric
            columns returned below.
        columns_hva: list of string, the floodlight activity names that count
            as HVA. Defaults to the Callaway list used by this notebook.
    Returns:
        pd.DataFrame with one row per non_hva row and the added columns
        'HVA' (sum of the activity counts, as int) and 'ROI'
        (dfaRevenue / cost; rows with cost 0 yield inf or NaN).
    '''
    if columns_hva is None:
        # NOTE(review): 'Callaway - Clearance,Callaway - Unsubscribe - HVA 2'
        # is one string; it looks like two activity names joined by a comma
        # inside the quotes. Left unchanged -- confirm against the report.
        columns_hva = ['Callaway - Big & Tall', 'Callaway - Clearance,Callaway - Unsubscribe - HVA 2',
                       'Callaway - Search - HVA2', 'Callaway - Big & Tall_Belts', 'Callaway - Big & Tall_Outerwear',
                       'Callaway - Big & Tall_Pants-Shorts', 'Callaway - Big & Tall_Polos',
                       'Callaway - Clearance_Big & Tall Clearance', 'Callaway - Clearance_Mens Clearance',
                       'Callaway - Clearance_Womens Clearance', 'Callaway - Features_Best-Sellers',
                       'Callaway - Features_Callaway X', 'Callaway - Features_New Arrivals',
                       'Callaway - Features_Opti-Series', 'Callaway - Men Golf Shoes', 'Callaway - Men_Belts',
                       'Callaway - Men_Outerwear', 'Callaway - Men_Pants', 'Callaway - Men_Polos',
                       'Callaway - Men_Shorts', 'Callaway - Men_Standard Collection', 'Callaway - Products - Belts',
                       'Callaway - Products - Jacket', 'Callaway - Products - Jackets',
                       'Callaway - Products - Men Golf Shoes', 'Callaway - Products - Pants',
                       'Callaway - Products - Polo', 'Callaway - Products - Polos', 'Callaway - Products - Short',
                       'Callaway - Products - Shorts', 'Callaway - Products - Skorts', 'Callaway - Products - Sweaters',
                       'Callaway - Products - Vests', 'Callaway - Products - Women Golf Shoes',
                       'Callaway - Women Golf Shoes', 'Callaway - Women_Belts', 'Callaway - Women_Outerwear',
                       'Callaway - Women_Pants', 'Callaway - Women_Polos', 'Callaway - Women_Skorts & Shorts',
                       'Callaway - Women_Standard Collection',
                       'Callaway - Add to Cart - HVA 3', 'Callaway - Order Status (Orders)', 'Callaway - Billing_Payment',
                       'Callaway - Check Out', 'Callaway - Order Review', 'Callaway - Paypal', 'Callaway - Secure Checkout',
                       'Callaway - Shipping']
    else:
        columns_hva = list(columns_hva)

    key_columns = ['campaign', 'adGroup', 'keywordText', 'keywordMatchType']

    # Collect one plain dict per keyword and build the frame once; growing a
    # DataFrame row by row is quadratic and DataFrame.append no longer exists
    # in current pandas.
    records = []
    for key_values, group in hva.groupby(key_columns):
        record = dict(zip(key_columns, key_values))

        # When an activity occurs more than once for a keyword only its first
        # row is used.
        first_actions = {}
        for activity, actions in zip(group['floodlightActivity'].values,
                                     group['dfaActions'].values):
            first_actions.setdefault(activity, actions)

        for column in columns_hva:
            record[column] = first_actions.get(column, 0)
        records.append(record)

    result = pd.DataFrame(records, columns=key_columns + columns_hva)

    # combine hva and non_hva
    merged = non_hva.merge(result, on=key_columns, how='left')

    # keywords without any HVA row get 0 counts; note this also fills every
    # other missing value of the metrics report with 0
    merged = merged.fillna(value=0)

    # generate new fields
    merged['HVA'] = merged[columns_hva].sum(axis=1).apply(int)
    merged['ROI'] = merged['dfaRevenue'] / merged['cost']
    return merged[['campaign', 'adGroup', 'keywordText', 'keywordMatchType',
                   'keywordMaxCpc', 'impr', 'clicks', 'HVA', 'avgCpc',
                   'avgPos', 'cost', 'dfaRevenue', 'ROI']]

In [34]:
# Restrict both reports to a single campaign, then merge them per keyword.
CAMPAIGN = 'G_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2696'
hva_1 = hva.loc[hva['campaign'] == CAMPAIGN]
non_hva_1 = non_hva.loc[non_hva['campaign'] == CAMPAIGN]

df = merge_hva_and_non_hva(hva_1, non_hva_1)

# Bare expression: the notebook renders the merged table.
df

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,keywordMaxCpc,impr,clicks,HVA,avgCpc,avgPos,cost,dfaRevenue,ROI
0,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Apparel - Exact,big and tall golf clothes,Exact,0.93,238,33,98,1.572727,1.773109,51.90,93.31,1.797881
1,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Apparel - Exact,big and tall golf apparel,Exact,0.99,992,75,244,1.927467,1.536290,144.56,737.00,5.098229
2,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Pants - Exact,tall golf pants,Exact,1.37,389,22,68,1.826818,1.727506,40.19,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Vests - BMM,+3xl +golf +vest,Broad,1.25,105,3,2,0.750000,1.628571,2.25,0.00,0.000000
386,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Vests - BMM,+large +golf +vest,Broad,0.63,1018,1,0,0.850000,2.133595,0.85,0.00,0.000000
387,G_Non-Brand_Callaway Apparel_Big & Tall Clothi...,Vests - BMM,+xxxl +golf +vest,Broad,0.66,70,3,5,0.410000,1.328571,1.23,0.00,0.000000


In [26]:
# Write `df` to CSV without the index column. The target is an absolute,
# machine-specific path.
df.to_csv('/root/jzou/reporting/merged.csv', index=False)

In [23]:
# Distinct campaign names present in the keyword metrics report.
set(non_hva['campaign'])

{'(ROI) Big and Tall',
 '(ROI) Big and Tall RLSA Only',
 '(ROI) Brand General',
 '(ROI) Branded',
 '(ROI) Competitors',
 '(ROI) DSA',
 '(ROI) DSK',
 '(ROI) FNK',
 '(ROI) GSP',
 '(ROI) Golf Jackets',
 '(ROI) Golf Pants',
 '(ROI) Golf Polos',
 '(ROI) Golf Shorts',
 '(ROI) Golfers',
 '(ROI) RLSA',
 '(ROI) Remarketing',
 '(ROI) Remarketing - Dynamic',
 '(ROI) Shirts',
 '(ROI) Shirts RLSA Only',
 '(ROI) Shopping - Exact',
 '(ROI) Shopping - Phrase',
 '(ROI) Shopping Branded KWs - Broad',
 '(ROI) Shopping Branded KWs - Exact',
 '(ROI) Shopping Branded KWs - Phrase',
 "(ROI) Women's Golf RLSA",
 "(ROI) Women's Golf Shirts",
 "(ROI) Women's Golf Shorts",
 'B_Brand_Callaway Apparel_ Shoes _ EST-2696',
 'B_Brand_Callaway Apparel_Big & Tall Clothing_EST-2696',
 'B_Brand_Callaway Apparel_Callaway Test_ EST-2696',
 'B_Brand_Callaway Apparel_Clearance_EST-2696',
 'B_Brand_Callaway Apparel_Collections_EST-2696',
 'B_Brand_Callaway Apparel_Core_EST-2696',
 'B_Brand_Callaway Apparel_General Clothing_ES

In [27]:
# Rows whose keywordText is the literal 'Display Network Stats'.
# NOTE(review): these look like placeholder rows rather than real keywords
# (no match type in the output below) -- confirm before using them in sums.
non_hva[non_hva['keywordText'] == 'Display Network Stats']

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,engineStatus,effectiveKeywordMaxCpc,keywordMaxCpc,impr,clicks,cost,ctr,avgCpc,avgPos,dfaRevenue,dfaTransactions
57350,(ROI) Shirts,CA || B | Callaway > Shirts > YY > ZZ,Display Network Stats,,,1.20,,0,0,0.0,,,,0.0,0
60619,(ROI) Shirts,CA || B | Callaway > Shirts > Poplin > ZZ,Display Network Stats,,,1.20,,0,0,0.0,,,,0.0,0
60787,(ROI) Shirts,Golf Shirts Closeouts,Display Network Stats,,,0.25,,0,0,0.0,,,,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140997,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Big & Tall Pants - BMM,Display Network Stats,,,0.01,,0,0,0.0,,,,0.0,0
141003,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Men General - BMM,Display Network Stats,,,0.01,,0,0,0.0,,,,0.0,0
141006,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Women Shorts & Skorts- Exact,Display Network Stats,,,0.01,,0,0,0.0,,,,0.0,0


In [18]:
# Display the HVA report frame as loaded.
hva

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,floodlightActivity,dfaActions,dfaRevenue,dfaTransactions
0,CA_US || B | Leggings,CA || B | Callaway > Leggings > Layered > ZZ,+leggins +callaway +layered,Broad,,0,0.0,0
1,CA_US || B | Leggings,CA || B | Callaway > Leggings > Layered > ZZ,+layered +leggins +callaway,Broad,,0,0.0,0
2,CA_US || B | Leggings,CA || B | Callaway > Leggings > Layered > ZZ,layered leggins callaway,Phrase,,0,0.0,0
...,...,...,...,...,...,...,...,...
152949,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Hats General - BMM,Display Network Stats,,,0,0.0,0
152950,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Shoes General - Exact,Display Network Stats,,,0,0.0,0
152951,G_Brand_Callaway Apparel_Callaway Test_ EST-2696,Women General - BMM,+callaway +women,Broad,,0,0.0,0


In [11]:
# Pivot the long HVA report (one row per keyword, device, date and floodlight
# activity) into `result`: one row per keyword/device/date with one column
# per activity.
# NOTE(review): this needs `hva` to carry 'keywordText', 'deviceSegment' and
# 'date' columns; the recorded KeyError below shows it was last run against a
# frame without them.
columns_hva = ['CC Website Search','Cruise Packages','Destination Alaska','Destination Bahamas','Destination Bermuda','Destination Europe','Search Results','Specials', u'Staterooms',u'Reservation Step 1',u'Reservation Step 2',u'Reservation Step 3','Courtesy Hold Confirmation Page','Courtesy Hold Creation Page','Courtesy Hold Creation Page 2']

# (floodlight activity, count column, revenue column): counts come from
# dfaTransactions, revenue from dfaRevenue.
booking_activities = [
    ('Paid Booking Confirmation 2015', 'Paid Bookings', 'Paid Booking Revenue'),
    ('Held Booking Confirmation 2015', 'Held Booking', 'Held Booking Revenue'),
]

# Collect plain dicts and build the frame once: growing a DataFrame row by
# row is quadratic and DataFrame.append no longer exists in current pandas.
records = []
for (keyword, device, date), group in hva.groupby(['keywordText', 'deviceSegment', 'date']):
    record = {'keywordText': keyword,
              'deviceSegment': device,
              'date': date}

    # Only the first row of each activity within the group is used.
    first_rows = group.drop_duplicates(subset='floodlightActivity').set_index('floodlightActivity')

    for activity, count_column, revenue_column in booking_activities:
        if activity in first_rows.index:
            record[count_column] = first_rows.at[activity, 'dfaTransactions']
            record[revenue_column] = first_rows.at[activity, 'dfaRevenue']
        else:
            record[count_column] = 0
            record[revenue_column] = 0.0

    for column in columns_hva:
        if column in first_rows.index:
            record[column] = first_rows.at[column, 'dfaActions']
        else:
            record[column] = 0
    records.append(record)

result = pd.DataFrame(records, columns=['keywordText', 'deviceSegment', 'date', 'Paid Bookings', 'Held Booking', 'Held Booking Revenue', 'Paid Booking Revenue'] + columns_hva)

KeyError: 'keywordText'

In [12]:
# Display the pivoted HVA table (empty in the recorded output below).
result

Unnamed: 0,keywordText,deviceSegment,date,Paid Bookings,Held Booking,Held Booking Revenue,Paid Booking Revenue,CC Website Search,Cruise Packages,Destination Alaska,...,Destination Europe,Search Results,Specials,Staterooms,Reservation Step 1,Reservation Step 2,Reservation Step 3,Courtesy Hold Confirmation Page,Courtesy Hold Creation Page,Courtesy Hold Creation Page 2


In [13]:
# Left-join the pivoted HVA columns onto the metrics report; rows without a
# match get NaN in the HVA columns.
# NOTE(review): requires `non_hva` to have 'keywordText', 'deviceSegment' and
# 'date' columns -- the recorded run below failed with a KeyError.
ds = non_hva.merge(result, on = ['keywordText', 'deviceSegment', 'date'], how = 'left')
ds

KeyError: 'keywordText'

In [84]:
# Build the daily keyword/device table `dcm` from `ds`, then write one sheet
# per keyword into two Excel workbooks: a baseline by device and weekday, and
# a comparison of the most recent days against that baseline.
# NOTE(review): `baseline_generator` and `start_date_resid` are used below but
# are not defined in the visible part of this notebook -- they must already
# exist in the kernel.
dcm = ds.fillna(value = 0)
# index=str turns the row labels into strings; the metric columns get their report names.
dcm = dcm.rename(index=str, columns={ "clicks": "Clicks", "cost": "Cost", "impr": "Impr", "avgPos": "Avg pos"})
# generate new fields
dcm['Conversions'] = dcm['Paid Bookings'] + dcm['Held Booking']
dcm['Conversion_revenue'] = dcm['Held Booking Revenue'] + dcm['Paid Booking Revenue']
dcm['HVA1'] = dcm[['CC Website Search','Cruise Packages','Destination Alaska','Destination Bahamas','Destination Bermuda','Destination Europe','Search Results','Specials']].sum(axis=1) 
dcm['HVA2'] = dcm[[u'Staterooms',u'Reservation Step 1',u'Reservation Step 2',u'Reservation Step 3']].sum(axis=1)       
dcm['HVA3'] = dcm[['Courtesy Hold Confirmation Page', 'Courtesy Hold Creation Page', 'Courtesy Hold Creation Page 2']].sum(axis=1)   
# 'date' is parsed as a 'YYYY-MM-DD' string; weekday() gives 0 for Monday .. 6 for Sunday.
dcm['Weekday'] = dcm['date'].apply(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d').weekday())
# sort by date
dcm = dcm.sort_values(by = 'date')

# output columns
# `columns` selects the baseline sheet, `columns2` the 90-day side of the comparison sheet.
columns = ['Keyword', 'Device', 'Weekday', 'Days', u'Clicks', u'Cost', u'Impr', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue', 'CTR', 'Avg CPC', 'Avg pos', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'ROI', 'old_daily_budget', 'Scenario','hva1/days*min(2.5, actual)', 'hva2/days*min(50, actual)', 'hva3/days*min(150, actual)']
columns2 = ['Keyword', 'Device', 'Weekday', 'Days', u'Impr', u'Clicks', 'CTR', u'Cost', 'Avg CPC', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'Conversions', 'Conversion_revenue', 'ROI', 'Impr_90days_avg', 'Clicks_90days_avg', 'CTR_90days_avg', 'Cost_90days_avg', 'Avg CPC_90days_avg', 'Avg pos_90days_avg', 'HVA1_90days_avg', 'HVA2_90days_avg', 'HVA3_90days_avg', 'Cost/HVA1_90days_avg', 'Cost/HVA2_90days_avg', 'Cost/HVA3_90days_avg', 'Conversions_90days_avg', 'Conversion_revenue_90days_avg', 'ROI_90days_avg']
# columns_sum are divided by 'Days' to get a daily average below; columns_avg are copied unchanged.
columns_sum = ['Clicks', 'Cost', 'Impr', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']
columns_avg = ['CTR', 'Avg CPC', 'Avg pos', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'ROI']

# excel of Keyword_device_by_weekday 7*3 = 21 rows, 6 tabs
writer = pd.ExcelWriter('/root/jzou/reporting/reports/DS_API 90 days baseline of Keyword by device and weekday.xlsx')
writer_resid = pd.ExcelWriter('/root/jzou/reporting/reports/DS_API 7 days daily_resid_compare.xlsx')

# 6 KW's baseline of Keyword_device 6*3 = 18 rows
baseline = pd.DataFrame()

for KEYWORD, dcm_1 in dcm.groupby('keywordText'):    
    # create baseline groupby device
    for device, group_device in dcm_1.groupby('deviceSegment'):
        # any device segment other than these three is ignored
        if device not in {'Desktop', 'Mobile', 'Tablet'}: continue
        df = baseline_generator(KEYWORD, device, 'Total', group_device, columns_sum)
        baseline = baseline.append(df, ignore_index = True)
    
    # 1 KW's report for last 90 days 3*7 = 21 rows
    dcm90days_weekday = pd.DataFrame()
    
    # create baseline groupby device and weekday for one keyword 3*7 = 21 rows
    baseline_device_weekday = pd.DataFrame()
    for device, group_device in dcm_1.groupby('deviceSegment'):
        if device not in {'Desktop', 'Mobile', 'Tablet'}: continue
        for weekday, group_weekday in group_device.groupby('Weekday'):
            df = baseline_generator(KEYWORD, device, weekday, group_weekday, columns_sum)
            baseline_device_weekday = baseline_device_weekday.append(df, ignore_index = True)
            
            # the same rows feed both the baseline sheet and the 90-day side of the comparison
            dcm90days_weekday = dcm90days_weekday.append(df, ignore_index = True)
    
    # 90days average columns
    for column in columns_sum:
        dcm90days_weekday['%s_90days_avg'%column] = dcm90days_weekday[column] / dcm90days_weekday['Days']
    for column in columns_avg:
        dcm90days_weekday['%s_90days_avg'%column] = dcm90days_weekday[column]
    dcm90days_weekday = dcm90days_weekday[columns2]
            
    # report for last 7 days
    # string comparison on 'date'; start_date_resid is presumably a 'YYYY-MM-DD' string -- TODO confirm
    dcm7days = dcm_1[dcm_1['date'] >= start_date_resid]
    dcm7days_weekday = pd.DataFrame(columns = ['Keyword', 'Device', 'Weekday', 'Date', 'Days', u'Impr', u'Clicks', 'CTR', u'Cost', 'Avg CPC', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'Conversions', 'Conversion_revenue', 'ROI'])
    for device, group_device in dcm7days.groupby('deviceSegment'):
        if device not in {'Desktop', 'Mobile', 'Tablet'}: continue
        for weekday, group_weekday in group_device.groupby('Weekday'):
            df = baseline_generator(KEYWORD, device, weekday, group_weekday, columns_sum)
            dcm7days_weekday = dcm7days_weekday.append(df, ignore_index = True)
    dcm7days_weekday = dcm7days_weekday[['Keyword', 'Device', 'Weekday', 'Date', 'Days', u'Impr', u'Clicks', 'CTR', u'Cost', 'Avg CPC', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'Conversions', 'Conversion_revenue', 'ROI']]
    
    # merge 90days and 7 days file
    # columns present on both sides get the '_90days_cum' / '_last7days' suffix
    daily_file = dcm90days_weekday.merge(dcm7days_weekday, on = ['Keyword', 'Device', 'Weekday'], how = 'left', suffixes = ['_90days_cum', '_last7days']).fillna(value = 0)

    # compare resid columns
    columns_resid = ['Clicks', 'CTR', 'Cost', 'Avg CPC', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3']
    # 'R.<col>': last-7-days value minus the 90-day value (the daily average
    # when that column exists, otherwise the cumulative one)
    for column in columns_resid:
        last7days = daily_file['%s_last7days'%column]
        try:
            last90days = daily_file['%s_90days_avg'%column]
        except KeyError:
            last90days = daily_file['%s_90days_cum'%column]
        daily_file['R.%s'%column] = last7days - last90days

    # 'R%<col>': that difference divided by the same 90-day value
    # (`last7days` holds the 'R.<col>' difference in this loop)
    for column in columns_resid:
        last7days = daily_file['R.%s'%column]
        try:
            last90days = daily_file['%s_90days_avg'%column]
        except KeyError:
            last90days = daily_file['%s_90days_cum'%column]
        daily_file['R%%%s'%column] = last7days / last90days
    
    # keywords of 30 or more characters are cut to 30 for the sheet name
    daily_file.to_excel(writer_resid, index = False, sheet_name = '%s'% KEYWORD if len(KEYWORD) < 30 else KEYWORD[:30], float_format='%.2f', na_rep = 'null')
    baseline_device_weekday = baseline_device_weekday[columns]
    baseline_device_weekday.to_excel(writer, index = False, sheet_name = '%s'% KEYWORD if len(KEYWORD) < 30 else KEYWORD[:30], float_format='%.2f')
      
# Daily budget optimization on the per-device baseline, written as the
# summary sheet of the baseline workbook.
# NOTE: this is an alias, not a copy -- the new columns are added to
# `baseline` as well.
classified_keywords = baseline
classified_keywords['new_daily_budget'] = None
for index, row in classified_keywords.iterrows():
    if row['ROI'] > 1:
        # ROI above 1: raise the budget by 10%
        new_budget = row['old_daily_budget'] * 1.1
    elif row['ROI'] > 0:
        # ROI between 0 and 1: cut the budget by 10%
        new_budget = row['old_daily_budget'] * 0.9
    else:
        # ROI of 0, negative or NaN: 90% of the largest HVA-based figure
        new_budget = max(row['hva1/days*min(2.5, actual)'],
                         row['hva2/days*min(50, actual)'],
                         row['hva3/days*min(150, actual)']) * 0.9
    # .at is the supported scalar setter; DataFrame.set_value() was deprecated
    # and later removed from pandas.
    classified_keywords.at[index, 'new_daily_budget'] = new_budget

classified_keywords['daily_budget_new-old'] = classified_keywords['new_daily_budget'] - classified_keywords['old_daily_budget']
columns = ['Keyword', 'Device', 'Days', u'Clicks', u'Cost', u'Impr', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue', 'CTR', 'Avg CPC', 'Cost/HVA1', 'Cost/HVA2', 'Cost/HVA3', 'ROI', 'old_daily_budget', 'new_daily_budget', 'daily_budget_new-old', 'Scenario', 'hva1/days*min(2.5, actual)', 'hva2/days*min(50, actual)', 'hva3/days*min(150, actual)']
classified_keywords = classified_keywords[columns]

classified_keywords.to_excel(writer, index = False, sheet_name = 'summary baseline by device only', float_format='%.2f')

# close() writes the workbook to disk; ExcelWriter.save() has been removed
# from current pandas.
writer.close()
writer_resid.close()

# aggregate hourly reports

In [17]:
# reformat hva file
def reformat_hva(hva):
    """Pivot a long HVA report into one row per device segment.

    Args:
        hva: pd.DataFrame with the columns deviceSegment, floodlightActivity,
            dfaActions, dfaTransactions and dfaRevenue.
    Returns:
        pd.DataFrame with the columns deviceSegment, 'Paid Bookings',
        'Held Booking', 'Held Booking Revenue', 'Paid Booking Revenue' and one
        count column per tracked floodlight activity, in that order. An
        activity that does not occur for a device yields 0. When an activity
        occurs more than once for a device only its first row is used.
    """
    columns_hva = ['CC Website Search','Cruise Packages','Destination Alaska','Destination Bahamas','Destination Bermuda','Destination Europe','Search Results','Specials', u'Staterooms',u'Reservation Step 1',u'Reservation Step 2',u'Reservation Step 3','Courtesy Hold Confirmation Page','Courtesy Hold Creation Page','Courtesy Hold Creation Page 2']
    output_columns = ['deviceSegment', 'Paid Bookings', 'Held Booking', 'Held Booking Revenue', 'Paid Booking Revenue'] + columns_hva

    # (floodlight activity, count column, revenue column): counts come from
    # dfaTransactions, revenue from dfaRevenue.
    booking_activities = [
        ('Paid Booking Confirmation 2015', 'Paid Bookings', 'Paid Booking Revenue'),
        ('Held Booking Confirmation 2015', 'Held Booking', 'Held Booking Revenue'),
    ]

    # Collect plain dicts and build the frame once: growing a DataFrame row by
    # row is quadratic and DataFrame.append no longer exists in current pandas.
    records = []
    for device, group in hva.groupby('deviceSegment'):
        record = {'deviceSegment': device}
        first_rows = group.drop_duplicates(subset='floodlightActivity').set_index('floodlightActivity')

        for activity, count_column, revenue_column in booking_activities:
            if activity in first_rows.index:
                record[count_column] = first_rows.at[activity, 'dfaTransactions']
                record[revenue_column] = first_rows.at[activity, 'dfaRevenue']
            else:
                record[count_column] = 0
                record[revenue_column] = 0.0

        for column in columns_hva:
            if column in first_rows.index:
                record[column] = first_rows.at[column, 'dfaActions']
            else:
                record[column] = 0
        records.append(record)

    return pd.DataFrame(records, columns=output_columns)

# combine hva_formatted and non_hva
def merge_non_hva_and_hva(hva_formatted, non_hva):
    """Join the per-device HVA counts onto the metrics report and total them.

    Args:
        hva_formatted: pd.DataFrame with one row per deviceSegment, as
            produced by reformat_hva().
        non_hva: pd.DataFrame with campaign, deviceSegment, date, hour, minute,
            impr, clicks, cost and avgPos columns.
    Returns:
        pd.DataFrame with one row per non_hva row (row labels converted to
        strings), the renamed metric columns, and the HVA1/HVA2/HVA3,
        Conversions and Conversion_revenue totals. Missing values are 0.
    """
    hva1_activities = ['CC Website Search','Cruise Packages','Destination Alaska','Destination Bahamas','Destination Bermuda','Destination Europe','Search Results','Specials']
    hva2_activities = [u'Staterooms',u'Reservation Step 1',u'Reservation Step 2',u'Reservation Step 3']
    hva3_activities = ['Courtesy Hold Confirmation Page','Courtesy Hold Creation Page','Courtesy Hold Creation Page 2']
    report_names = {"clicks": "Clicks", "cost": "Cost", "impr": "Impr", "avgPos": "Avg pos"}
    output_columns = ['campaign', 'deviceSegment', 'date', 'hour', 'minute','Impr', 'Clicks', 'Cost', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']

    # Devices without an HVA row come out of the left join as NaN and are
    # zeroed before any arithmetic.
    joined = non_hva.merge(hva_formatted, on=['deviceSegment'], how='left')
    table = joined.fillna(value=0).rename(index=str, columns=report_names)

    # Derived totals.
    table['Conversions'] = table['Paid Bookings'] + table['Held Booking']
    table['Conversion_revenue'] = table['Held Booking Revenue'] + table['Paid Booking Revenue']
    table['HVA1'] = table[hva1_activities].sum(axis=1)
    table['HVA2'] = table[hva2_activities].sum(axis=1)
    table['HVA3'] = table[hva3_activities].sum(axis=1)
    return table[output_columns]

In [2]:
import glob
import pandas as pd
import datetime
pd.options.display.max_rows = 10
lst = []
path = '/root/jzou/reporting/DS_minute/'

# Collect the two families of exports found under `path`, each in ascending
# name order so that equal positions in both lists can be paired up later.
non_hva_files = sorted(glob.glob(path + "/non_hva*"))
hva_files = sorted(glob.glob(path+'/hva*'))

In [3]:
# Pair each non-HVA export with the HVA export at the same position in the
# sorted listings. list(...) keeps `files` indexable (files[10] below) and
# re-iterable on Python 3, where zip() returns a one-shot iterator.
# NOTE(review): zip() silently drops unpaired trailing files when the two
# listings differ in length.
files = list(zip(non_hva_files, hva_files))

In [14]:
# Spot-check: load the HVA export of file pair #10 and reshape it.
sample_hva = pd.read_csv(files[10][1])
hva_formatted = reformat_hva(sample_hva)
hva_formatted

Unnamed: 0,CC Website Search,Courtesy Hold Confirmation Page,Courtesy Hold Creation Page,Courtesy Hold Creation Page 2,Cruise Packages,Destination Alaska,Destination Bahamas,Destination Bermuda,Destination Europe,Held Booking,Held Booking Revenue,Paid Booking Revenue,Paid Bookings,Reservation Step 1,Reservation Step 2,Reservation Step 3,Search Results,Specials,Staterooms,deviceSegment
0,3.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,Desktop
1,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,Mobile


In [15]:
# Spot-check: load the non-HVA export of file pair #10.
sample_non_hva_path = files[10][0]
non_hva = pd.read_csv(sample_non_hva_path)
# 'hour' and 'minute' are taken as two-character strings from fixed
# positions counted from the end of the file path.
non_hva['hour'] = sample_non_hva_path[-9:-7]
non_hva['minute'] = sample_non_hva_path[-6:-4]
non_hva

Unnamed: 0,campaign,deviceSegment,date,impr,clicks,cost,avgPos,hour,minute
0,+caribbean +cruise,Desktop,2017-03-27,16,0,0.0,4.375,17,46
1,+caribbean +cruise,Tablet,2017-03-27,2,0,0.0,2.0,17,46
2,+caribbean +cruise,Mobile,2017-03-27,17,0,0.0,3.058824,17,46


In [18]:
# Combine the two sample frames loaded above into the reporting layout.
df1 = merge_non_hva_and_hva(hva_formatted=hva_formatted, non_hva=non_hva)
df1

Unnamed: 0,campaign,deviceSegment,date,hour,minute,Impr,Clicks,Cost,Avg pos,HVA1,HVA2,HVA3,Conversions,Conversion_revenue
0,+caribbean +cruise,Desktop,2017-03-27,17,46,16,0,0.0,4.375,9.0,0.0,0.0,0.0,0.0
1,+caribbean +cruise,Tablet,2017-03-27,17,46,2,0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
2,+caribbean +cruise,Mobile,2017-03-27,17,46,17,0,0.0,3.058824,5.0,0.0,0.0,0.0,0.0


In [21]:
# Rebuild df1 from every (non_hva, hva) file pair.
# The merged frames are collected in a list and concatenated once: this avoids
# the quadratic row-by-row DataFrame.append, and it discards whatever df1 held
# before, so re-running the cell does not duplicate rows.
merged_frames = []
for non_hva_file, hva_file in files:
    non_hva = pd.read_csv(non_hva_file)
    hva = pd.read_csv(hva_file)
    # 'hour' and 'minute' come from fixed positions at the end of the file path.
    non_hva['hour'] = non_hva_file[-9:-7]
    non_hva['minute'] = non_hva_file[-6:-4]
    hva_formatted = reformat_hva(hva)
    merged_frames.append(merge_non_hva_and_hva(hva_formatted, non_hva))

if merged_frames:
    df1 = pd.concat(merged_frames)
else:
    # No file pairs: keep df1's columns but none of its (sample) rows.
    df1 = df1.iloc[0:0]

In [24]:
# Replace the index with a fresh 0..n-1 range; the old index labels are discarded.
df1 = df1.reset_index(drop=True)
df1

Unnamed: 0,level_0,index,campaign,deviceSegment,date,hour,minute,Impr,Clicks,Cost,Avg pos,HVA1,HVA2,HVA3,Conversions,Conversion_revenue
0,0,0,+caribbean +cruise,0,2017-03-27,17,36,0,0,0.00,0.000000,0.0,0.0,0.0,0.0,0.0
1,1,0,+caribbean +cruise,0,2017-03-27,17,37,0,0,0.00,0.000000,0.0,0.0,0.0,0.0,0.0
2,2,0,+caribbean +cruise,0,2017-03-27,17,38,0,0,0.00,0.000000,0.0,0.0,0.0,0.0,0.0
3,3,0,+caribbean +cruise,Desktop,2017-03-27,17,39,16,0,0.00,4.375000,0.0,0.0,0.0,0.0,0.0
4,4,1,+caribbean +cruise,Tablet,2017-03-27,17,39,2,0,0.00,2.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,505,1,+caribbean +cruise,Tablet,2017-03-27,20,26,30,0,0.00,4.200000,0.0,0.0,0.0,0.0,0.0
506,506,2,+caribbean +cruise,Mobile,2017-03-27,20,26,140,7,19.63,2.914286,18.0,0.0,0.0,0.0,0.0
507,507,0,+caribbean +cruise,Desktop,2017-03-27,20,27,76,2,5.75,3.881579,16.0,0.0,0.0,0.0,0.0
508,508,1,+caribbean +cruise,Tablet,2017-03-27,20,27,30,0,0.00,4.200000,0.0,0.0,0.0,0.0,0.0


In [28]:
# Remove the helper columns that reset_index() leaves behind when it is run
# without drop=True ('index', and 'level_0' on a repeated run). errors='ignore'
# keeps this cell working on a fresh run where those columns were never created.
df1 = df1.drop(['index', 'level_0'], axis=1, errors='ignore')

In [25]:
# Tag the metric columns as API-sourced. 'Avg pos' is the one irregular name;
# every other metric simply gains an '_api' suffix. Index labels become strings.
api_column_names = {'Avg pos': 'avgPos_api'}
for metric in ['Impr', 'Clicks', 'Cost', 'HVA1', 'HVA2', 'HVA3',
               'Conversions', 'Conversion_revenue']:
    api_column_names[metric] = '%s_api' % metric

df1 = df1.rename(index=str, columns=api_column_names)

In [33]:
# Convert the three HVA tier counts to Python ints, one column at a time.
for hva_column in ('HVA1_api', 'HVA2_api', 'HVA3_api'):
    df1[hva_column] = df1[hva_column].apply(int)
df1

Unnamed: 0,campaign,deviceSegment,date,hour,minute,Impr_api,Clicks_api,Cost_api,avgPos_api,HVA1_api,HVA2_api,HVA3_api,Conversions_api,Conversion_revenue_api
0,+caribbean +cruise,0,2017-03-27,17,36,0,0,0.00,0.000000,0,0,0,0.0,0.0
1,+caribbean +cruise,0,2017-03-27,17,37,0,0,0.00,0.000000,0,0,0,0.0,0.0
2,+caribbean +cruise,0,2017-03-27,17,38,0,0,0.00,0.000000,0,0,0,0.0,0.0
3,+caribbean +cruise,Desktop,2017-03-27,17,39,16,0,0.00,4.375000,0,0,0,0.0,0.0
4,+caribbean +cruise,Tablet,2017-03-27,17,39,2,0,0.00,2.000000,0,0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,+caribbean +cruise,Tablet,2017-03-27,20,26,30,0,0.00,4.200000,0,0,0,0.0,0.0
506,+caribbean +cruise,Mobile,2017-03-27,20,26,140,7,19.63,2.914286,18,0,0,0.0,0.0
507,+caribbean +cruise,Desktop,2017-03-27,20,27,76,2,5.75,3.881579,16,0,0,0.0,0.0
508,+caribbean +cruise,Tablet,2017-03-27,20,27,30,0,0.00,4.200000,0,0,0,0.0,0.0


In [34]:
# Write df1 to DS_minute.csv; index=False leaves the row index out of the file.
df1.to_csv('/root/jzou/reporting/DS_minute.csv', index=False)

In [10]:
# Give 'hour' and 'date' proper dtypes so they can be compared and joined.
# Both conversions are vectorized and idempotent: re-running the cell on
# already-converted columns leaves them unchanged instead of raising.
df1['hour'] = df1['hour'].astype(int)
df1['date'] = pd.to_datetime(df1['date'], format='%Y-%m-%d')

In [11]:
# List df1's column labels.
# NOTE(review): the recorded output has no 'minute' column although no visible
# cell drops it -- presumably captured from a different df1 state; confirm.
df1.columns

Index([u'campaign', u'deviceSegment', u'date', u'hour', u'Impr_api',
       u'Clicks_api', u'Cost_api', u'avgPos_api', u'HVA1_api', u'HVA2_api',
       u'HVA3_api', u'Conversions_api', u'Conversion_revenue_api'],
      dtype='object')

In [12]:
import pandas as pd
# Load the UI export and list its column labels.
ui_report = pd.read_csv('/root/jzou/reporting/DS_ui.csv')
ui_report.columns

Index([u'Row Type', u'Action', u'Status', u'Sync errors', u'Hour of day',
       u'From', u'To', u'Device segment', u'All labels', u'Label', u'Ad group',
       u'Engine status', u'Ad group search max CPC', u'Clicks', u'Impr',
       u'Cost', u'CTR', u'Avg CPC', u'Avg pos', u'Phone calls',
       u'Paid Bookings', u'Paid Booking Revenue', u'Held Booking',
       u'Held Booking Revenue', u'CC Website Search', u'Courtesy Confirmation',
       u'Courtesy Hold', u'Courtesy Hold 2', u'Cruise Packages',
       u'Destination Alaska', u'Destination Bahamas', u'Destination Bermuda',
       u'Destination Europe', u'Reservation Step 1', u'Reservation Step 2',
       u'Reservation Step 3', u'Search Results', u'Specials', u'Staterooms',
       u'Advertiser ID', u'Advertiser', u'Account ID', u'Account',
       u'Campaign ID', u'Campaign', u'Ad group ID', u'Country',
       u'Effective country', u'Region', u'Effective region', u'Metro', u'City',
       u'Effective city', u'Excluded country', u'Effect

In [29]:
# Display ui_report.
# NOTE(review): the recorded output already shows derived columns (HVA1,
# Conversions, ...), so it was captured after the cell that builds them had run.
ui_report

Unnamed: 0,Hour of day,From,Device segment,Campaign,Clicks,Impr,Cost,Avg pos,HVA1,HVA2,HVA3,Conversions,Conversion_revenue
0,15,2017-03-24,Desktop,+caribbean +cruise,0,0,0.00,,6,0,0,0,0
1,15,2017-03-24,Tablet,+caribbean +cruise,0,0,0.00,,4,0,0,0,0
2,19,2017-03-24,Mobile,+caribbean +cruise,1,30,2.94,3.20,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,15,2017-03-25,Tablet,+caribbean +cruise,0,4,0.00,3.25,0,0,0,0,0
79,15,2017-03-25,Mobile,+caribbean +cruise,0,18,0.00,2.61,0,0,0,0,0
80,16,2017-03-25,Tablet,+caribbean +cruise,0,4,0.00,2.50,0,0,0,0,0


In [38]:
# Load the UI export and reduce it to the same summary metrics as the API frame.
ui_report = pd.read_csv('/root/jzou/reporting/DS_ui.csv')

# NOTE(review): if 'Paid Booking Revenue' / 'Held Booking Revenue' are exported
# with thousands separators they load as strings; strip the commas and cast to
# float before summing them below.

ui_report['Conversions'] = ui_report['Paid Bookings'] + ui_report['Held Booking']
ui_report['Conversion_revenue'] = ui_report['Held Booking Revenue'] + ui_report['Paid Booking Revenue']
ui_report['HVA1'] = ui_report[['CC Website Search','Cruise Packages','Destination Alaska','Destination Bahamas','Destination Bermuda','Destination Europe','Search Results','Specials']].sum(axis=1)
ui_report['HVA2'] = ui_report[[u'Staterooms',u'Reservation Step 1',u'Reservation Step 2',u'Reservation Step 3']].sum(axis=1)
ui_report['HVA3'] = ui_report[[u'Courtesy Confirmation', u'Courtesy Hold', u'Courtesy Hold 2']].sum(axis=1)

# .copy() makes the reduced frame independent of the full export, so the column
# assignments below write to it directly instead of to a slice
# (which triggers SettingWithCopyWarning).
ui_report = ui_report[['Hour of day', 'From', 'Device segment', 'Campaign', 'Clicks', 'Impr', 'Cost', 'Avg pos', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']].copy()

# 'From' holds month/day/two-digit-year strings.
ui_report['From'] = pd.to_datetime(ui_report['From'], format='%m/%d/%y')

# Counts are cast to integers.
for count_column in ['Hour of day', 'Clicks', 'Impr', 'HVA1', 'HVA2', 'HVA3']:
    ui_report[count_column] = ui_report[count_column].astype(int)

In [67]:
DEVICE = 'Tablet'

# API rows for the chosen device, re-indexed from 0.
api = df1[df1['deviceSegment'] == DEVICE].reset_index(drop=True)
# NOTE(review): the API hour is shifted back by one before it is matched against
# the UI 'Hour of day'; the reason is not visible here -- confirm.
api['hour'] = api['hour'] - 1

# UI rows for the same device, ordered by day and hour, re-indexed from 0.
ui_for_device = ui_report[ui_report['Device segment'] == DEVICE]
ui = ui_for_device.sort_values(['From', 'Hour of day']).reset_index(drop=True)

# Seed each '<metric>_ui' column with a copy of the raw metric.
for metric in ['Impr', 'Clicks', 'Cost', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']:
    ui['%s_ui' % metric] = ui[metric]

ui[:10]

Unnamed: 0,Hour of day,From,Device segment,Campaign,Clicks,Impr,Cost,Avg pos,HVA1,HVA2,...,Conversions,Conversion_revenue,Impr_ui,Clicks_ui,Cost_ui,HVA1_ui,HVA2_ui,HVA3_ui,Conversions_ui,Conversion_revenue_ui
0,11,2017-03-24,Tablet,+caribbean +cruise,1,2,2.86,1.5,0,0,...,0,0,2,1,2.86,0,0,0,0,0
1,15,2017-03-24,Tablet,+caribbean +cruise,0,0,0.0,,4,0,...,0,0,0,0,0.0,4,0,0,0,0
2,17,2017-03-24,Tablet,+caribbean +cruise,0,9,0.0,3.78,0,0,...,0,0,9,0,0.0,0,0,0,0,0
3,18,2017-03-24,Tablet,+caribbean +cruise,0,5,0.0,4.2,0,0,...,0,0,5,0,0.0,0,0,0,0,0
4,19,2017-03-24,Tablet,+caribbean +cruise,0,3,0.0,3.67,0,0,...,0,0,3,0,0.0,0,0,0,0,0
5,20,2017-03-24,Tablet,+caribbean +cruise,0,8,0.0,4.75,0,0,...,0,0,8,0,0.0,0,0,0,0,0
6,21,2017-03-24,Tablet,+caribbean +cruise,0,11,0.0,3.82,0,0,...,0,0,11,0,0.0,0,0,0,0,0
7,22,2017-03-24,Tablet,+caribbean +cruise,0,8,0.0,4.25,0,0,...,0,0,8,0,0.0,0,0,0,0,0
8,23,2017-03-24,Tablet,+caribbean +cruise,0,5,0.0,4.6,0,0,...,0,0,5,0,0.0,0,0,0,0,0
9,1,2017-03-25,Tablet,+caribbean +cruise,0,3,0.0,4.0,0,0,...,0,0,3,0,0.0,0,0,0,0,0


In [68]:
# Turn each '<metric>_ui' column into a running total of the raw metric that
# restarts on every new 'From' day. Grouping by day replaces the hard-coded row
# ranges (1-8 and 10-25), which only matched the day boundaries of one device's
# slice, and avoids DataFrame.set_value, which newer pandas no longer provides.
# Relies on `ui` already being ordered by hour within each day.
cumulative_columns = ['Impr', 'Clicks', 'Cost', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']
running_totals = ui.groupby('From')[cumulative_columns].cumsum()
for column in cumulative_columns:
    ui['%s_ui' % column] = running_totals[column]

ui[:10]

Unnamed: 0,Hour of day,From,Device segment,Campaign,Clicks,Impr,Cost,Avg pos,HVA1,HVA2,...,Conversions,Conversion_revenue,Impr_ui,Clicks_ui,Cost_ui,HVA1_ui,HVA2_ui,HVA3_ui,Conversions_ui,Conversion_revenue_ui
0,11,2017-03-24,Tablet,+caribbean +cruise,1,2,2.86,1.5,0,0,...,0,0,2,1,2.86,0,0,0,0,0
1,15,2017-03-24,Tablet,+caribbean +cruise,0,0,0.0,,4,0,...,0,0,2,1,2.86,4,0,0,0,0
2,17,2017-03-24,Tablet,+caribbean +cruise,0,9,0.0,3.78,0,0,...,0,0,11,1,2.86,4,0,0,0,0
3,18,2017-03-24,Tablet,+caribbean +cruise,0,5,0.0,4.2,0,0,...,0,0,16,1,2.86,4,0,0,0,0
4,19,2017-03-24,Tablet,+caribbean +cruise,0,3,0.0,3.67,0,0,...,0,0,19,1,2.86,4,0,0,0,0
5,20,2017-03-24,Tablet,+caribbean +cruise,0,8,0.0,4.75,0,0,...,0,0,27,1,2.86,4,0,0,0,0
6,21,2017-03-24,Tablet,+caribbean +cruise,0,11,0.0,3.82,0,0,...,0,0,38,1,2.86,4,0,0,0,0
7,22,2017-03-24,Tablet,+caribbean +cruise,0,8,0.0,4.25,0,0,...,0,0,46,1,2.86,4,0,0,0,0
8,23,2017-03-24,Tablet,+caribbean +cruise,0,5,0.0,4.6,0,0,...,0,0,51,1,2.86,4,0,0,0,0
9,1,2017-03-25,Tablet,+caribbean +cruise,0,3,0.0,4.0,0,0,...,0,0,3,0,0.0,0,0,0,0,0


In [69]:
# Line up UI and API rows that share the same day and hour (inner join).
api_columns = ['date', 'hour', 'Impr_api', 'Clicks_api', 'Cost_api', 'avgPos_api',
               u'HVA1_api', u'HVA2_api', u'HVA3_api',
               u'Conversions_api', u'Conversion_revenue_api']
merged = ui.merge(api[api_columns],
                  left_on=['From', 'Hour of day'],
                  right_on=['date', 'hour'])

# '<metric>_diff' = API value minus UI value, for every compared metric.
compared_metrics = ['Impr', 'Clicks', 'Cost', 'HVA1', 'HVA2', 'HVA3', 'Conversions', 'Conversion_revenue']
for column in compared_metrics:
    api_values = merged['%s_api'%column]
    ui_values = merged['%s_ui'%column]
    merged['%s_diff'%column] = api_values - ui_values

# One CSV per device; floats are written with two decimals.
merged.to_csv('/root/jzou/reporting/merged_%s.csv' % DEVICE, index = False, float_format='%.2f')
merged

Unnamed: 0,Hour of day,From,Device segment,Campaign,Clicks,Impr,Cost,Avg pos,HVA1,HVA2,...,Conversions_api,Conversion_revenue_api,Impr_diff,Clicks_diff,Cost_diff,HVA1_diff,HVA2_diff,HVA3_diff,Conversions_diff,Conversion_revenue_diff
0,15,2017-03-24,Tablet,+caribbean +cruise,0,0,0.00,,4,0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
1,17,2017-03-24,Tablet,+caribbean +cruise,0,9,0.00,3.78,0,0,...,0.0,0.0,2.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
2,18,2017-03-24,Tablet,+caribbean +cruise,0,5,0.00,4.20,0,0,...,0.0,0.0,2.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
3,19,2017-03-24,Tablet,+caribbean +cruise,0,3,0.00,3.67,0,0,...,0.0,0.0,4.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
4,20,2017-03-24,Tablet,+caribbean +cruise,0,8,0.00,4.75,0,0,...,0.0,0.0,2.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,14,2017-03-25,Tablet,+caribbean +cruise,0,2,0.00,5.50,0,0,...,0.0,0.0,2.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
21,15,2017-03-25,Tablet,+caribbean +cruise,0,4,0.00,3.25,0,0,...,0.0,0.0,1.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
22,16,2017-03-25,Tablet,+caribbean +cruise,0,4,0.00,2.50,0,0,...,0.0,0.0,15.0,2.0,5.50,13.0,0.0,0.0,0.0,0.0
23,17,2017-03-25,Tablet,+caribbean +cruise,3,32,7.99,4.31,13,0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0


In [50]:
# Write df1 to DS_api.csv; index=False leaves the row index out of the file.
df1.to_csv('/root/jzou/reporting/DS_api.csv', index=False)