In [17]:
from googleads import adwords

In [18]:
import time

In [19]:
from StringIO import StringIO
import pandas as pd
import numpy as np

In [38]:
# Build the ad-quality helper for the 'us' accounts, reporting window 20170101-20170731.
a = ad_quality(country='us', start='20170101', end='20170731')

In [39]:
# Query the managed accounts and cache, on `a`, those whose names pass the
# country-prefix / 'jobseeker' / excluded-marker filters.
a.get_accounts()

In [41]:
# Download the enabled-ads performance report for a single account; the
# resulting frame (sorted by Impressions, ascending) is the cell output.
a.get_all_enabled_ads(account_id=4403620381)

Unnamed: 0,Campaign ID,Ad group ID,Ad ID,Ad type,Impressions,Clicks,Cost
0,350327311,24697866391,86168230111,Text ad,0,0,0
1083156,349931671,24650113591,85930773151,Text ad,0,0,0
1083155,350327071,24694214671,86145182551,Text ad,0,0,0
1083154,349931311,24644066191,162147070733,Expanded text ad,0,0,0
1083153,350329231,24700715191,162132553135,Expanded text ad,0,0,0
1083151,349931191,24642955711,85884634351,Text ad,0,0,0
1083150,349986871,24653898871,85953732511,Text ad,0,0,0
1083149,349987351,24663401911,162173983596,Expanded text ad,0,0,0
1083148,349931551,24649185991,85924830751,Text ad,0,0,0
1083147,349930951,24637220071,85848041791,Text ad,0,0,0


In [37]:
class ad_quality(object):
    """Find and pause poorly performing AdWords ads in over-populated ad groups.

    An ad group is treated as over-populated when it holds more than
    ``max_ads`` ads.  Ads inside such groups are ranked (by CTR, by a
    CTR lowered by its standard error, or by impression share) and the
    low-ranked ones can be paused through ``AdGroupAdService``.

    NOTE(review): ``get_ad_data`` and ``get_campaigns`` are called on
    ``self`` by several methods but are not defined in this class; they
    have to be supplied before those methods can run.
    """

    # Columns that identify one ad group inside a report frame.
    _GROUP_KEYS = ['Campaign ID', 'Ad group ID']
    # An account is skipped when its name contains any of these markers.
    _EXCLUDED_NAME_PARTS = ('_aed_', '_app', '_disabled', '_dsa',
                            '_display_', '_content_')

    def __init__(self, country, start, end, version='v201705', max_ads=6):
        """Create the helper and load the AdWords client from storage.

        Args:
            country: account-name prefix (``'<country>_'``) used to pick accounts.
            start: first day of the report window, interpolated into the
                AWQL ``DURING`` clause (callers pass e.g. ``'20170101'``).
            end: last day of the report window, same format as ``start``.
            version: AdWords API version used for every service.
            max_ads: ad-count threshold above which an ad group is considered
                over-populated; also used as the rank cut-off for pausing.
        """
        self._country = country
        self._client = adwords.AdWordsClient.LoadFromStorage()
        self._page_size = 500             # not read anywhere in this class
        self._max_ads = max_ads
        self._start = start
        self._end = end
        self._active = 20170718           # not read anywhere in this class
        self._version = version
        self._all_accounts = {}           # customerId -> account name
        self._new_accounts_df = None      # not read anywhere in this class

    def get_accounts(self):
        """Collect the managed accounts that belong to this country.

        An account is kept when its name starts with ``'<country>_'``,
        contains ``'jobseeker'`` and contains none of the excluded markers.
        Matches are stored in ``self._all_accounts``.

        NOTE(review): a single ``get`` call is issued and the selector carries
        no paging, so only the entries of that one response are inspected.

        Returns:
            dict: ``self._all_accounts`` (customerId -> name).  Iterating it
            yields the customer ids, which is what ``run`` relies on.
        """
        managed_customer_service = self._client.GetService(
            'ManagedCustomerService', version=self._version)
        selector = {
            'fields': ['CustomerId', 'Name'],
            'predicates': [{'field': 'ExcludeHiddenAccounts',
                            'operator': 'EQUALS',
                            'values': 'true'
                            }]
        }
        page = managed_customer_service.get(selector)
        if 'entries' in page and page['entries']:
            prefix = self._country + '_'
            for account in page['entries']:
                name = str(account['name'])
                if not name.startswith(prefix) or 'jobseeker' not in name:
                    continue
                if any(part in name for part in self._EXCLUDED_NAME_PARTS):
                    continue
                self._all_accounts[account['customerId']] = account['name']
        # Previously nothing was returned, which made `for account in
        # self.get_accounts()` in the pause loops fail on None.
        return self._all_accounts

    def get_all_enabled_ads(self, account_id):
        """Download the AD_PERFORMANCE_REPORT for enabled ads of one account.

        Only ads that are ENABLED inside ENABLED ad groups and campaigns are
        requested, for the ``start``..``end`` window, zero-impression rows
        included.

        Returns:
            pandas.DataFrame: one row per ad, sorted by ``Impressions``
            in ascending order.
        """
        self._client.SetClientCustomerId(account_id)
        report_downloader = self._client.GetReportDownloader(version=self._version)
        qry = ('SELECT CampaignId, AdGroupId, Id, AdType, Impressions, Clicks, Cost FROM '
               'AD_PERFORMANCE_REPORT WHERE Status IN [ENABLED] AND CampaignStatus in [ENABLED] '
               'AND AdGroupStatus in [ENABLED] DURING {0}, {1}').format(self._start, self._end)
        stream_data = report_downloader.DownloadReportAsStringWithAwql(
            qry, 'CSV',
            skip_report_header=True, skip_column_header=False,
            skip_report_summary=True, include_zero_impressions=True)
        report_data = StringIO(stream_data)
        # read_csv replaces the deprecated DataFrame.from_csv(...).reset_index()
        # round trip: every column stays a regular column on a default index.
        report_df = pd.read_csv(report_data, sep=',')
        return report_df.sort_values('Impressions', ascending=True)

    def _crowded_ad_group_ads(self, data):
        """Return the rows of ``data`` that belong to ad groups holding more than ``_max_ads`` ads."""
        ads_per_group = data.groupby(self._GROUP_KEYS)['Ad ID'].count().reset_index()
        crowded = ads_per_group[ads_per_group['Ad ID'] > self._max_ads][self._GROUP_KEYS]
        return pd.merge(crowded, data, how='left', on=self._GROUP_KEYS)

    @staticmethod
    def _with_group_rank(frame, score_column, rank_column):
        """Append ``rank_column``: the descending rank of ``score_column`` inside each ad group."""
        ranks = frame.groupby(['Ad group ID'])[score_column].rank(ascending=False).to_frame()
        ranks.columns = [rank_column]
        return pd.merge(frame, ranks, left_index=True, right_index=True)

    @staticmethod
    def _ad_pairs(frame):
        """Return ``[(ad_group_id, ad_id), ...]`` for the rows of ``frame``."""
        # A real list (not a lazy zip) so truthiness checks and printing
        # behave the same on Python 2 and Python 3.
        return list(zip(frame['Ad group ID'].values, frame['Ad ID'].values))

    def low_impression_share(self, account_id, campaign_id):
        """Rank the ads of over-populated ad groups by their impression share.

        Returns:
            pandas.DataFrame: columns ``Ad group ID``, ``Ad ID``,
            ``Impressions``, ``Clicks``, ``Total Impressions``, ``ctr``,
            ``imp_share`` and ``share_rank`` (1 = largest share in the ad
            group), sorted by ad group and rank.
        """
        data = self.get_ad_data(account_id, campaign_id)
        sum_impressions = data.groupby(self._GROUP_KEYS)['Impressions'].sum().reset_index()
        sum_impressions.columns = ['Campaign ID', 'Ad group ID', 'Total Impressions']
        with_info = self._crowded_ad_group_ads(data)
        with_info = pd.merge(with_info, sum_impressions, how='left', on=self._GROUP_KEYS)
        # .copy() so the column assignments below act on an independent frame.
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks',
                               'Total Impressions']].copy()
        with_info['ctr'] = with_info['Clicks'] / with_info['Impressions']
        with_info['imp_share'] = with_info['Impressions'] / with_info['Total Impressions']
        # 0/0 (no impressions) produces NaN; treat those ratios as 0.
        with_info = with_info.fillna(0)
        merged = self._with_group_rank(with_info, 'imp_share', 'share_rank')
        return merged.sort_values(['Ad group ID', 'share_rank'])

    def ads_to_pause_low_ctr_high_std_dev(self, account_id, campaign_id):
        """Select ads to pause using CTR lowered by its standard error.

        For each ad in an over-populated ad group the score is
        ``ctr - sqrt(ctr * (1 - ctr) / Impressions)``; ads whose descending
        rank inside the ad group is ``>= max_ads`` are selected.

        NOTE(review): ``>=`` also selects the ad ranked exactly ``max_ads``;
        confirm whether ``>`` was intended.  Ties are not handled specially
        (original TODO).

        Returns:
            list: ``(ad_group_id, ad_id)`` tuples; empty when nothing qualifies.
        """
        data = self.get_ad_data(account_id, campaign_id)
        with_info = self._crowded_ad_group_ads(data)
        if with_info.empty:
            return []
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']].copy()
        with_info['ctr'] = with_info['Clicks'] / with_info['Impressions']
        with_info = with_info.fillna(0)
        with_info['std'] = np.sqrt(
            (with_info['ctr'] * (1 - with_info['ctr'])) / with_info['Impressions'])
        # Zero impressions give 0/0 here as well; count them as zero spread.
        with_info = with_info.fillna(0)
        with_info['ctr_adjusted'] = with_info['ctr'] - with_info['std']
        merged = self._with_group_rank(with_info, 'ctr_adjusted', 'ranking')
        # TODO: handle ties
        to_be_paused = merged[merged.ranking >= self._max_ads]
        return self._ad_pairs(to_be_paused)

    def ads_to_pause(self, account_id, campaign_id):
        """Select ads to pause by plain CTR rank.

        Inside over-populated ad groups, an ad is selected when its
        descending CTR rank is ``>= max_ads`` and it has more than 100
        impressions.

        NOTE(review): ``>=`` also selects the ad ranked exactly ``max_ads``;
        confirm whether ``>`` was intended.

        Returns:
            list: ``(ad_group_id, ad_id)`` tuples; empty when nothing qualifies.
        """
        data = self.get_ad_data(account_id, campaign_id)
        with_info = self._crowded_ad_group_ads(data)
        if with_info.empty:
            return []
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']].copy()
        with_info['ctr'] = with_info['Clicks'] / with_info['Impressions']
        with_info = with_info.fillna(0)
        merged = self._with_group_rank(with_info, 'ctr', 'ranking')
        to_be_paused = merged[(merged.ranking >= self._max_ads) & (merged.Impressions > 100)]
        return self._ad_pairs(to_be_paused)

    def pause_ads(self, account_id, ad_group_id, ad_id, verbose=True):
        """Set one ad to PAUSED through ``AdGroupAdService``.

        Args:
            account_id: client customer id the ad belongs to.
            ad_group_id: id of the ad group containing the ad.
            ad_id: id of the ad to pause.
            verbose: when true, print the id of every ad in the response.
        """
        self._client.SetClientCustomerId(account_id)
        # Use the configured API version, like every other service in this class.
        ad_group_ad_service = self._client.GetService('AdGroupAdService',
                                                      version=self._version)

        operations = [{
            'operator': 'SET',
            'operand': {
                'adGroupId': ad_group_id,
                'ad': {'id': ad_id},
                'status': 'PAUSED'
            }
        }]
        ads = ad_group_ad_service.mutate(operations)

        if verbose:
            # Display results.
            for ad in ads['value']:
                print('Ad with id \'%s\' was updated.' % ad['ad']['id'])

    def _pause_selected(self, select_ads, best_effort=False, show_selection=False):
        """Pause, for every campaign of every account, the ads chosen by ``select_ads``.

        Args:
            select_ads: callable ``(account, campaign) -> [(ad_group, ad), ...]``.
            best_effort: when true, a failing pause is reported and skipped
                instead of aborting the whole run.
            show_selection: when true, print each campaign's selection.
        """
        # Iterate over a snapshot of the ids so the loop is independent of
        # later changes to the cached account dict.
        for account in list(self.get_accounts()):
            for campaign in self.get_campaigns(account):
                pause_these = select_ads(account, campaign)
                if show_selection:
                    print(pause_these)
                for ad_group, ad in pause_these:
                    if not best_effort:
                        self.pause_ads(account, ad_group, ad)
                        continue
                    try:
                        self.pause_ads(account, ad_group, ad)
                    except Exception as exc:
                        # Keep going, but make the failure visible rather
                        # than swallowing it silently.
                        print('Failed to pause ad %s in ad group %s (account %s): %s'
                              % (ad, ad_group, account, exc))

    def run(self):
        """Pause the ads picked by ``ads_to_pause`` in every account and campaign.

        Any error raised while pausing propagates to the caller.
        """
        self._pause_selected(self.ads_to_pause)

    def pause_low_ctr_high_std_dev(self):
        """Pause the ads picked by ``ads_to_pause_low_ctr_high_std_dev``.

        Each campaign's selection is printed; a failing pause is reported
        and skipped so the remaining ads are still processed.
        """
        self._pause_selected(self.ads_to_pause_low_ctr_high_std_dev,
                             best_effort=True, show_selection=True)

In [25]:
# NOTE(review): `pause_bad_performing_ads` is not defined in any cell shown here; the
# class with this (country, start, end) constructor in this notebook is `ad_quality`.
# Confirm which name is intended before running on a fresh kernel.
a = pause_bad_performing_ads('fi', '20170101', '20170731')

In [117]:
# One row per ad group; every cell holds the list of that group's values for the column.
# NOTE(review): `greater_than_3` is not created in any cell shown here.
(
    greater_than_3
    .groupby(['Ad group ID'])
    .agg(lambda column_values: list(column_values))
)

Unnamed: 0_level_0,Campaign ID,Ad ID_x,Ad ID_y,Impressions,Clicks,Cost,ctr
Ad group ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
45713167844,"[874677791, 874677791, 874677791, 874677791]","[4, 4, 4, 4]","[205016021996, 205016021999, 205016022125, 205...","[356, 379, 166, 337]","[110, 107, 0, 19]","[2650000, 2920000, 0, 560000]","[0.308988764045, 0.282321899736, 0.0, 0.056379..."
45713169524,"[874677791, 874677791, 874677791, 874677791]","[4, 4, 4, 4]","[205016022362, 205016022365, 205016022236, 205...","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]","[0.0, 0.0, 0.0, 0.0]"


In [315]:
# One row per (campaign, ad group); every cell holds the distinct values of that column
# within the group. Variant: group by ['Ad group ID'] alone to ignore the campaign.
(
    greater_than_3
    .groupby(['Campaign ID', 'Ad group ID'])
    .apply(
        lambda group_df: pd.Series({
            column: group_df[column].unique().tolist()
            for column in group_df
            if column != 'A'
        })
    )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ad ID_x,Ad ID_y,Ad group ID,Campaign ID,Clicks,Cost,Impressions,ctr
Campaign ID,Ad group ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
874677791,45713167844,[4],"[205016021996, 205016021999, 205016022125, 205...",[45713167844],[874677791],"[110, 107, 0, 19]","[2650000, 2920000, 0, 560000]","[356, 379, 166, 337]","[0.308988764045, 0.282321899736, 0.0, 0.056379..."
874677791,45713169524,[4],"[205016022362, 205016022365, 205016022236, 205...",[45713169524],[874677791],[0],[0],[0],[0.0]
