In [9]:
import pandas as pd
import numpy as np
pd.options.display.max_rows = 6
import os
os.chdir('/Users/JayLiang/Desktop/Media Storm/PEI/Penguin/Keywords')
import argparse
import httplib2
import pprint
import time
import datetime
from io import StringIO

from apiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials():
    """Create Google OAuth2 credentials for the DoubleClick Search API.

    The function takes no arguments. Each secret is read from an environment
    variable when it is set, and otherwise falls back to the value that was
    previously hard-coded here:

        DS_CLIENT_ID      -> client id of the Google Cloud console project
        DS_CLIENT_SECRET  -> client secret of the Google Cloud console project
        DS_REFRESH_TOKEN  -> refresh token authorizing the project to access
                             the DS data of some Google user

    Returns:
        OAuth2Credentials
    """
    # NOTE(review): the literal fallbacks below are secrets committed to the
    # notebook. They are kept only so existing runs keep working; rotate them
    # and supply the values through the environment instead.
    client_id = os.environ.get(
        'DS_CLIENT_ID',
        '549790627766-qnth4m8qvuimg87pnsp4b82lhte7dk5a.apps.googleusercontent.com')
    client_secret = os.environ.get('DS_CLIENT_SECRET', 'Vta4lQLOL49vVYvktkcPGRNb')
    refresh_token = os.environ.get(
        'DS_REFRESH_TOKEN', '1/ab7pCGMu3K5AveG0UOUpQ0J08vCp6uM357O8qmoPDMs')

    return OAuth2Credentials(access_token=None,
                             client_id=client_id,
                             client_secret=client_secret,
                             refresh_token=refresh_token,
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search (v2) service object.

    Args:
        credentials: An OAuth2Credentials instance, e.g. the one returned by
          create_credentials.
    Returns:
        The service object returned by build('doubleclicksearch', 'v2', ...).
    """
    # authorize() wraps the Http object so the credential headers are applied
    # to every request made through it.
    authorized_http = credentials.authorize(http=httplib2.Http())
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, wait_seconds=10):
    """Poll the API with the report id until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: How many times to ask whether the report is ready
          (default 10, the previous hard-coded value).
        wait_seconds: Seconds to sleep between attempts (default 10).
    Returns:
        pd.DataFrame with all file fragments concatenated (empty if the report
        has no fragments), or None if the report never became ready or the API
        answered with an HTTP error.
    """
    # Local import: the handler previously referenced `simplejson`, which is
    # never imported in this notebook, so any HttpError ended in a NameError.
    import json

    for attempt in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if json_data['isReportReady']:
                pprint.pprint('The report is ready.')

                # For large reports, DS automatically fragments the report into
                # multiple files. The 'files' property lists one entry per
                # fragment; each is downloaded by (report id, fragment index).
                fragments = []
                for i in range(len(json_data['files'])):
                    pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                    fragments.append(download_files(service, report_id, str(i)))

                # pd.concat raises on an empty list, so keep the old behaviour
                # of returning an empty frame when there are no fragments.
                if not fragments:
                    return pd.DataFrame()
                # Concatenate once instead of growing a frame with the removed
                # DataFrame.append (also avoids quadratic copying).
                return pd.concat(fragments, ignore_index=True)

            pprint.pprint('Report is not ready. I will try again.')
            # No point in sleeping after the final attempt.
            if attempt < max_attempts - 1:
                time.sleep(wait_seconds)
        except HttpError as e:
            # The error payload is JSON; fall back to a generic reason if it
            # does not have the expected shape.
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                reason = 'unknown'

            # See Response Codes
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            break
    else:
        pprint.pprint('Report %s was not ready after %d attempts.' % (report_id, max_attempts))

    return None
        
def download_files(service, report_id, report_fragment):
    """Download one file fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame parsed from the fragment's CSV content.
    """
    raw_bytes = service.reports().getFile(
        reportId=report_id,
        reportFragment=report_fragment,
    ).execute()
    # The payload is decoded as UTF-8 text before being handed to pandas.
    csv_buffer = StringIO(raw_bytes.decode('utf-8'))
    return pd.read_csv(csv_buffer)

def request_report(service, start_date, end_date, columns):
    """Submit a keyword report request and return the report id DS assigns to it.

    Args:
        service: An authorized Doublelcicksearch service.
        start_date: Value sent as timeRange.startDate.
        end_date: Value sent as timeRange.endDate.
        columns: list of column names to include in the report.
    Returns:
        The report id.
    """
    report_scope = {
        "agencyId": "20100000000000932",
        "advertiserId": "21700000001424088",  # PEI - Original Penguin - Search
    }

    column_specs = []
    for column in columns:
        column_specs.append({'columnName': column})

    # A "filters" entry (e.g. keywordLabels containsElement [...]) can be added
    # to this body to restrict the report; none is applied here.
    report_body = {
        "reportScope": report_scope,
        "reportType": "keyword",
        "columns": column_specs,
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date,
        },
        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false",
    }

    response = service.reports().request(body=report_body).execute()
    return response['id']

In [10]:
# download reports
creds = create_credentials()
service = get_service(creds)

# Reporting window passed to both report requests.
start_date = "2017-07-18"
end_date = "2017-08-28"
print(start_date, end_date)

# Keyword-level bid / traffic / cost metrics.
non_hva_columns = [
    'campaign', 'adGroup', 'keywordText', 'keywordMatchType', 'status',
    'effectiveKeywordMaxCpc', 'keywordMaxCpc', 'topOfPageBidCurrent',
    'topOfPageBidAvg', 'impr', 'clicks', 'cost',
    'avgCpc', 'avgPos', 'dfaRevenue',
]
# Floodlight activity counts per keyword.
hva_columns = [
    'campaign', 'adGroup', 'keywordText', 'keywordMatchType',
    'floodlightActivity', 'dfaActions',
]

# Both reports are requested first and polled afterwards.
REPORTID_nonHVA = request_report(service, start_date, end_date, non_hva_columns)
REPORTID_HVA = request_report(service, start_date, end_date, hva_columns)

non_hva = poll_report(service, REPORTID_nonHVA)
hva = poll_report(service, REPORTID_HVA)

2017-07-18 2017-08-28
'Report is not ready. I will try again.'
'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnVWQ3piFj2FXR'
'The report is ready.'
'Downloading fragment 0 for report AAAnvMuEkpb70I_i'


In [11]:
# Preview the non-HVA keyword report (rows shown are capped by pd.options.display.max_rows).
non_hva

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,status,effectiveKeywordMaxCpc,keywordMaxCpc,topOfPageBidCurrent,topOfPageBidAvg,impr,clicks,cost,avgCpc,avgPos,dfaRevenue
0,OPG_BRM || B | Brand,TM PenguinClothing,penguinclothing,Phrase,Active,2.24,2.24,,,0,0,0.00,,,0.0
1,OPG_BRM || B | Brand,TM PenguinClothing,www.penguinclothing.com,Phrase,Active,2.01,2.01,,,0,0,0.00,,,0.0
2,OPG_BRM || B | Brand,TM PenguinClothing,www penguinclothing com,Exact,Active,5.02,5.02,,,0,0,0.00,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399091,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - BMM,+mens +eau +de +toilette +fragrances,Broad,Active,3.00,3.00,,,1123,0,0.00,,1.622440,0.0
399092,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - BMM,+mens +gift +set +5 +piece,Broad,Active,3.00,3.00,,,6,0,0.00,,1.500000,0.0
399093,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - BMM,+mens +sprays,Broad,Active,3.00,3.00,,,19631,5,9.16,1.832,1.452091,0.0


In [6]:
# Preview the floodlight-activity (HVA) report (rows shown are capped by pd.options.display.max_rows).
hva

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,floodlightActivity,dfaActions
0,Y_Brand_Original Penguin_Big & Tall_EST-2775,Big & Tall Polos - Exact,Original Penguin big and tall daddy polos,Exact,,0
1,Y_Brand_Original Penguin_Big & Tall_EST-2775,Big & Tall Polos - Exact,Original Penguin the earl big and tall polos,Exact,,0
2,Y_Brand_Original Penguin_Tops_EST-2775,Shirts - Exact,Original Penguin oxford shirts,Exact,,0
...,...,...,...,...,...,...
400378,B_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - Exact,mens eau de toilette sprays,Exact,,0
400379,B_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - Exact,mens Five piece gift set,Exact,,0
400380,B_Non-Brand_Original Penguin_Fragrances_EST-2775,Fragrances - Exact,mens gift set 5 piece,Exact,,0


# merge reports

In [3]:
def merge_hva_and_non_hva(hva, non_hva, columns_hva=None):
    '''Merge the floodlight-activity report into the keyword metrics report.

    The long-format `hva` frame (one row per keyword and floodlight activity)
    is pivoted to one column per tracked activity, left-joined onto `non_hva`
    on (campaign, adGroup, keywordText, keywordMatchType), and summarised
    into an `HVA` total and an `ROI` column.

    Args:
        hva: pd.DataFrame with the four key columns plus 'floodlightActivity'
            and 'dfaActions'.
        non_hva: pd.DataFrame with the four key columns plus the metric
            columns selected in the return statement.
        columns_hva: optional list of floodlight activity names counted as
            high-value actions. Defaults to the built-in Callaway list.
    Returns:
        pd.DataFrame with one row per row of `non_hva`. Every missing value is
        replaced by 0 before 'HVA' and 'ROI' are computed.
    '''
    keys = ['campaign', 'adGroup', 'keywordText', 'keywordMatchType']

    if columns_hva is None:
        # NOTE(review): 'Callaway - Clearance' and 'Callaway - Unsubscribe - HVA 2'
        # used to be a single string 'Callaway - Clearance,Callaway - Unsubscribe - HVA 2'
        # (a missing quote/comma), which could never match a single activity
        # name of the form used by every other entry. Confirm against the
        # floodlight activity list.
        columns_hva = [
            'Callaway - Big & Tall', 'Callaway - Clearance', 'Callaway - Unsubscribe - HVA 2',
            'Callaway - Search - HVA2', 'Callaway - Big & Tall_Belts', 'Callaway - Big & Tall_Outerwear',
            'Callaway - Big & Tall_Pants-Shorts', 'Callaway - Big & Tall_Polos',
            'Callaway - Clearance_Big & Tall Clearance', 'Callaway - Clearance_Mens Clearance',
            'Callaway - Clearance_Womens Clearance', 'Callaway - Features_Best-Sellers',
            'Callaway - Features_Callaway X', 'Callaway - Features_New Arrivals',
            'Callaway - Features_Opti-Series', 'Callaway - Men Golf Shoes', 'Callaway - Men_Belts',
            'Callaway - Men_Outerwear', 'Callaway - Men_Pants', 'Callaway - Men_Polos',
            'Callaway - Men_Shorts', 'Callaway - Men_Standard Collection', 'Callaway - Products - Belts',
            'Callaway - Products - Jacket', 'Callaway - Products - Jackets',
            'Callaway - Products - Men Golf Shoes', 'Callaway - Products - Pants',
            'Callaway - Products - Polo', 'Callaway - Products - Polos', 'Callaway - Products - Short',
            'Callaway - Products - Shorts', 'Callaway - Products - Skorts', 'Callaway - Products - Sweaters',
            'Callaway - Products - Vests', 'Callaway - Products - Women Golf Shoes',
            'Callaway - Women Golf Shoes', 'Callaway - Women_Belts', 'Callaway - Women_Outerwear',
            'Callaway - Women_Pants', 'Callaway - Women_Polos', 'Callaway - Women_Skorts & Shorts',
            'Callaway - Women_Standard Collection',
            'Callaway - Add to Cart - HVA 3', 'Callaway - Order Status (Orders)', 'Callaway - Billing_Payment',
            'Callaway - Check Out', 'Callaway - Order Review', 'Callaway - Paypal', 'Callaway - Secure Checkout',
            'Callaway - Shipping']
    else:
        columns_hva = list(columns_hva)

    # Keep only tracked activities. Rows with a missing key are dropped, as the
    # previous groupby-based loop did, and only the first row per
    # (keyword, activity) is used, matching the old `.values[0]` lookup.
    tracked = hva[hva['floodlightActivity'].isin(columns_hva)]
    tracked = tracked.dropna(subset=keys)
    tracked = tracked.drop_duplicates(subset=keys + ['floodlightActivity'], keep='first')

    if tracked.empty:
        # Nothing to pivot: every activity column is simply zero.
        merged = non_hva.assign(**{column: 0 for column in columns_hva})
    else:
        # One pivot instead of appending a one-row frame per keyword, which
        # was quadratic and relied on the removed DataFrame.append.
        wide = (tracked.set_index(keys + ['floodlightActivity'])['dfaActions']
                       .unstack('floodlightActivity')
                       .rename_axis(columns=None)
                       .reset_index()
                       .reindex(columns=keys + columns_hva))
        # combine hva and non_hva
        merged = non_hva.merge(wide, on=keys, how='left')

    # Missing metrics and activities not recorded for a keyword count as 0.
    merged = merged.fillna(value=0)

    # generate new fields
    merged['HVA'] = merged[columns_hva].sum(axis=1).astype(int)
    # 0 / 0 yields NaN and is mapped to 0; revenue with zero cost stays inf.
    merged['ROI'] = (merged['dfaRevenue'] / merged['cost']).fillna(0)
    return merged[['campaign', 'adGroup', 'keywordText', 'keywordMatchType',
                   'status', 'keywordMaxCpc', 'effectiveKeywordMaxCpc',
                   'topOfPageBidCurrent', 'topOfPageBidAvg', 'impr', 'clicks',
                   'HVA', 'avgCpc', 'avgPos', 'cost', 'dfaRevenue', 'ROI']]

In [7]:
# Workbook that receives one sheet per campaign plus the keyword action sheets.
OUTPUT_WORKBOOK = '/Users/JayLiang/Desktop/Media Storm/PEI/CallAway/Auto_Keywords/CallawaySEM_optimization.xlsx'
writer = pd.ExcelWriter(OUTPUT_WORKBOOK, engine='xlsxwriter')

# Sheet (tab) name -> full campaign name as it appears in the reports.
campaigns = dict([
    ('G_NonBrand BigTall', 'G_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773'),
    ('G_Non-Brand Men', 'G_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773'),
    ('G_Non-Brand Women', 'G_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773'),
    ('B_NonBrand BigTall', 'B_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773'),
    ('B_Non-Brand Men', 'B_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773'),
    ('B_Non-Brand Women', 'B_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773'),
    ('Y_Non-Brand BigTall', 'Y_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773'),
    ('Y_Non-Brand Men', 'Y_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773'),
    ('Y_Non-Brand women', 'Y_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773'),
])


# Lookup tables keyed by keyword group. They do not depend on the campaign, so
# they are built once instead of on every loop iteration.
applications_FB_Programatic = {
    'group1': 'Add Products',
    'group1_lowClick': 'keep_running',
    'group2': 'Add Products',
    'group2_highImpr': 'Add Products',
    'group2_lowClick': 'keep_running',
    'group3': 'n.a',
    'group3_lowClick': 'keep_running',
    'group4': 'n.a',
    'group4_lowPosition': 'n.a',
    'group5': 'n.a',
    'group5_lowClick': 'keep_running'}
applications_SEM = {
    'group1': 'Maintain',
    'group1_lowClick': 'keep_running',
    'group2': '75% Desktop, 25% Mobile',
    'group2_highImpr': 'Type= Exact, add ngatiove KWs',
    'group2_lowClick': 'keep_running',
    'group3': 'pause/ Add to sitelinkes in other ad-groups',
    'group3_lowClick': 'keep_running',
    'group4': 'keep_running',
    'group4_lowPosition': 'improve position to 1-2 (increase CPC)',
    'group5': 'pause',
    'group5_lowClick': 'keep_running'}
applications_PLA = {
    'group1': 'Maintain/add Products',
    'group1_lowClick': 'keep_running',
    'group2': 'Filter by Zips',
    'group2_highImpr': 'Add Products',
    'group2_lowClick': 'keep_running',
    'group3': 'pause',
    'group3_lowClick': 'keep_running',
    'group4': 'keep running',
    'group4_lowPosition': 'improve position to 1-2 (increase CPC)',
    'group5': 'n.a',
    'group5_lowClick': 'keep_running'}
applications_Display = {
    'group1': 'Add Products',
    'group1_lowClick': 'keep_running',
    'group2': 'Filter by Zips/100% Desktop',
    'group2_highImpr': 'n.a',
    'group2_lowClick': 'keep_running',
    'group3': 'n.a.',
    'group3_lowClick': 'keep_running',
    'group4': 'n.a',
    'group4_lowPosition': 'n.a',
    'group5': 'n.a',
    'group5_lowClick': 'keep_running'}
criterion = {
    'group1': 'ROI > 0',
    'group1_lowClick': 'clicks are too low to drive conclusion',
    'group2': 'ROI = 0, high HVA',
    'group2_highImpr': 'ROI = 0, high HVA, high impression',
    'group2_lowClick': 'clicks are too low to drive conclusion',
    'group3': 'ROI = 0, low HVA, high impression, cost > 0',
    'group3_lowClick': 'clicks are too low to drive conclusion',
    'group4': 'ROI = 0, low HVA, high impression, cost = 0, position already high',
    'group4_lowPosition': 'ROI = 0, low HVA, high impression, cost = 0, low position',
    'group5': 'ROI = 0, low HVA, low impression',
    'group5_lowClick': 'clicks are too low to drive conclusion'}

# Classify the active keywords of each campaign into groups and write one
# sheet per campaign to the workbook.
for tab in campaigns:
    CAMPAIGN = campaigns[tab]
    hva_1 = hva[hva['campaign'] == CAMPAIGN]
    non_hva_1 = non_hva[non_hva['campaign'] == CAMPAIGN]
    df = merge_hva_and_non_hva(hva_1, non_hva_1)
    # .copy() so the column assignments below act on an independent frame
    # rather than on a filtered view (avoids SettingWithCopyWarning).
    df = df[(df['keywordText'] != 'Display Network Stats') & (df['status'] == 'Active')].copy()

    df['group'] = None

    roi_positive = df['ROI'] > 0
    roi_zero = df['ROI'] == 0
    enough_clicks = df['clicks'] >= 30
    few_clicks = df['clicks'] < 30

    # group1
    df.loc[roi_positive & few_clicks, 'group'] = 'group1_lowClick'
    df.loc[roi_positive & enough_clicks, 'group'] = 'group1'

    # group2
    HVA_mean = df['HVA'].mean()
    if HVA_mean < 10:
        HVA_mean = df[df['HVA'] > 0]['HVA'].mean()
    if pd.isnull(HVA_mean):
        # No keyword has any HVA (or the frame is empty): nothing can be
        # "high HVA". A NaN threshold would make both comparisons False and
        # leave every ROI == 0 row without a group.
        HVA_mean = float('inf')
    # Upper-quartile impression value; guarded because indexing an empty list
    # raised IndexError for campaigns without active keywords.
    impr_75 = sorted(list(df['impr']))[int(len(df)*3/4)] if len(df) else 0

    high_hva = roi_zero & (df['HVA'] >= HVA_mean)
    low_hva = roi_zero & (df['HVA'] < HVA_mean)

    # 'group2_highImpr' is the high-impression bucket (see `criterion`); the
    # two labels were previously assigned the other way round.
    df.loc[high_hva & enough_clicks & (df['impr'] >= impr_75), 'group'] = 'group2_highImpr'
    df.loc[high_hva & enough_clicks & (df['impr'] < impr_75), 'group'] = 'group2'
    df.loc[high_hva & few_clicks, 'group'] = 'group2_lowClick'

    impr_mean = df[df['group'].isnull()]['impr'].mean() #Not allocated group 'impr' mean
    high_impr = df['impr'] >= impr_mean
    low_impr = df['impr'] < impr_mean

    # group3
    df.loc[low_hva & high_impr & (df['cost'] > 0) & enough_clicks, 'group'] = 'group3'
    df.loc[low_hva & high_impr & (df['cost'] > 0) & few_clicks, 'group'] = 'group3_lowClick'

    # group4
    df.loc[low_hva & high_impr & (df['avgPos'] <= 2) & (df['cost'] == 0), 'group'] = 'group4'
    df.loc[low_hva & high_impr & (df['avgPos'] > 2) & (df['cost'] == 0), 'group'] = 'group4_lowPosition'

    # group5
    df.loc[low_hva & low_impr & enough_clicks, 'group'] = 'group5'
    df.loc[low_hva & low_impr & few_clicks, 'group'] = 'group5_lowClick'

    # .map leaves rows that matched no rule blank instead of raising
    # KeyError(None) as the previous dict lookups did.
    df['criterion'] = df['group'].map(criterion)
    df['applications_FB_Programatic'] = df['group'].map(applications_FB_Programatic)
    df['applications_SEM'] = df['group'].map(applications_SEM)
    df['applications_PLA'] = df['group'].map(applications_PLA)
    df['applications_Display'] = df['group'].map(applications_Display)

    df = df.sort_values('group')

    df.to_excel(writer, sheet_name=tab, index=False)
################

# Proposed keywords: non-brand only, one row per (Campaign, New KW).
new_kws = (
    pd.read_csv('/Users/JayLiang/Desktop/Media Storm/PEI/CallAway/Auto_Keywords/Callaway new kw list.csv')
    .loc[lambda kws: kws['Brand Nonbrand'] == 'Nonbrand']
    .drop_duplicates(['Campaign', 'New KW'])
)

# Current keywords of the campaigns being optimised, with the metrics needed below.
curr_kw_columns = [
    'campaign', 'adGroup', 'keywordText', 'keywordMatchType',
    'keywordMaxCpc', 'effectiveKeywordMaxCpc', 'topOfPageBidCurrent',
    'topOfPageBidAvg', 'status', 'impr', 'clicks', 'cost', 'dfaRevenue',
]
in_scope = non_hva['campaign'].isin(campaigns.values())
curr_kws = non_hva.loc[in_scope, curr_kw_columns].copy()

# Full campaign name -> campaign label used in the new-keyword CSV.
campaign_abbr = {
    'B_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773': 'Big Tall',
    'G_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773': 'Big Tall',
    'Y_Non-Brand_Callaway Apparel_Big & Tall Clothing_EST-2773': 'Big Tall',
    'B_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773': 'Men',
    'G_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773': 'Men',
    'Y_Non-Brand_Callaway Apparel_Mens Clothing_EST-2773': 'Men',
    'G_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773': 'Women',
    'B_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773': 'Women',
    'Y_Non-Brand_Callaway Apparel_Womens Clothing_EST-2773': 'Women'
}

curr_kws['campaign_abbr'] = curr_kws['campaign'].apply(lambda name: campaign_abbr[name])
revenue_per_cost = curr_kws['dfaRevenue'] / curr_kws['cost']
curr_kws['ROI'] = revenue_per_cost.fillna(0)

# Left join: keywords absent from the new list end up with a null 'New KW'.
curr_kws = curr_kws.merge(
    new_kws,
    left_on=['campaign_abbr', 'keywordText'],
    right_on=['Campaign', 'New KW'],
    how='left',
)

has_new_kw = curr_kws['New KW'].notnull()
action_to_new_kw_provided = curr_kws[has_new_kw].copy()

def insight(row):
    """Describe a keyword's state from its 'status', 'clicks' and 'ROI'.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with those three keys.
    Returns:
        A string naming the status bucket (Paused vs. anything else), whether
        clicks are below 30, and for 30+ clicks whether ROI is zero.
    """
    paused = row['status'] == 'Paused'

    # Too few clicks: ROI is not looked at.
    if row['clicks'] < 30:
        if paused:
            return 'status was Paused, clicks < 30'
        return 'status was activate, clicks < 30'

    if row['ROI'] == 0:
        if paused:
            return 'status was Paused, clicks >=30, ROI = 0, keep Paused'
        return 'status was activate, clicks >= 30, ROI = 0'

    if paused:
        return 'status was Paused, clicks >= 30, ROI > 0'
    return 'status was activate, clicks >= 30, ROI > 0'

def action(row):
    """Pick the recommended action for a keyword from 'status', 'clicks' and 'ROI'.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with those three keys.
    Returns:
        'Activate', 'Paused' or 'No Action'.
    """
    if row['status'] == 'Paused':
        # A paused keyword stays paused only when 30+ clicks produced no ROI.
        keep_paused = row['ROI'] == 0 and row['clicks'] >= 30
        return 'No Action' if keep_paused else 'Activate'

    # A non-paused keyword is paused only when it is not under 30 clicks and
    # its ROI is zero.
    should_pause = (not row['clicks'] < 30) and row['ROI'] == 0
    return 'Paused' if should_pause else 'No Action'
    
def _annotate_keywords(keywords):
    """Return a copy of `keywords` with 'insight' and 'Strategy' columns added
    and the join helper columns removed."""
    annotated = keywords.copy()
    rows = [row for _, row in annotated.iterrows()]
    # List comprehensions instead of DataFrame.apply(axis=1): on an empty frame
    # apply returns a DataFrame, which cannot be assigned to a single column.
    annotated['insight'] = [insight(row) for row in rows]
    annotated['Strategy'] = [action(row) for row in rows]
    return annotated.drop(columns=['Campaign', 'New KW', 'campaign_abbr', 'Brand Nonbrand'])


# Current keywords that also appear in the provided new-keyword list.
action_to_new_kw_provided = _annotate_keywords(action_to_new_kw_provided)
action_to_new_kw_provided.to_excel(writer, sheet_name='action_to_new_kw_provided', index=False)

# Paused current keywords that are NOT in the provided new-keyword list.
paused_kw_not_in_new_list = curr_kws[curr_kws['New KW'].isnull()].copy()
paused_kw_not_in_new_list = paused_kw_not_in_new_list[paused_kw_not_in_new_list['status'] == 'Paused']
paused_kw_not_in_new_list = _annotate_keywords(paused_kw_not_in_new_list)
paused_kw_not_in_new_list.to_excel(writer, sheet_name='paused_kw_not_in_new_list', index=False)

# ExcelWriter.save() was removed in pandas 2.0; close() writes the workbook
# and releases the file handle.
writer.close()

In [15]:
# Reload the proposed keyword list and keep the non-brand rows only.
new_kws = (
    pd.read_csv('/Users/JayLiang/Desktop/Media Storm/PEI/CallAway/Auto_Keywords/Callaway new kw list.csv')
    .loc[lambda kws: kws['Brand Nonbrand'] == 'Nonbrand']
)


In [16]:
# One row per (Campaign, New KW); the first occurrence is kept.
dedupe_keys = ['Campaign', 'New KW']
new_kws2 = new_kws.drop_duplicates(subset=dedupe_keys)
new_kws2

Unnamed: 0,Campaign,New KW,Brand Nonbrand
0,Big Tall,big tall block polos,Nonbrand
1,Big Tall,big tall cargo shorts,Nonbrand
2,Big Tall,big tall golf block polos,Nonbrand
...,...,...,...
533,Women,ladies zip jackets,Nonbrand
535,Women,ladies golf sleeveless top,Nonbrand
536,Women,ladies stretch belt,Nonbrand


In [17]:
# Re-display the de-duplicated keyword list built in the previous cell.
new_kws2

Unnamed: 0,Campaign,New KW,Brand Nonbrand
0,Big Tall,big tall block polos,Nonbrand
1,Big Tall,big tall cargo shorts,Nonbrand
2,Big Tall,big tall golf block polos,Nonbrand
...,...,...,...
533,Women,ladies zip jackets,Nonbrand
535,Women,ladies golf sleeveless top,Nonbrand
536,Women,ladies stretch belt,Nonbrand


In [None]:
# Same load / filter / de-duplicate steps as the export cell, as one chain.
new_kws = (
    pd.read_csv('/Users/JayLiang/Desktop/Media Storm/PEI/CallAway/Auto_Keywords/Callaway new kw list.csv')
    .loc[lambda kws: kws['Brand Nonbrand'] == 'Nonbrand']
    .drop_duplicates(['Campaign', 'New KW'])
)