In [1]:
import time

# Wall-clock timestamp taken before any work is done; the final cell subtracts
# it from a second time.time() reading to report the total runtime.
start = time.time()

In [2]:
import pandas as pd
# Input tables, read from CSV files in the current working directory.
intent = pd.read_csv('intent.csv')
persona = pd.read_csv('persona.csv')
subject = pd.read_csv('subject.csv')
template = pd.read_csv('template.csv')
pma = pd.read_csv('pma.csv')

# Small in-notebook tables; the next cell cross-joins them onto every row.
match_type = pd.DataFrame({'match_type': ['br', 'xx']})
ad_group_name = pd.DataFrame({'ad_group_template': ['subject,intent,pma,match_type']})

location = pd.read_csv('location.csv')

# Only the listed columns are loaded from these two files.
market = pd.read_csv(
    'market.csv',
    usecols=['language', 'market', 'account', 'account_id'],
)
campaign = pd.read_csv(
    'campaign.csv',
    usecols=['market', 'campaign_name_template'],
)

other = pd.read_csv('other.csv')
medium = pd.read_csv('medium.csv')

In [3]:
# Columns carried forward from the template/pma join. 'intent_localised_x' is
# the left-hand (template) copy of a column present in both frames.
base_columns = [
    'intent_localised_id',
    'intent_localised_x',
    'keyword_template',
    'language',
    'pma',
]

template_with_pma = template.merge(pma, on='intent_localised_id', how='inner')

# Cross joins repeat every row for each match type and ad-group template;
# the final inner join attaches every market that shares the row's language.
intermediate = (
    template_with_pma[base_columns]
    .merge(match_type, how='cross')
    .merge(ad_group_name, how='cross')
    .merge(market, on='language', how='inner')
)

# Joins on `other` and `medium` are currently disabled:
# intermediate = intermediate.merge(other, on='language', how='inner')
# intermediate = intermediate.merge(medium, on='language', how='inner')



In [4]:
# 'intent_localised_x' is the template-side copy of 'intent_localised' left by
# the template/pma merge; expose it under the shorter name 'intent'.
# Reassigning instead of using inplace=True matches the new-frame-per-step
# style of the surrounding cells and keeps the statement chainable.
intermediate = intermediate.rename(columns={'intent_localised_x': 'intent'})

In [5]:
import numpy as np
from itertools import product

# Template dimension name -> table holding its localised values.
# create_keywords filters the matching table by the row's language and reads
# its '<dimension>_localised' column.
dimension_dfs = dict(
    subject=subject,
    persona=persona,
    other=other,
    medium=medium,
)

def create_keywords(row):
    """Expand one row's keyword template into every concrete keyword string.

    ``row['keyword_template']`` is a comma-separated list of dimension names.
    Each name contributes a list of candidate words:

    * ``'location'`` -> ``location['location']`` values for the row's market
    * ``'intent'`` -> the row's own ``intent`` value
    * a key of ``dimension_dfs`` -> that table's ``<name>_localised`` values
      for the row's language
    * anything else -> the name itself, used as a literal word

    Returns a list containing one space-joined string per element of the
    Cartesian product of those candidate lists, in template order. If any
    candidate list is empty, the returned list is empty.
    """
    template_dimensions = row['keyword_template'].split(',')
    row_language = row['language']
    row_market = row['market']

    def candidate_words(dimension):
        # Resolve a single template dimension to its list of possible words.
        if dimension == 'location':
            in_market = location.market == row_market
            return location['location'][in_market].values.tolist()
        if dimension == 'intent':
            return [row["intent"]]
        if dimension in dimension_dfs:
            table = dimension_dfs[dimension]
            in_language = table['language'] == row_language
            return table.loc[in_language, f'{dimension}_localised'].values.tolist()
        return [dimension]

    word_lists = [candidate_words(name) for name in template_dimensions]

    # Every combination of one word per dimension, joined with single spaces.
    return [' '.join(words) for words in product(*word_lists)]

In [6]:
# create_keywords is called once per row and returns a list of keyword
# strings, so 'keywords' holds one list per row at this point.
keyword_lists = intermediate.apply(create_keywords, axis=1)
intermediate['keywords'] = keyword_lists

In [7]:
# Turn each row's keyword list into one row per keyword. Per the pandas docs,
# a row whose list is empty yields a single row with NaN in 'keywords'.
intermediate = intermediate.explode('keywords')

In [8]:
# Attach each market's campaign_name_template; the inner join drops rows whose
# market has no entry in the campaign table.
# NOTE(review): campaign_name_template does not appear to be read by any later
# cell in this notebook — campaign_name is assembled from hard-coded pieces.
# Confirm whether this join is still needed.
intermediate = intermediate.merge(campaign, on='market', how='inner')

In [9]:
# Remove rows that have no keyword (NaN in 'keywords'). Reassigning instead of
# using inplace=True matches the new-frame-per-step style of the cells above.
intermediate = intermediate.dropna(subset=['keywords'])

In [10]:
def retrieve_dimension(input_string, keyword_string, dimension):
    """Return the word of ``keyword_string`` that sits in ``dimension``'s slot.

    ``input_string`` is a comma-separated template of dimension names and
    ``keyword_string`` is a keyword whose words are separated by single
    spaces. The position of ``dimension`` within the template is used as the
    index into the keyword's words.

    Raises:
        ValueError: ``dimension`` is not one of the template's names.
        IndexError: the keyword has fewer words than the dimension's position.

    NOTE(review): the positional lookup is only correct when every dimension
    contributed exactly one space-free word to the keyword — confirm that no
    localised value contains a space.
    """
    position = input_string.split(",").index(dimension)
    keyword_words = keyword_string.split(" ")
    return keyword_words[position]

In [11]:
# Extract dimension from keywords
# Extract the subject word from every keyword. Zipping the two columns calls
# retrieve_dimension with the same arguments as a row-wise
# DataFrame.apply(axis=1) would, without building a Series for each row.
intermediate['subject'] = [
    retrieve_dimension(template_string, keyword, 'subject')
    for template_string, keyword in zip(
        intermediate['keyword_template'], intermediate['keywords']
    )
]

# Column-wise string concatenation: <subject>_<intent>_<pma>_<match_type>
intermediate['ad_group_name'] = (
    intermediate['subject']
    + '_' + intermediate['intent']
    + '_' + intermediate['pma']
    + '_' + intermediate['match_type']
)

In [12]:
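# NOTE: legacy row-wise implementation, kept commented out for reference only.
# The previous cell now builds `subject` and `ad_group_name` directly.
# def create_ad_group_name(row):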
#     # this part creates ad group name
#     subject = retrieve_dimension(row['keyword_template'], row['keywords'], "subject")
#     intent = retrieve_dimension(row['keyword_template'], row['keywords'], "intent")
#     pma = row['pma']
#     match_type = row['match_type']
#     ad_group_name = "_".join([subject, intent, pma, match_type])

#     # taking advantage of the fact that we already have subject extracted, we are going to add it below
#     # as per blueprint requirements

#     return ad_group_name, subject

In [13]:
# intermediate[["ad_group_name", "subject"]] = intermediate.apply(lambda x :create_ad_group_name(x), axis=1, result_type='expand')

In [14]:
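# NOTE: legacy row-wise implementation, kept commented out for reference only.
# The np.where cell further down now computes `campaign_template_order` and
# `campaign_name` for the whole frame at once.
# def assign_campaign_template_order(market):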
#     if len(market) == 2 and market != 'ww':
#         return 1
    
#     if len(market) > 2 or market == 'ww':
#         return 2

# def create_campaign_name(row):
#     language = row['language']
#     market = row['market']
#     match_type = row['match_type']
#     campaign_template_order = assign_campaign_template_order(market)

#     if campaign_template_order == 1:
#         return "".join(['stu_sem_generic_web_0_', language, '_', market, '_xx_multiplesub_', match_type])
    
#     if campaign_template_order == 2:
#         return "".join(['stu_sem_generic_web_0_', language, '_xx_multiplesub_', match_type, "-", market])

In [15]:
# intermediate['campaign_name'] = intermediate.apply(lambda x: create_campaign_name(x), axis=1)

In [16]:
# Inputs for choosing the naming order: length of the market code and whether
# the market is the special value 'ww'.
market_len = intermediate['market'].str.len()
market_ww = intermediate['market'].eq('ww')

# Order 1: two-character market codes other than 'ww'. Order 2: everything else.
is_order_1 = market_len.eq(2) & ~market_ww
intermediate['campaign_template_order'] = np.where(is_order_1, 1, 2)

# Pieces shared by both naming orders.
campaign_prefix = 'stu_sem_generic_web_0_' + intermediate['language']
campaign_suffix = '_xx_multiplesub_' + intermediate['match_type']

# Order 1 puts the market between language and suffix; order 2 appends it
# after a hyphen.
name_order_1 = campaign_prefix + '_' + intermediate['market'] + campaign_suffix
name_order_2 = campaign_prefix + campaign_suffix + '-' + intermediate['market']

intermediate['campaign_name'] = np.where(
    intermediate['campaign_template_order'] == 1,
    name_order_1,
    name_order_2,
)

In [17]:
# Columns shown in the result preview below.
preview_columns = [
    'language',
    'account',
    'campaign_name',
    'subject',
    'ad_group_name',
    'intent',
    'keywords',
]
intermediate[preview_columns]

Unnamed: 0,language,account,campaign_name,subject,ad_group_name,intent,keywords
0,eng,AR,stu_sem_generic_web_0_eng_ae_xx_multiplesub_br,Arabic,Arabic_chat_method_br,chat,chat Arabic
1,eng,AR,stu_sem_generic_web_0_eng_ae_xx_multiplesub_br,Bengali,Bengali_chat_method_br,chat,chat Bengali
2,eng,AR,stu_sem_generic_web_0_eng_ae_xx_multiplesub_br,Croatian,Croatian_chat_method_br,chat,chat Croatian
3,eng,AR,stu_sem_generic_web_0_eng_ae_xx_multiplesub_br,Czech,Czech_chat_method_br,chat,chat Czech
4,eng,AR,stu_sem_generic_web_0_eng_ae_xx_multiplesub_br,English,English_chat_method_br,chat,chat English
...,...,...,...,...,...,...,...
10129735,ukr,RU UA,stu_sem_generic_web_0_ukr_ua_xx_multiplesub_xx,Хімія,Хімія_Direct_competitor_xx,Direct,Хімія Direct
10129736,ukr,RU UA,stu_sem_generic_web_0_ukr_ua_xx_multiplesub_xx,Тагальська,Тагальська_Direct_competitor_xx,Direct,Тагальська Direct
10129737,ukr,RU UA,stu_sem_generic_web_0_ukr_ua_xx_multiplesub_xx,Китайська,Китайська_Direct_competitor_xx,Direct,Китайська Direct
10129738,ukr,RU UA,stu_sem_generic_web_0_ukr_ua_xx_multiplesub_xx,Іспанська,Іспанська_Direct_competitor_xx,Direct,Іспанська Direct


In [18]:
# Total wall-clock time since `start` was recorded in the first cell,
# reported in minutes.
end = time.time()
elapsed_seconds = end - start
runtime = elapsed_seconds / 60
print(f'The code ran in {runtime} minutes.')

The code ran in 2.15787756840388 minutes.
