In [41]:
import time

# Wall-clock timestamp (seconds since the epoch) taken before any work is done;
# the last cell subtracts it from a second timestamp to report total runtime.
start = time.time()

In [42]:
import pandas as pd
# Small lookup frames defined inline; both are cross-joined onto the
# template rows further down.
match_type = pd.DataFrame({'match_type': ['br', 'xx']})
ad_group_name = pd.DataFrame({'ad_group_template': ['subject,intent,pma,match_type']})

# Tables read in full from CSV files in the working directory.
intent = pd.read_csv('intent.csv')
persona = pd.read_csv('persona.csv')
subject = pd.read_csv('subject.csv')
template = pd.read_csv('template.csv')
pma = pd.read_csv('pma.csv')
location = pd.read_csv('location.csv')
other = pd.read_csv('other.csv')
medium = pd.read_csv('medium.csv')

# Tables where only a subset of the columns is loaded.
market = pd.read_csv(
    'market.csv',
    usecols=['language', 'market', 'account', 'account_id'],
)
campaign = pd.read_csv(
    'campaign.csv',
    usecols=['market', 'campaign_name_template'],
)

In [43]:
# Base grid: templates joined to their PMA rows, then multiplied by every
# match type and the ad-group template, then fanned out to each market that
# shares the row's language.
intermediate = (
    template
    .merge(pma, on='intent_localised_id', how='inner')
    [['intent_localised_id', 'intent_localised_x', 'keyword_template', 'language', 'pma']]
    .merge(match_type, how='cross')
    .merge(ad_group_name, how='cross')
    .merge(market, on='language', how='inner')
)

In [44]:
# The merge suffixed the template-side column with `_x`; expose it as `intent`.
intermediate = intermediate.rename(columns={'intent_localised_x': 'intent'})

In [45]:
import numpy as np
from itertools import product

# Template dimension name -> frame holding its localised words.
# create_keywords reads the `language` and `<name>_localised` columns of each.
dimension_dfs = dict(
    subject=subject,
    persona=persona,
    other=other,
    medium=medium,
)

def create_keywords(row):
    """Expand one template row into every keyword string it can produce.

    ``row['keyword_template']`` is a comma-separated list of dimension names.
    Each name is resolved to a list of candidate words:

    * ``location``: values of ``location['location']`` whose ``market``
      equals ``row['market']``
    * ``intent``: the single value ``row['intent']``
    * a key of ``dimension_dfs``: that frame's ``<name>_localised`` values
      whose ``language`` equals ``row['language']``
    * anything else: the name itself, used as a literal word

    Returns a list containing one space-joined string per element of the
    Cartesian product of those lists, in template order. If any of the
    lists is empty, the returned list is empty as well.
    """
    template_parts = row['keyword_template'].split(',')
    language = row['language']
    market = row['market']

    def candidates(part):
        # Words that may fill this slot of the template.
        if part == 'location':
            in_market = location.market == market
            return location['location'][in_market].values.tolist()
        if part == 'intent':
            return [row["intent"]]
        if part in dimension_dfs:
            frame = dimension_dfs[part]
            same_language = frame['language'] == language
            return frame.loc[same_language, f'{part}_localised'].values.tolist()
        return [part]

    word_lists = [candidates(part) for part in template_parts]

    # One keyword per combination, words separated by a single space.
    return [' '.join(words) for words in product(*word_lists)]

In [46]:
# Row-wise call of create_keywords: each row's `keywords` cell holds the list
# of keyword strings generated from that row's template.
intermediate['keywords'] = intermediate.apply(create_keywords, axis=1)

In [47]:
# Turn each list element into its own row (other columns are repeated).
# Rows whose list was empty are kept with NaN in `keywords`.
intermediate = intermediate.explode('keywords')

In [48]:
# Attach `campaign_name_template` by market; the inner join drops rows whose
# market has no entry in `campaign`.
intermediate = intermediate.merge(campaign, on='market', how='inner')

In [49]:
# Discard rows that ended up without a keyword.
intermediate = intermediate.dropna(subset=['keywords'])

In [50]:
def retrieve_dimension(input_string, keyword_string, dimension):
    """Return the word of ``keyword_string`` that sits in ``dimension``'s slot.

    ``input_string`` is a comma-separated template of dimension names and
    ``keyword_string`` is a keyword whose words are separated by single
    spaces. The position of ``dimension`` within the template is used as the
    index into the keyword's words.

    The lookup is purely positional, so it assumes every template slot
    contributed exactly one space-free word to the keyword.

    Raises:
        ValueError: ``dimension`` does not appear in ``input_string``.
        IndexError: the keyword has fewer words than the slot position.
    """
    position = input_string.split(",").index(dimension)
    return keyword_string.split(" ")[position]

In [51]:
# Pull the word in the `subject` slot of the template out of each keyword
intermediate['subject'] = intermediate.apply(
    lambda row: retrieve_dimension(row['keyword_template'], row['keywords'], 'subject'),
    axis=1,
)

# Ad group name: subject, intent, pma and match type joined by underscores,
# lower-cased (column-wise string concatenation)
intermediate['ad_group_name'] = (
    intermediate['subject']
    + '_' + intermediate['intent']
    + '_' + intermediate['pma']
    + '_' + intermediate['match_type']
).str.lower()

In [None]:
# Inputs for the campaign template choice: length of the market code and a
# flag for the special 'ww' market
market_len = intermediate['market'].str.len()
market_ww = intermediate['market'] == 'ww'

# Template 1 for two-character markets other than 'ww', template 2 otherwise
intermediate['campaign_template_order'] = np.where(
    ~market_ww & (market_len == 2),
    1,
    2,
)

# Campaign name, built column-wise for both layouts and selected per row:
#   template 1 -> ..._<language>_<market>_xx_multiplesub_<match_type>
#   template 2 -> ..._<language>_xx_multiplesub_<match_type>-<market>
intermediate['campaign_name'] = np.where(
    intermediate['campaign_template_order'] == 1,
    'stu_sem_generic_web_0_' + intermediate['language']
    + '_' + intermediate['market']
    + '_xx_multiplesub_' + intermediate['match_type'],
    'stu_sem_generic_web_0_' + intermediate['language']
    + '_xx_multiplesub_' + intermediate['match_type']
    + '-' + intermediate['market'],
)

In [None]:
# Final view: the subset of columns shown as the cell's output.
intermediate[['language', 'account', 'campaign_name', 'subject', 'ad_group_name', 'intent', 'keywords']]

In [None]:
# Elapsed wall-clock time since `start` was recorded in the first cell.
end = time.time()
runtime = (end - start) / 60  # seconds -> minutes
print(f'The code ran in {runtime} minutes.')