<a href="https://colab.research.google.com/github/ktynski/Marketing_Automations_Notebooks_With_GPT/blob/main/Large_Language_Model_Search_Optimization_(Public).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai
!pip install fuzzywuzzy
!pip install retrying



In [None]:
import ast
import concurrent.futures
import os
from collections import Counter

import openai
import pandas as pd
from fuzzywuzzy import fuzz, process
from retrying import retry

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be pasted into (and saved with) the notebook. The placeholder string is only
# a fallback: replace it with your actual OpenAI API key if the variable is
# not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your OpenAI API Key")

# Running total of gpt_call() executions (each retry attempt counts), printed
# at the end of the run.
gpt_call_counter = 0



def retry_if_exception(exception):
    """Tell ``retrying`` whether a failed call should be attempted again.

    Every ordinary error (any ``Exception`` subclass) is retried.  Signals
    that derive only from ``BaseException`` -- ``KeyboardInterrupt``,
    ``SystemExit``, ``GeneratorExit`` -- are not, so interrupting the
    notebook stops the run instead of silently triggering another API call.

    Args:
        exception: The exception instance raised by the wrapped function.

    Returns:
        ``True`` if the call should be retried, ``False`` otherwise.
    """
    return isinstance(exception, Exception)

# Up to three attempts in total.  The exponential wait (milliseconds, capped at
# 10 s) keeps a retry from firing immediately after a failure, which would
# otherwise burn every attempt on the same transient or rate-limit error.
@retry(
    retry_on_exception=retry_if_exception,
    stop_max_attempt_number=3,
    wait_exponential_multiplier=1000,
    wait_exponential_max=10000,
)
def gpt_call(messages):
    """Send *messages* to the ``gpt-3.5-turbo`` chat model and return the reply.

    Each execution (including retry attempts) increments the module-level
    ``gpt_call_counter`` and prints the running total.

    NOTE: the counter update is not synchronized; ``query_gpt`` invokes this
    function from worker threads, so the printed total may be approximate.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The object returned by ``openai.ChatCompletion.create``; callers in
        this file read ``response['choices'][0]['message']['content']``.

    Raises:
        Whatever the final attempt raises once the retry budget is exhausted.
    """
    global gpt_call_counter
    gpt_call_counter += 1
    print(f"GPT called {gpt_call_counter} times.")
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

def generate_related_queries(topic, n):
    """Ask the model for *n* high-intent questions/searches about *topic*.

    Args:
        topic: Subject the queries should relate to.
        n: Number of queries requested in the prompt (the model is not
            guaranteed to return exactly this many).

    Returns:
        A list of non-empty query strings, one per line of the model's reply,
        with surrounding whitespace removed.
    """
    print(f"Generating related queries for topic: '{topic}'")
    response = gpt_call([
        {"role": "system", "content": "You are all-knowing genius at understanding how people search. Your task is to generate specific and high-intent questions or searches related to a topic."},
        {"role": "user", "content": f"I need {n} distinct questions or keyword searches related to {topic}. They should reflect common and detailed inquiries that people might have about the topic that reflect high purchase intent. Do not print anything other than the questions and/or searches themselves. No intro or outro text should be included under any circumstances."}
    ])
    content = response['choices'][0]['message']['content']
    # Blank or whitespace-only lines (e.g. spacing between items) would each
    # become an empty query and cost several API calls downstream, so drop them.
    stripped_lines = (line.strip() for line in content.splitlines())
    return [line for line in stripped_lines if line]

def extract_brands(response):
    """Ask the model which brand names appear in *response*.

    The model is instructed to answer with a Python/JSON-style list literal
    such as ``["Brand1", "Brand2"]``.

    Args:
        response: Free text to scan for brand names.

    Returns:
        A list of non-blank brand-name strings.  An empty list is returned
        when the reply cannot be parsed as a literal or is not a list/tuple.
    """
    print("Extracting brands from response.")
    chat_response = gpt_call([
        {"role": "system", "content": "You are a helpful assistant. Your task is to identify specific brand names mentioned in a text."},
        {"role": "user", "content": f"Can you identify any specific brand names mentioned in the following text: \"{response}\"? Please respond with only a list of the brands in the following format: [\"Brand1\", \"Brand2\", \"Brand3\"]."}
    ])
    raw = chat_response['choices'][0]['message']['content'].strip()

    # SECURITY: the reply is untrusted model output, so it must never be passed
    # to eval().  ast.literal_eval only accepts literals (lists, strings,
    # numbers, ...) and raises instead of executing anything else.
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError, TypeError):
        return []

    # A syntactically valid reply can still be the wrong shape (a bare string,
    # a dict, nested lists); keep only real brand-name strings.
    if not isinstance(parsed, (list, tuple)):
        return []
    return [item for item in parsed if isinstance(item, str) and item.strip()]

def _brand_prominence(response, brand):
    """Return ``1 / position`` of the first word of *response* containing *brand*.

    Matching is case-insensitive and substring based, so multi-word brands and
    brands followed by punctuation are found.  A brand that cannot be located
    is scored as if it appeared at the very last word.
    """
    text = response.lower()
    needle = brand.lower().strip()
    hit = text.find(needle) if needle else -1

    if hit >= 0:
        prefix = text[:hit]
        word_index = len(prefix.split())
        # If the match starts mid-token (e.g. after an opening quote) the last
        # counted token is the one holding the brand, not a preceding word.
        if prefix and not prefix[-1].isspace():
            word_index -= 1
        position = word_index + 1
    else:
        # Measured in words, the same unit as the found case; never zero.
        position = max(len(text.split()), 1)
    return 1 / position


def query_gpt(query, n):
    """Collect *n* model answers for *query* and tabulate the brands they cite.

    The *n* chat requests are issued concurrently through ``gpt_call``; each
    answer is then passed to ``extract_brands``.

    Args:
        query: The question or search phrase to ask about.
        n: Number of independent answers to request.  ``n <= 0`` yields an
            empty frame without contacting the API.

    Returns:
        A DataFrame with columns ``Query``, ``Response``, ``Brand``,
        ``Mentions`` and ``Prominence``: one row per distinct brand per
        answer, or a single ``'N/A'`` row for an answer with no brands.
    """
    print(f"Querying GPT for query: '{query}'")
    columns = ['Query', 'Response', 'Brand', 'Mentions', 'Prominence']

    # ThreadPoolExecutor rejects max_workers <= 0, so short-circuit here.
    if n <= 0:
        return pd.DataFrame(columns=columns)

    # One independent message list per request.
    tasks = [
        [
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': f"I need detailed information on this topic: \"{query}\". If relevant, please mention specific brand names."},
        ]
        for _ in range(n)
    ]

    # Rows are gathered in a list and turned into a frame once; concatenating
    # onto a growing DataFrame inside the loop is quadratic.
    rows = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=n) as executor:
        futures = [executor.submit(gpt_call, task) for task in tasks]
        for future in concurrent.futures.as_completed(futures):
            response = future.result()['choices'][0]['message']['content'].strip()

            extracted = extract_brands(response)
            # Tolerate a malformed extraction result instead of crashing on it.
            if not isinstance(extracted, (list, tuple)):
                extracted = []
            brands = [b for b in extracted if isinstance(b, str)]

            if not brands:
                rows.append({'Query': query, 'Response': response, 'Brand': 'N/A', 'Mentions': 0, 'Prominence': 0})
                continue

            # Mentions counts how often the extractor listed the brand.
            for brand, mention_count in Counter(brands).items():
                print(f"Found brand '{brand}' mentioned {mention_count} time(s) in response.")
                rows.append({
                    'Query': query,
                    'Response': response,
                    'Brand': brand,
                    'Mentions': mention_count,
                    'Prominence': _brand_prominence(response, brand),
                })

    return pd.DataFrame(rows, columns=columns)

def analyze_brands(df):
    """Aggregate per-brand statistics and model-written context summaries.

    For every brand in ``df['Brand']`` other than the ``'N/A'`` placeholder,
    sums ``Mentions``, averages ``Prominence``, collects the distinct
    ``Response`` texts, and asks the model (one ``gpt_call`` per text) to
    summarize how the brand is discussed.

    Args:
        df: Frame with at least ``Brand``, ``Mentions``, ``Prominence`` and
            ``Response`` columns, as produced by ``query_gpt``.

    Returns:
        A DataFrame with columns ``Brand``, ``Total Mentions``,
        ``Average Prominence``, ``Contexts`` (array of distinct responses) and
        ``Context Summaries`` (list of summaries, same order as ``Contexts``).
        Empty, but with those columns, when no brand is present.
    """
    print("Analyzing brands...")
    columns = ['Brand', 'Total Mentions', 'Average Prominence', 'Contexts', 'Context Summaries']

    # Every brand except the 'N/A' placeholder rows.
    unique_brands = df[df['Brand'] != 'N/A']['Brand'].unique()

    # Rows are accumulated and converted once at the end instead of
    # concatenating onto an initially empty DataFrame on every iteration.
    rows = []
    for brand in unique_brands:
        print(f"Analyzing brand: '{brand}'")
        brand_df = df[df['Brand'] == brand]

        # Full responses in which this brand was reported, de-duplicated.
        contexts = brand_df['Response'].unique()

        # One summarization request per distinct context.
        context_summaries = []
        for context in contexts:
            chat_response = gpt_call([
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Given the following text, please provide a summary focusing on how the brand '{brand}' is being discussed: {context}"}
            ])
            context_summaries.append(chat_response['choices'][0]['message']['content'])

        rows.append({
            'Brand': brand,
            'Total Mentions': brand_df['Mentions'].sum(),
            'Average Prominence': brand_df['Prominence'].mean(),
            'Contexts': contexts,
            'Context Summaries': context_summaries,
        })

    return pd.DataFrame(rows, columns=columns)

def generate_report(analysis_df, specific_brand):
    """Produce a natural-language report about one brand.

    Args:
        analysis_df: Frame with ``Brand``, ``Total Mentions``,
            ``Average Prominence``, ``Contexts`` and ``Context Summaries``
            columns (the output of ``analyze_brands``).
        specific_brand: Brand name to look up by exact match.

    Returns:
        The model-written report for the first matching row, or a fixed
        "not mentioned" sentence when the brand has no row.
    """
    print(f"Generating report for brand: '{specific_brand}'")
    matching_rows = analysis_df[analysis_df['Brand'] == specific_brand]

    # Nothing to report on: answer locally without calling the model.
    if matching_rows.empty:
        return f"The brand {specific_brand} was not mentioned in the responses."

    # Only the first matching row feeds the prompt.
    row = matching_rows.iloc[0]
    total_mentions = row['Total Mentions']
    avg_prominence = row['Average Prominence']
    contexts = row['Contexts']
    summaries = row['Context Summaries']

    prompt = f"Please provide a detailed report on the brand '{specific_brand}', which was mentioned {total_mentions} times with an average prominence of {avg_prominence}. Here are the contexts and their summaries: {contexts} {summaries}"
    chat_response = gpt_call([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ])
    return chat_response['choices'][0]['message']['content']


def unify_similar_brands(df, threshold=90):
    """Merge near-duplicate spellings in ``df['Brand']`` via fuzzy matching.

    Each distinct brand is compared against all distinct brands with
    ``fuzz.token_sort_ratio``; any other name scoring at least *threshold*
    is rewritten to the brand currently being processed.

    The list of names is captured once before the loop, so a later iteration
    may rename values again; the surviving spelling is therefore not
    necessarily the first one seen.

    Args:
        df: Frame with a ``Brand`` column.  Modified in place.
        threshold: Minimum similarity score for two names to be merged.

    Returns:
        The same ``df`` object, with its ``Brand`` column updated.
    """
    names = df['Brand'].unique()
    for canonical in names:
        scored = process.extract(canonical, names, limit=len(names), scorer=fuzz.token_sort_ratio)
        for candidate in scored:
            variant, score = candidate[0], candidate[1]
            # Skip the name itself and anything below the similarity cut-off.
            if score < threshold or variant == canonical:
                continue
            df['Brand'] = df['Brand'].replace(variant, canonical)
    return df


# Set your topic and the number of queries and responses
topic = 'Content Marketing Agency'
n_queries = 10
n_responses = 2

# Generate the queries
queries = generate_related_queries(topic, n_queries)

# Process each query, producing a DataFrame for each
df_list = [query_gpt(query, n_responses) for query in queries]

# Combine all the DataFrames into one
final_df = pd.concat(df_list, ignore_index=True)

# Unify similar brand names
final_df = unify_similar_brands(final_df)

# Specify the brand you want a detailed report for
specific_brand = 'Fractl'

# Perform the brand analysis
analysis_df = analyze_brands(final_df)

# Generate the report for the specific brand
brand_report = generate_report(analysis_df, specific_brand)

# Save final_df to an Excel file
final_df.to_excel("final_dataframe.xlsx", index=False)

# Save analysis_df to an Excel file
analysis_df.to_excel("analysis_dataframe.xlsx", index=False)

# Save the brand_report to a text file.  The encoding is pinned to UTF-8
# because the model's text may contain non-ASCII characters that the platform
# default encoding cannot represent.
with open("brand_report.txt", "w", encoding="utf-8") as text_file:
    text_file.write(brand_report)

# Print the analysis DataFrame and the brand report
print(analysis_df)
print(brand_report)

# Print total number of GPT calls
print(f"Total GPT calls made: {gpt_call_counter}")



In [None]:
brand_report

In [None]:
analysis_df

In [None]:
final_df