<img width="8%" alt="Google Sheets.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Google%20Sheets.png" style="border-radius: 15%">

# Google Sheets - Update sales database

**Tags:** #googlesheets #gsheet #data #naas_drivers #sales-engine #automation

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook updates the "Sales" database.

## Input

### Import libraries

In [None]:
from naas_drivers import gsheet
import pandas as pd
import os
from datetime import date
import naas_data_product
try:
    import openai
except:
    !pip install openai --user
    import openai
import time

### Setup variables
**Inputs**
- `openai_api_key`: OpenAI API Key.
- `spreadsheet_url`: Google Sheets spreadsheet URL.
- `sheet_name_input`: Google Sheets sheet name storing growth data.
- `leads_companies_name`: Google Sheets sheet name storing leads companies.
- `leads_profiles_name`: Google Sheets sheet name storing leads profiles.
- `prompt_sales_messagings`: Prompt for sales messagings

**Outputs**
- `output_dir`: Output directory to save file to.
- `file_sales`: Output file name to save as pickle.
- `sheet_name_output`: Google Sheets sheet name storing sales qualified leads.

In [None]:
# Inputs
# NOTE(review): `naas` is not imported in the import cell of this notebook — confirm it is
# made available elsewhere before relying on a fresh-kernel run.
openai_api_key = naas.secret.get("OPENAI_API_KEY")
# Falls back to the placeholder string when the "ABI_SPREADSHEET" secret is falsy.
spreadsheet_url = naas.secret.get("ABI_SPREADSHEET") or "YOUR_GOOGLE_SPREADSHEET_URL"
sheet_name_input = "GROWTH"
leads_profiles_name = "LEADS"
leads_companies_name = "LEADS_COMPANIES"
# `pload` is not defined in this notebook; presumably it loads a saved object from a
# directory by name and returns a falsy value when nothing is stored — TODO confirm.
prompt_sales_messagings = pload(os.path.join(naas_data_product.OUTPUTS_PATH, "entity"), "prompt_sales_messagings") or "YOUR_PROMPT_SALES"

# Outputs
# One output folder per run date (ISO formatted) under the "sales-engine" directory.
output_dir = os.path.join(naas_data_product.OUTPUTS_PATH, "sales-engine", date.today().isoformat())
file_sales = "sales"
sheet_name_output = "SALES"

## Model

### Get growth data

In [None]:
# Read the growth sheet from the configured spreadsheet
growth_sheet = gsheet.connect(spreadsheet_url)
df_input = growth_sheet.get(sheet_name=sheet_name_input)
print("Rows:", len(df_input))
df_input.head(3)

### Get leads

In [None]:
# Read the leads sheet; anything that is not a DataFrame is replaced by an empty frame
leads_raw = gsheet.connect(spreadsheet_url).get(sheet_name=leads_profiles_name)
df_leads = leads_raw if isinstance(leads_raw, pd.DataFrame) else pd.DataFrame()
print("- Existing Leads:", len(df_leads))
# df_leads.head(3)

### Get leads companies

In [None]:
# Read the leads companies sheet; anything that is not a DataFrame is replaced by an empty frame
companies_raw = gsheet.connect(spreadsheet_url).get(sheet_name=leads_companies_name)
df_leads_companies = companies_raw if isinstance(companies_raw, pd.DataFrame) else pd.DataFrame()
print("Leads Companies:", len(df_leads_companies))
# df_leads_companies.head(3)

### Create sales database

In [None]:
def _select_columns(frame, columns):
    """Return `frame` restricted to `columns`, adding absent columns as all-missing ones."""
    missing = [col for col in columns if col not in frame.columns]
    if missing:
        # `assign` returns a new frame, so the caller's object is never modified.
        frame = frame.assign(**{col: None for col in missing})
    return frame[columns]


def create_db_sales(
    df_input,
    df_leads,
    df_companies,
    min_interaction_score=3,
):
    """Build the sales qualified leads table from growth interactions.

    Steps:
    1. Sum INTERACTION_SCORE per (ENTITY, SCENARIO, LAST_INTERACTION_DATE,
       INTERACTIONS, PROFILE_URL), order by SCENARIO and compute a running
       total per PROFILE_URL (INTERACTION_CUM).
    2. Left-join lead attributes on PROFILE_URL and drop rows whose COMPANY is "TBD".
    3. Left-join company attributes on COMPANY and replace missing values with "NA".
    4. Keep rows whose running score reaches `min_interaction_score`, that have a
       known company name, an ICP other than "NotICP"/"NA", and whose CRM_CONTACT
       and CRM_COMPANY both render as "False"; keep the first row per PROFILE_URL.

    Parameters
    ----------
    df_input : pd.DataFrame
        Growth data holding the grouping columns and INTERACTION_SCORE.
    df_leads : pd.DataFrame
        Lead profiles. Columns that are absent (for instance an empty frame) are
        treated as missing values, so the affected rows are filtered out instead
        of raising a KeyError.
    df_companies : pd.DataFrame
        Lead companies. Absent columns are handled the same way as for `df_leads`.
    min_interaction_score : int, default 3
        Minimum cumulated interaction score for a profile to qualify.

    Returns
    -------
    pd.DataFrame
        One row per qualified PROFILE_URL, sorted by SCENARIO, DATE and
        INTERACTION_SCORE in descending order, with a fresh integer index.
    """
    # Init
    df = df_input.copy()

    # Groupby
    to_group = [
        "ENTITY",
        "SCENARIO",
        "LAST_INTERACTION_DATE",
        "INTERACTIONS",
        "PROFILE_URL",
    ]
    to_agg = {
        "INTERACTION_SCORE": "sum"
    }
    df = df.groupby(to_group, as_index=False).agg(to_agg)
    df = df.sort_values(by=["SCENARIO"], ascending=[True]).reset_index(drop=True)

    # Cumsum: running score per profile, following the scenario order set above
    df["INTERACTION_CUM"] = df.groupby("PROFILE_URL")["INTERACTION_SCORE"].cumsum()

    # Enrich with leads data
    cols_leads = [
        "PROFILE_URL",
        "FULLNAME",
        "OCCUPATION",
        "ICP",
        "CRM_CONTACT",
        "COMPANY",
    ]
    leads = _select_columns(df_leads, cols_leads)
    # PROFILE_URL is the only column shared by both frames; name it explicitly
    df = pd.merge(df, leads, how="left", on="PROFILE_URL")
    df = df[(df["COMPANY"] != "TBD")].reset_index(drop=True)

    # Enrich with leads companies data
    col_companies = [
        "COMPANY",
        "COMPANY_NAME",
        "INDUSTRY",
        "COUNTRY",
        "STAFF_RANGE_NAME",
        "STAFF_COUNT",
        "CRM_COMPANY",
    ]
    companies = _select_columns(df_companies, col_companies)
    df = pd.merge(df, companies, how="left", on="COMPANY").fillna("NA")

    # Filter data
    df = df[
        (df["INTERACTION_CUM"] >= min_interaction_score) &
        (df["COMPANY_NAME"] != "NA") &
        (~df["ICP"].isin(["NotICP", "NA"])) &
        (df["CRM_CONTACT"].astype(str) == "False") &
        (df["CRM_COMPANY"].astype(str) == "False")
    ].drop_duplicates("PROFILE_URL")

    # Cleaning
    to_order = [
        "ENTITY",
        "SCENARIO",
        "LAST_INTERACTION_DATE",
        "FULLNAME",
        "OCCUPATION",
        "ICP",
        "COMPANY_NAME",
        "INDUSTRY",
        "COUNTRY",
        "STAFF_RANGE_NAME",
        "STAFF_COUNT",
        "INTERACTION_CUM",
        "INTERACTIONS",
        "PROFILE_URL",
    ]
    to_rename = {
        "LAST_INTERACTION_DATE": "DATE",
        "INTERACTION_CUM": "INTERACTION_SCORE"
    }
    df = df[to_order].rename(columns=to_rename)
    df = df.sort_values(by=["SCENARIO", "DATE", "INTERACTION_SCORE"], ascending=[False, False, False])
    return df.reset_index(drop=True)

# Build the sales qualified leads table from the three sheets loaded above
db_sales = create_db_sales(
    df_input=df_input,
    df_leads=df_leads,
    df_companies=df_leads_companies,
)
print("- Sales Qualified leads:", len(db_sales))
# db_sales.head(3)

### Create messaging options

In [None]:
# Work on a copy so `db_sales` stays untouched
df = db_sales.copy()

# Init df to be used in prompt
data = df.drop(["ENTITY", "SCENARIO", "INTERACTION_SCORE"], axis=1)
# `df.loc[0, "ENTITY"]` raises KeyError when no lead qualified; use an empty name in that case
entity = df.loc[0, "ENTITY"] if len(df) > 0 else ""

prompt_messaging = prompt_sales_messagings + """
The options should be presented as follow: 1.xxxx, 2.xxxx, 3.xxxx,
The data you will received will be structured as follow:
- DATE: last interaction date with content published by [ENTITY]
- FULLNAME: Name of the person
- OCCUPATION: Job position
- ICP: Customer profile for sales team
- COMPANY_NAME: Name of the company of the person
- INDUSTRY: Company industry
 -COUNTRY: Company country
- STAFF_RANGE_NAME: Company size
- STAFF_COUNT: Number of employees of the company
- INTERACTIONS: Interactions made by the person on the content published
"""
prompt_messaging = prompt_messaging.replace("[ENTITY]", entity)

# Add messagings options to df
if "MESSAGING_OPTIONS" not in df.columns:
    df["MESSAGING_OPTIONS"] = "TBD"

# Get existing messagings
# `pload` / `pdump` are defined outside this notebook; presumably `pload` returns None
# when nothing has been saved yet — TODO confirm.
df_sales = pload(output_dir, file_sales)
messagings = pload(output_dir, "messagings")
if messagings is None:
    if df_sales is not None:
        # Rebuild the cache from the previously saved sales frame, ignoring placeholders
        messagings = df_sales[df_sales["MESSAGING_OPTIONS"] != "TBD"].set_index('PROFILE_URL')['MESSAGING_OPTIONS'].to_dict()
        pdump(output_dir, messagings, "messagings")
    else:
        messagings = {}

# Apply messagings to profiles
for row in df.itertuples():
    index = row.Index
    fullname = row.FULLNAME
    profile_url = row.PROFILE_URL
    if profile_url not in messagings:
        print(f"{index} - Messaging for:", fullname)
        tmp_df = data[data["PROFILE_URL"] == profile_url].reset_index(drop=True)
        messaging = create_chat_completion(openai_api_key, prompt_messaging, str(tmp_df.to_dict()))
        messagings[profile_url] = messaging
        # Saved after every new completion so an interrupted run keeps what it already produced
        pdump(output_dir, messagings, "messagings")
    else:
        messaging = messagings.get(profile_url)
    df.loc[index, "MESSAGING_OPTIONS"] = messaging

# Display result
print("Rows:", len(df))
df.head(3)

## Output

### Save data

In [None]:
# `pdump` is defined outside this notebook; presumably it serializes `df` into
# `output_dir` under the name held by `file_sales` — TODO confirm.
pdump(output_dir, df, file_sales)

### Send data to Google Sheets spreadsheet

In [None]:
# Send the sales frame to the output sheet with appending disabled
sales_sheet = gsheet.connect(spreadsheet_url)
sales_sheet.send(sheet_name=sheet_name_output, data=df, append=False)