<img width="8%" alt="Google Sheets.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Google%20Sheets.png" style="border-radius: 15%">

# Google Sheets - Update contacts view

**Tags:** #googlesheets #gsheet #data #naas_drivers #growth #people #organizations #contacts

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook updates your contacts view in Google Sheets.

## Input

### Import libraries

In [None]:
from naas_drivers import gsheet, linkedin
import pandas as pd
import os
from datetime import date, datetime
import naas_data_product

### Setup variables
**Inputs**
- `entity_dir`: This variable represents the entity directory.
- `input_dir`: Input directory to retrieve file from.
- `file_people`: Name of the file storing people data to be retrieved.
- `file_organizations`: Name of the file storing organization data to be retrieved.
- `spreadsheet_url`: Google Sheets spreadsheet URL.
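- `sheet_contacts`: Name of the Google Sheets sheet storing the contacts (lead profiles).
- `api_key`: Naas API token, read from the `NAAS_API_TOKEN` environment variable or, if not set, from the Naas secret of the same name.
- `entity_name`: Name of the entity, loaded from the entity directory.
- `linkedin_url`: LinkedIn URL of the entity, loaded from the entity directory.
- `datalake_dir`: Root directory of the datalake (`naas_data_product.OUTPUTS_PATH`).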

**Outputs**
- `output_dir`: Output directory to save file to.
- `file_contacts`: Output file name to save as pickle.

In [None]:
# Inputs
# All entity-level settings are loaded with pload from the same location (entity "0").
entity_store_dir = os.path.join(naas_data_product.OUTPUTS_PATH, "entities", "0")
entity_dir = pload(entity_store_dir, "entity_dir")
input_dir = os.path.join(entity_dir, "growth-engine", date.today().isoformat())
file_people = "people"
file_organizations = "organizations"
spreadsheet_url = pload(entity_store_dir, "abi_spreadsheet")
sheet_contacts = "CONTACTS"
# NOTE(review): `naas` is not imported in this notebook's import cell; the fallback below
# only works if the name is injected elsewhere (presumably by naas_data_product) - confirm.
api_key = os.environ.get("NAAS_API_TOKEN") or naas.secret.get('NAAS_API_TOKEN')
entity_name = pload(entity_store_dir, "entity_name")
linkedin_url = pload(entity_store_dir, "linkedin_url")
datalake_dir = naas_data_product.OUTPUTS_PATH

# Outputs
# Same dated folder as input_dir: the view is written next to the files it is built from.
output_dir = os.path.join(entity_dir, "growth-engine", date.today().isoformat())
file_contacts = "contacts"

## Model

### Get contacts

In [None]:
# Read the current content of the contacts sheet; it is compared with the new view before sending.
df_init = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_contacts)
if isinstance(df_init, pd.DataFrame):
    # Messaging options already present in the sheet, keyed by profile URL
    # (presumably so they are not generated again - verify against get_dict_from_df).
    messaging_options = get_dict_from_df(df_init, "MESSAGING_OPTIONS", "PEOPLE_PROFILE_URL", "messaging_options", output_dir)
else:
    # Anything that is not a DataFrame is treated as "no contacts yet".
    df_init = pd.DataFrame()
    messaging_options = {}
print(f"- Contacts (init): {len(df_init)}")
df_init.head(3)

### Get people

In [None]:
# Load the people table stored under input_dir.
df_people = pload(input_dir, file_people)
print(f"- People: {len(df_people)}")
df_people.head(1)

### Get organizations

In [None]:
# Load the organizations table stored under input_dir.
df_organizations = pload(input_dir, file_organizations)
print(f"- Organizations: {len(df_organizations)}")
df_organizations.head(1)

### Get entity organization

In [None]:
def get_entity_org(
    linkedin_url,
    datalake_dir
):
    """Return the company name attached to the entity's LinkedIn URL.

    Parameters
    ----------
    linkedin_url : str
        LinkedIn URL of the entity. A URL containing "/in/" is handled as a
        personal profile, any other URL as an organization page.
    datalake_dir : str
        Root directory under which "datalake/linkedin/<profiles|organizations>"
        is resolved.

    Returns
    -------
    The "COMPANY_NAME" value found at index label 0 of the data returned by
    ``get_linkedin_data``, or ``None`` when that data is empty.
    """
    # Pick the datalake sub-folder and the kind of LinkedIn data to read.
    if "/in/" in linkedin_url:
        folder, data_type = "profiles", "top_card"
    else:
        folder, data_type = "organizations", "company_info"
    linkedin_dir = os.path.join(datalake_dir, "datalake", "linkedin", folder)
    records = get_linkedin_data(linkedin_url, linkedin_dir, data_type)

    # Nothing found: no organization to report.
    if len(records) == 0:
        return None
    return records.loc[0, "COMPANY_NAME"]

# Organization of the entity itself; it is excluded from the contacts view further down.
organization = get_entity_org(linkedin_url=linkedin_url, datalake_dir=datalake_dir)
print(f"- Entity URL: {linkedin_url}")
print(f"- Organization: {organization}")

### Get prompt sales messagings

In [None]:
# Read the "ENTITY" sheet and keep only the row describing the current entity.
df_gsheet = gsheet.connect(spreadsheet_url).get(sheet_name="ENTITY")
# Same guard as for the contacts sheet: the result is not always a DataFrame.
if not isinstance(df_gsheet, pd.DataFrame):
    raise ValueError(f"Could not read sheet 'ENTITY' from the spreadsheet: {df_gsheet}")
df_gsheet = df_gsheet.fillna("NA")
df_gsheet = df_gsheet[df_gsheet["LINKEDIN_URL"] == linkedin_url].reset_index(drop=True)
# Fail with an explicit message instead of a bare `KeyError: 0` when the entity is not listed.
if len(df_gsheet) == 0:
    raise ValueError(
        f"No row in sheet 'ENTITY' has LINKEDIN_URL == '{linkedin_url}'; "
        "cannot load PROMPT_SALES_MESSAGINGS."
    )
prompt_sales_messagings = df_gsheet.loc[0, "PROMPT_SALES_MESSAGINGS"]
print("- Prompt sales messagings:", prompt_sales_messagings)

### Create contact view

In [None]:
def _build_people_view(df_people):
    """Select the people columns of the contacts view and derive LEAD_STATUS from the MQL/SQL dates."""
    df = df_people.copy()
    # "NA" is the placeholder for a missing date; SQL takes precedence over MQL.
    df["LEAD_STATUS"] = "Lead"
    df.loc[df["MQL_DATE"] != "NA", "LEAD_STATUS"] = "Marketing Qualified Lead"
    df.loc[df["SQL_DATE"] != "NA", "LEAD_STATUS"] = "Sales Qualified Lead"
    to_keep = [
        "ENTITY",
        "SCENARIO",
        "FULLNAME",
        "OCCUPATION",
        "SENIORITY",
        "DEPARTMENT",
        "INTERACTION_SCORE",
        "LEAD_STATUS",
        "CREATED_DATE",
        "MQL_DATE",
        "SQL_DATE",
        "LAST_INTERACTION_DATE",
        "NOTES",
        "PROFILE_URL",
        "CRM_CONTACT_ID",
        "ORGANIZATION",
    ]
    # Date columns and these shared columns keep their name; the rest gets a "PEOPLE_" prefix.
    unprefixed = ["ENTITY", "SCENARIO", "LEAD_STATUS", "ORGANIZATION", "CRM_CONTACT_ID"]
    renamed = {c: f"PEOPLE_{c}" for c in to_keep if "DATE" not in c and c not in unprefixed}
    return df[to_keep].rename(columns=renamed)


def _build_organizations_view(df_organizations):
    """Select the organization columns of the contacts view and prefix them with "ORG_"."""
    to_keep = [
        "ORGANIZATION",
        "INDUSTRY",
        "CITY",
        "COUNTRY",
        "STAFF_RANGE_NAME",
        "STAFF_COUNT",
        "FOLLOWERS_COUNT",
        "INTERACTION_SCORE",
        "WEBSITE",
        "TAGLINE",
        "DESCRIPTION",
        "ORG_NAME",
        "ORG_LINKEDIN_ID",
        "ORG_LINKEDIN_URL",
        "CRM_ORG_ID",
    ]
    # Columns already starting with "ORG" (including the ORGANIZATION merge key) and CRM_ORG_ID keep their name.
    renamed = {c: f"ORG_{c}" for c in to_keep if not c.startswith("ORG") and c != "CRM_ORG_ID"}
    return df_organizations[to_keep].rename(columns=renamed)


def create_contacts_view(
    df_people,
    df_organizations,
    api_key,
    prompt_sales_messagings,
    messaging_options,
    entity_name,
    limit_llm=50
):
    """Build the contacts view (people joined with their organization) and
    generate messaging options for new Sales Qualified Leads.

    Besides its parameters, this function reads two notebook-level variables:
    `organization` (rows of that organization are excluded from the view) and
    `output_dir` (where the messaging options are saved with `pdump`). It also
    calls the notebook-level helpers `pdump` and `create_chat_completion`.

    Parameters
    ----------
    df_people : pd.DataFrame
        People table; must contain the people columns kept in the view.
    df_organizations : pd.DataFrame
        Organizations table; must contain the organization columns kept in the view.
    api_key : str
        Key passed as first argument to `create_chat_completion`.
    prompt_sales_messagings : str
        Beginning of the prompt; the description of the data fields is appended to it.
    messaging_options : dict
        Messaging options keyed by profile URL. Updated in place with every
        messaging successfully generated.
    entity_name : str
        Entity name: replaces "[ENTITY]" in the prompt and is searched (as a
        literal substring) in the ENTITY column to select the leads.
    limit_llm : int, default 50
        Maximum number of leads for which a chat completion is attempted.

    Returns
    -------
    pd.DataFrame
        One row per person, with organization columns and MESSAGING_OPTIONS
        ("TBD" where no messaging is available).
    """
    df1 = _build_people_view(df_people)
    df2 = _build_organizations_view(df_organizations)

    # Merge dfs (ORGANIZATION is the only column the two frames share)
    df = pd.merge(df1, df2, how="left", on="ORGANIZATION").fillna('NA')

    # Exclude entity org
    df = df[df["ORGANIZATION"] != organization].reset_index(drop=True)

    # Add messagings options to df
    df["MESSAGING_OPTIONS"] = df["PEOPLE_PROFILE_URL"].map(messaging_options).fillna('TBD')

    # Generate messaging options for SQL
    col_messaging = [
        "PEOPLE_PROFILE_URL",
        "LAST_INTERACTION_DATE",
        "PEOPLE_FULLNAME",
        "PEOPLE_OCCUPATION",
        "PEOPLE_SENIORITY",
        "PEOPLE_DEPARTMENT",
        "PEOPLE_NOTES",
        # The prompt below describes an ORGANIZATION field, so it must be part of the data sent.
        "ORGANIZATION",
        "ORG_INDUSTRY",
        "ORG_COUNTRY",
    ]
    data = df[col_messaging]
    prompt_messaging = prompt_sales_messagings + """
    The options should be presented as follow: 1.xxxx, 2.xxxx, 3.xxxx,
    The data you will received will be structured as follow:
    - LAST_INTERACTION_DATE: last interaction date with content published by [ENTITY]
    - PEOPLE_FULLNAME: Name of the person
    - PEOPLE_OCCUPATION: Job position
    - PEOPLE_SENIORITY: Job seniority
    - PEOPLE_DEPARTMENT: Job department
    - PEOPLE_NOTES: Interactions made by the person on the content published
    - ORGANIZATION: Name of the organization the person is working with
    - ORG_INDUSTRY: Company industry
    """
    prompt_messaging = prompt_messaging.replace("[ENTITY]", entity_name)

    filter_df = df[
        (df["MESSAGING_OPTIONS"] == "TBD") &
        (df["LEAD_STATUS"] == "Sales Qualified Lead") &
        (~df["PEOPLE_PROFILE_URL"].isin(list(messaging_options.keys()))) &
        # regex=False: the entity name is plain text, not a pattern ("(", "+", "." must match literally)
        (df["ENTITY"].str.contains(entity_name, regex=False))
    ]
    # Most recent interactions first, so they are served first when the limit applies
    filter_df = filter_df.sort_values(by="LAST_INTERACTION_DATE", ascending=False)
    print("-> New Sales Qualified Leads:", len(filter_df))

    for count, row in enumerate(filter_df.itertuples(), start=1):
        # Stop as soon as the limit is reached: remaining leads keep "TBD"
        if count > limit_llm:
            break
        profile_url = row.PEOPLE_PROFILE_URL
        print(f"{count} - Messaging for:", row.PEOPLE_FULLNAME)
        tmp_df = data[data["PEOPLE_PROFILE_URL"] == profile_url].reset_index(drop=True)
        try:
            messaging = create_chat_completion(api_key, prompt_messaging, str(tmp_df.to_dict()))
        except Exception as e:
            # Best effort: the lead keeps "TBD" and the failure is NOT stored in
            # messaging_options, so the placeholder is never saved as if it were a result.
            print(f"Messaging generation failed for {profile_url}: {e}")
            print()
            continue
        print("Messaging options:", messaging)
        print()
        messaging_options[profile_url] = messaging
        # Saved after every success so that an interruption does not lose what was generated
        pdump(output_dir, messaging_options, "messaging_options")
        df.loc[row.Index, "MESSAGING_OPTIONS"] = messaging
    return df.reset_index(drop=True)

# Build the contacts view; limit_llm is left at the function's default.
df_contacts = create_contacts_view(
    df_people=df_people,
    df_organizations=df_organizations,
    api_key=api_key,
    prompt_sales_messagings=prompt_sales_messagings,
    messaging_options=messaging_options,
    entity_name=entity_name,
)
df_contacts

## Output

### Save data

In [None]:
# Save the contacts view with pdump, under output_dir with the name held in file_contacts.
pdump(output_dir, df_contacts, file_contacts)

### Send "Contacts" to spreadsheet

In [None]:
# Stack the sheet content and the new view (both cast to strings) and drop every row that
# occurs more than once: what remains are rows present in only one of the two
# (assuming neither of them contains duplicated rows on its own).
stacked_views = pd.concat([df_init.astype(str), df_contacts.astype(str)])
df_check = stacked_views.drop_duplicates(keep=False)
if len(df_check) == 0:
    print("Noting to update in Google Sheets!")
else:
    # At least one row differs: overwrite the sheet (append=False) with the new view.
    gsheet.connect(spreadsheet_url).send(data=df_contacts, sheet_name=sheet_contacts, append=False)