<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# HubSpot - Send targets with action items to Google Sheets
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/HubSpot/HubSpot_Get_all_contacts.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/Open_in_Naas_Lab.svg"/></a><br><br><a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=HubSpot+-+Get+all+contacts:+Error+short+description">Bug report</a>

**Tags:** #hubspot #crm #sales #contact #naas_drivers #snippet #dataframe

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Last update:** 2023-08-23 (Created: 2022-02-21)

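**Description:** This notebook retrieves your target contacts from HubSpot, extracts action items from their activities (notes, emails, meetings, communications) with OpenAI, and sends the result to a Google Sheets spreadsheet.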

**References:**
- [HubSpot CRM API - Contacts](https://developers.hubspot.com/docs/api/crm/contacts)

## Input

### Import libraries

In [None]:
from naas_drivers import hubspot, gsheet
import naas
import pandas as pd
import requests
import openai

### Setup variables
**Mandatory**

[Get your HubSpot Access token](https://knowledge.hubspot.com/articles/kcs_article/integrations/how-do-i-get-my-hubspot-api-key)
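- `hs_access_token`: This variable stores an access token used for accessing the HubSpot API.
- `spreadsheet_url`: Google Sheets spreadsheet the targets are sent to (read from the `SPREADSHEET_ID` naas secret).
- `sheet_name`: Name of the sheet that receives the data.
- `openai_api_key`: OpenAI API key used to extract action items from the contact activities.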

**Optional**
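- `exclude_profiles`: List of LinkedIn profile URLs (compared with the `linkedinbio` contact property) to exclude from the targets.
- `contact_properties`: List of properties (HubSpot internal names) you want to get from each contact. By default, you will get: email, firstname, lastname, createdate, lastmodifieddate, hs_object_id. To list the available contact properties, you can use this template: "HubSpot/HubSpot_List_contact_properties.ipynb" stored in https://github.com/jupyter-naas/awesome-notebooks.
- `associations`: List of HubSpot object types associated with a contact whose activities are retrieved.
- `limit`: Maximum number of targets to retrieve.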

In [None]:
# Mandatory
# Credentials are read from naas secrets; the HubSpot and OpenAI values fall
# back to an explicit placeholder string when the lookup returns a falsy value.
hs_access_token = naas.secret.get("HS_ACCESS_TOKEN") or "YOUR_HS_ACCESS_TOKEN"
spreadsheet_url = naas.secret.get("SPREADSHEET_ID")
sheet_name = "Top50"
openai_api_key = naas.secret.get("OPENAI_API_KEY") or "YOUR_OPENAI_API_KEY"

# Optional
# Contacts whose `linkedinbio` value is in this list are never kept as targets.
exclude_profiles = [
    "https://www.linkedin.com/in/ACoAABCNSioBW3YZHc2lBHVG0E_TXYWitQkmwog",
    "https://www.linkedin.com/in/ACoAAA6EYJABlJdZG2ZQLuLkpCu2Ny8pqa065b8",
    "https://www.linkedin.com/in/ACoAAAJHE7sB5OxuKHuzguZ9L6lfDHqw--cdnJg",
]
# HubSpot internal property names; also the column order of the targets table.
contact_properties = [
    "hs_object_id",
    "firstname",
    "lastname",
    "email",
    "linkedinbio",
    "jobtitle",
    "notes_last_updated",
    "naas_target",
    "icp_type",
    "score",
]
# Object types requested as associations when a single contact is fetched.
associations = ["notes", "emails", "meetings", "communications"]
# Maximum number of targets kept after filtering and sorting.
limit = 50

## Model

### Get all contacts

In [None]:
# Pull the contacts from HubSpot, requesting the configured properties
df_contacts = hubspot.connect(hs_access_token).contacts.get_all(
    contact_properties
)
print("Rows:", len(df_contacts))
df_contacts.head(n=1)

### Filter contacts to get targets

In [None]:
def get_targets(
    df_init,
    limit,
    contact_properties,
    exclude_profiles
):
    """Filter the contacts down to the most recently noted targets.

    A contact is kept when its `linkedinbio` is not in `exclude_profiles`,
    its `naas_target` is "Yes", its `icp_type` is "DataProducer" and its
    `score` (truncated to an integer) is greater than 5. Contacts sharing the
    same "firstname lastname" are de-duplicated (first occurrence wins).

    Args:
        df_init: DataFrame holding at least the columns `firstname`,
            `lastname`, `linkedinbio`, `naas_target`, `icp_type`, `score`
            and every column listed in `contact_properties`.
        limit: Maximum number of rows returned.
        contact_properties: Columns (and their order) of the result; must
            contain `notes_last_updated`, which is used for sorting.
        exclude_profiles: `linkedinbio` values to discard.

    Returns:
        A new DataFrame, sorted by `notes_last_updated` descending, limited
        to `limit` rows and re-indexed from 0.
    """
    # Init: work on a copy so the caller's DataFrame is left untouched
    df = df_init.copy()

    # Cleaning
    df = df.fillna({"icp_type": "NotICP"})
    df["fullname"] = df["firstname"].fillna("TBD") + " " + df["lastname"].fillna("TBD")

    # Scores may arrive as strings ("7", "7.5", "") or be missing: coerce
    # anything unparsable to 0 instead of letting `astype(int)` raise, then
    # truncate so numeric inputs compare exactly as before.
    score = pd.to_numeric(df["score"], errors="coerce").fillna(0).astype(int)

    # Filter contacts to only get targets
    is_target = (
        ~df["linkedinbio"].isin(exclude_profiles)
        & (df["naas_target"].astype(str) == "Yes")
        & (df["icp_type"].astype(str) == "DataProducer")
        & (score > 5)
    )
    df = df[is_target].drop_duplicates("fullname")

    # Order column
    df = df[contact_properties]

    # Sort values
    df = df.sort_values(by="notes_last_updated", ascending=False)[:limit]
    return df.reset_index(drop=True)

# Build the targets table from the contacts using the notebook settings
df_targets = get_targets(
    df_init=df_contacts,
    limit=limit,
    contact_properties=contact_properties,
    exclude_profiles=exclude_profiles,
)
print("Targets:", len(df_targets))
df_targets.head(n=5)

### Get contact details

In [None]:
def retrieve_object_details(
    token,
    object_id,
    object_type,
    properties=None,
):
    """Fetch the properties of one HubSpot CRM object as a one-row DataFrame.

    Args:
        token: HubSpot access token sent as a Bearer token.
        object_id: Identifier of the object to read.
        object_type: Object type segment of the CRM v3 URL (e.g. "notes").
        properties: Optional list of property names to request.

    Returns:
        A DataFrame with a single row built from the response's `properties`
        payload, or an empty DataFrame when the request does not return
        HTTP 200 (the response body is printed) or carries no properties.
    """
    # Init
    params = {
        "archived": "false"
    }

    # Requests
    if properties:
        params["properties"] = properties
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}"
    }
    url = f"https://api.hubapi.com/crm/v3/objects/{object_type}/{object_id}"

    # Response
    res = requests.get(url, headers=headers, params=params)
    if res.status_code != 200:
        print(res.text)
        # Truly empty frame: `pd.DataFrame([[]])` would have one row and no
        # columns, which callers checking `len(df) > 0` mistake for data.
        return pd.DataFrame()

    data = res.json().get("properties")
    if not data:
        return pd.DataFrame()
    return pd.DataFrame([data])

def create_activity_df(
    token,
    object_id,
    activity,
    properties_dict=None,
):
    """Return one activity as a DataFrame with normalized column names.

    Args:
        token: HubSpot access token.
        object_id: Identifier of the activity object.
        activity: Activity object type (e.g. "notes", "meetings"); its
            upper-cased value fills `activity_type` when no source property
            is mapped to that column.
        properties_dict: Mapping of HubSpot property name -> output column
            name. Its keys are the properties requested. When omitted, the
            object is fetched without a property selection and its columns
            are left untouched.

    Returns:
        A one-row DataFrame containing the renamed columns plus
        `activity_type`, or an empty DataFrame when nothing was retrieved.
    """
    # Init
    properties_dict = properties_dict or {}
    properties = list(properties_dict)

    # List activities
    df = retrieve_object_details(
        token,
        object_id,
        activity,
        properties
    )
    # `.empty` is true when there are no rows OR no columns
    if df.empty:
        return pd.DataFrame()

    if properties:
        # reindex (rather than df[properties]) so a property absent from the
        # response becomes an empty column instead of raising KeyError
        df = df.reindex(columns=properties).rename(columns=properties_dict)
    if 'activity_type' not in df:
        df.insert(loc=1, column="activity_type", value=activity.upper())

    return df.reset_index(drop=True)

def get_contact_details(
    hs_access_token,
    contact_id,
    properties,
    associations
):
    """Build a text summary and an activity table for one HubSpot contact.

    Args:
        hs_access_token: HubSpot access token.
        contact_id: Identifier of the contact.
        properties: Contact property names to fetch and list in the summary.
        associations: Associated object types to fetch with the contact.

    Returns:
        A tuple `(message, df)`:
        - `message`: a "CONTACT:" section with one "- name: value" line per
          property, followed by an "ACTIVITIES:" section with one line per
          activity, oldest first.
        - `df`: DataFrame of activities with the columns `activity_hs_id`,
          `activity_type`, `activity_date` and `activity_content`; it has
          these columns and no rows when the contact has no activity.
    """
    # Init
    activity_columns = [
        "activity_hs_id",
        "activity_type",
        "activity_date",
        "activity_content",
    ]
    # HubSpot property -> output column, per association type
    default_mapping = {
        "hs_object_id": "activity_hs_id",
        "hs_lastmodifieddate": "activity_date",
        "hs_body_preview": "activity_content"
    }
    mappings = {
        "communications": {
            "hs_object_id": "activity_hs_id",
            "hs_lastmodifieddate": "activity_date",
            "hs_communication_channel_type": "activity_type",
            "hs_body_preview": "activity_content"
        },
        "meetings": {
            "hs_object_id": "activity_hs_id",
            "hs_lastmodifieddate": "activity_date",
            "hs_meeting_title": "activity_content"
        },
    }

    # Get contact
    contact = hubspot.connect(hs_access_token).contacts.get(
        contact_id,
        hs_properties=properties,
        hs_associations=associations
    )

    # Get contact properties
    contact_properties = contact.get("properties") or {}
    contact_lines = "".join(
        f"- {p}: {contact_properties.get(p)}\n" for p in properties
    )

    # Get contact associations; guard against a payload without an
    # "associations" entry (or with a None one), which would break iteration
    contact_associations = contact.get("associations") or {}
    frames = []
    for a in contact_associations:
        properties_dict = mappings.get(a, default_mapping)
        results = (contact_associations.get(a) or {}).get("results") or []
        for r in results:
            # Create activity df
            tmp_df = create_activity_df(
                hs_access_token,
                r.get("id"),
                a,
                properties_dict
            )
            if not tmp_df.empty:
                frames.append(tmp_df)

    # Concatenate once instead of growing a DataFrame inside the loop
    if frames:
        df = pd.concat(frames)
    else:
        df = pd.DataFrame(columns=activity_columns)

    # Cleaning df
    if not df.empty:
        # Exclude missing or "None" content; copy so the column assignment
        # below does not write into a slice of the concatenated frame
        content = df["activity_content"]
        df = df[content.notna() & ~content.astype(str).isin(["None"])].copy()

        # Format date
        df["activity_date"] = pd.to_datetime(df["activity_date"]).dt.strftime("%Y-%m-%d %H:%M:%S")
        df = df.sort_values(by="activity_date").reset_index(drop=True)

    # Create activity message
    activity_lines = []
    for row in df.itertuples():
        # str() keeps non-string content from crashing on .replace
        activity_content = str(row.activity_content).replace("\xa0\u200c", "")
        activity_lines.append(
            f"-{row.activity_date}: {row.activity_type} - {activity_content}\n"
        )
    message = "CONTACT:\n" + contact_lines + "\nACTIVITIES:\n" + "".join(activity_lines)
    return message, df.reset_index(drop=True)

def action_item_extraction(openai_api_key, transcription):
    """Ask the OpenAI chat model to list the action items found in a text.

    Sets `openai.api_key` to `openai_api_key`, sends `transcription` as the
    user message after a fixed system prompt (model "gpt-4", temperature 0),
    and returns the content of the first choice of the response.
    """
    system_prompt = "You are an AI expert in analyzing conversations and extracting action items. Please review the text and identify any tasks, assignments, or actions that were agreed upon or mentioned as needing to be done. These could be tasks assigned to specific individuals, or general actions that the group has decided to take. Please list these action items clearly and concisely."

    # Connect to openai
    openai.api_key = openai_api_key

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcription},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        temperature=0,
        messages=chat_messages,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# For every target: gather its HubSpot activities, derive action items and
# store them in the "ACTION_ITEMS" column of df_targets.
for row in df_targets.itertuples():
    index = row.Index
    firstname = row.firstname
    lastname = row.lastname
    contact_id = row.hs_object_id
    print(index+1, f"Starting with: {firstname} {lastname} ({contact_id})")
    prompt_message, df_activity = get_contact_details(
        hs_access_token,
        contact_id,
        contact_properties,
        associations
    )
    # A contact without any activity can yield a DataFrame that has no
    # "activity_type" column; treat it as "no activity types" instead of
    # failing with a KeyError.
    if "activity_type" in df_activity:
        activity_types = df_activity["activity_type"].unique().tolist()
    else:
        activity_types = []
    # Rules if only notes: skip the OpenAI call and use a canned answer
    if len(activity_types) == 1 and "NOTES" in activity_types:
        action_items = 'No action items can be identified from the provided text. The text mainly consists of LinkedIn interactions such as likes and comments, but does not specify any tasks, assignments, or actions that need to be done.'
    else:
        action_items = action_item_extraction(openai_api_key, prompt_message)
    print(action_items)
    df_targets.loc[index, "ACTION_ITEMS"] = action_items

## Output

### Send data to Google Sheets spreadsheet

In [None]:
# Send the targets (including their action items) to the configured sheet.
# NOTE(review): append=False presumably replaces the sheet content rather
# than adding rows — confirm against the naas_drivers gsheet driver.
gsheet.connect(spreadsheet_url).send(
    data=df_targets,
    sheet_name=sheet_name,
    append=False,
)

### Add scheduler

In [None]:
# Schedule in crontab syntax. Placeholder value (every day at 08:00):
# adjust it to the refresh frequency you need.
cron = "0 8 * * *"
naas.scheduler.add(cron=cron)

# naas.scheduler.delete()