<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# HubSpot - Update linkedinbio from Google
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/HubSpot/HubSpot_Update_linkedinbio_from_google.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a>

**Tags:** #hubspot #crm #sales #contact #naas_drivers #googlesearch #scheduler #naas #automation

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

## Input

### Import library

In [None]:
from naas_drivers import hubspot
import naas
import pandas as pd
from googlesearch import search
import time
import re

### Setup your HubSpot
👉 Access your [HubSpot API key](https://knowledge.hubspot.com/integrations/how-do-i-get-my-hubspot-api-key)

In [None]:
# API key passed to hubspot.connect() in the cells below; replace the placeholder with your own key
HS_API_KEY = "YOUR_HUBSPOT_API_KEY"

### Setup Naas

In [None]:
# Register a Naas scheduler entry with the cron expression "0 8 * * *"
# (minute=0, hour=8, every day of month, every month, every day of week => daily at 08:00).
# NOTE(review): the timezone applied is presumably the one configured in Naas — confirm.
naas.scheduler.add(cron="0 8 * * *")

# -> Uncomment the line below (by removing the hashtag) and execute this cell to remove your scheduler
# naas.scheduler.delete()

## Model

### Get all contacts in Hubspot

In [None]:
# Contact properties requested from HubSpot
properties_list = ["hs_object_id", "firstname", "lastname", "linkedinbio"]

# Connect with the API key, then fetch all contacts restricted to those properties
hs_client = hubspot.connect(HS_API_KEY)
hubspot_contacts = hs_client.contacts.get_all(properties_list)
hubspot_contacts

### Filter contacts with "firstname" and "lastname" defined but "linkedinbio" not defined

In [None]:
# Work on a copy of the HubSpot export and mark every missing value as "Not Defined"
df_to_update = hubspot_contacts.copy().fillna("Not Defined")

# Keep only contacts that have both names set and no linkedinbio yet
has_firstname = df_to_update.firstname != "Not Defined"
has_lastname = df_to_update.lastname != "Not Defined"
missing_bio = df_to_update.linkedinbio == "Not Defined"
df_to_update = (
    df_to_update[has_firstname & has_lastname & missing_bio]
    .reset_index(drop=True)
)

df_to_update

### Search bio in Google with firstname and lastname

In [None]:
def get_bio(firstname, lastname):
    """Search Google for a person's LinkedIn profile URL.

    Builds the query "<firstname>+<lastname>+Linkedin", iterates over the
    Google results and returns the first one that looks like a LinkedIn
    profile URL (``https://<subdomain>.linkedin.com/in/<slug>``). Anything
    from the first "?" onwards is left out of the returned URL.

    Args:
        firstname: First name of the contact.
        lastname: Last name of the contact.

    Returns:
        The matched LinkedIn profile URL (spaces removed), or None when no
        search result matches.
    """
    # Create query
    query = f"{firstname}+{lastname}+Linkedin"
    print("Google query: ", query)

    # Built once, outside the loop. A raw string is used because "\/" is an
    # invalid escape sequence in a regular string literal, and the dots of the
    # host name are escaped so they only match a literal ".".
    pattern = re.compile(r"https://.+\.linkedin\.com/in/.[^?]+")

    # Search in Google
    for url in search(query, tld="com", num=10, stop=10, pause=2):
        result = pattern.search(url)

        # Return the first result that matches
        if result is not None:
            return result.group(0).replace(" ", "")

        # No match: wait before looking at the next result
        time.sleep(2)
    return None

In [None]:
# Look up a LinkedIn profile URL for every contact and store it in the dataframe.
# The row label is bound to `index` rather than `_`: `_` is conventionally a
# throwaway name, and in IPython/Jupyter it also refers to the last cell output.
for index, row in df_to_update.iterrows():
    # Get linkedinbio (get_bio returns None when no profile URL was found)
    df_to_update.loc[index, "linkedinbio"] = get_bio(row.firstname, row.lastname)

df_to_update

## Output

### Update linkedinbio in Hubspot

In [None]:
# Connect once: the API key is the same for every contact
hs_contacts = hubspot.connect(HS_API_KEY).contacts

for _, row in df_to_update.iterrows():
    linkedinbio = row.linkedinbio

    # Skip contacts for which no profile was found instead of sending an
    # empty payload. pd.isna is used because a missing value may be held in
    # the dataframe as NaN rather than None, and `NaN != None` is True.
    if pd.isna(linkedinbio):
        continue

    # Update LK Bio
    data = {"properties": {"linkedinbio": linkedinbio}}
    hs_contacts.patch(row.hs_object_id, data)