<img width="8%" alt="Google Search.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Google%20Search.png" style="border-radius: 15%">

# Google Search - Search and Download LinkedIn logo from company names
<a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=Google+Search+-+Search+and+Download+LinkedIn+logo+from+company+names:+Error+short+description">Bug report</a>

**Tags:** #googlesearch #snippet #operations #url #linkedin #logo #image #download

**Author:** [Florent Ravenel](https://www.linkedin.com/in/ACoAABCNSioBW3YZHc2lBHVG0E_TXYWitQkmwog/)

**Last update:** 2023-11-06 (Created: 2023-11-06)

**Description:** This notebook is designed to search for and download the logos of a list of companies. The logo that will be downloaded is the one associated with the company's LinkedIn page. If the logo is not available on the LinkedIn company page, the notebook will not retrieve any logo.

## Input

### Import libraries

In [None]:
try:
    from googlesearch import search
except:
    !pip install google
    from googlesearch import search
import re
import naas
from naas_drivers import linkedin
import requests
import os

### Setup variables
**Mandatory**

[Learn how to get your cookies on LinkedIn](https://www.notion.so/LinkedIn-driver-Get-your-cookies-d20a8e7e508e42af8a5b52e33f3dba75)
- `li_at`: Cookie used to authenticate Members and API clients.
- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation.
- `companies`: List of companies

**Optional**
- `output_dir`: Output directory

In [None]:
# Mandatory
# LinkedIn cookies: taken from the naas secret store when the lookup returns a
# truthy value, otherwise the placeholder string (replace it with your own value).
li_at = naas.secret.get("LINKEDIN_LI_AT") or "YOUR_LINKEDIN_LI_AT" #example: AQFAzQN_PLPR4wAAAXc-FCKmgiMit5FLdY1af3-2
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID") or "YOUR_LINKEDIN_JSESSIONID" #example: ajax:8379907400220387585
# Company names to process; each name is used both in the Google query and as
# the logo file name ("<name>.png").
companies = ["Naas.ai"]

# Optional
# Directory where the downloaded logos are written.
output_dir = "logos"

## Model

### Get logos already in output directory

In [None]:
# Create the output directory if needed (no error when it already exists)
os.makedirs(output_dir, exist_ok=True)
# File names already present in the output directory; used further down to
# skip companies whose "<name>.png" has already been downloaded.
logos = os.listdir(output_dir)
print(logos)

## Output

### Search and Download logos

In [None]:
# LinkedIn company page URL, with an optional subdomain ("www.", "fr.", ...),
# captured up to (but excluding) the query string.
# - Raw string: no invalid "\/" escape sequences.
# - Escaped dots and a host part that cannot cross "/": look-alike hosts such
#   as "www.notlinkedin.com" or "www.linkedinxcom" are rejected.
LINKEDIN_COMPANY_PATTERN = re.compile(
    r"https://(?:[^/]+\.)?linkedin\.com/company/[^?]+"
)


def get_linkedin_url(
    company
):
    """Return the first LinkedIn company page URL found on Google for `company`.

    Runs the Google query "<company>+Linkedin" and scans up to 10 results.

    Args:
        company: Company name used to build the Google query.

    Returns:
        The matched LinkedIn company URL (query string excluded, spaces
        removed), or None when no result matches.
    """
    # Create query
    query = f"{company}+Linkedin"
    print("Google query: ", query)

    # Search in Google and stop at the first result that is a company page
    for url in search(query, tld="com", num=10, stop=10, pause=2):
        result = LINKEDIN_COMPANY_PATTERN.search(url)
        if result is not None:
            return result.group(0).replace(" ", "")
    return None

def dowload_image(
    image_url, 
    image_path,
    timeout=30
):
    """Download the image at `image_url` and save it to `image_path`.

    Failures are reported with a printed message instead of an exception,
    so one bad URL does not stop the processing of the other companies.

    Args:
        image_url: URL of the image to fetch.
        image_path: Destination file path; written in binary mode.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, `requests.get` can block indefinitely.

    Returns:
        None.
    """
    # Send a GET request to the image URL
    try:
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors, timeouts and invalid URLs all end up here
        print(f"Failed to download image. Error: {e}")
        return

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to download image. Error: {response.text}")
        return

    # Save the image to a file
    with open(image_path, "wb") as file:
        file.write(response.content)
    print("✅ Image downloaded successfully.")

# For each company: skip it when its logo file is already in the output
# directory, otherwise find its LinkedIn page via Google, read the logo URL
# from the LinkedIn company info and download it.
for x in companies:
    print("- ", x)
    file_name = x + ".png"

    # Already downloaded in a previous run
    if file_name in logos:
        print("✅ Logo already exists")
        continue

    linkedin_url = get_linkedin_url(x)
    print("Company URL:", linkedin_url)

    # No LinkedIn company page found: nothing more to do for this company
    if not linkedin_url:
        continue

    df = linkedin.connect(li_at, JSESSIONID).company.get_info(linkedin_url)
    logo_url = df.loc[0, "LOGO_URL"]

    # A missing logo shows up as None (or the string "None")
    if str(logo_url) == "None":
        print("Logo does not exists in LinkedIn")
        continue

    file_path = os.path.join(output_dir, file_name)
    dowload_image(logo_url, file_path)