In [None]:
import pandas as pd
import requests
import dotenv # pip install python-dotenv
import os 


In [None]:
# Read variables from a .env file (if any) into the process environment.
dotenv.load_dotenv()

# Endpoint queried by every search helper in this file.
GOOGLE_CSE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"

# Credentials come from the environment; each is None when the variable
# is not set.
API_KEY = os.getenv("API_KEY")
CSE_ID = os.getenv("CSE_ID")

In [None]:
# Function to retrieve LinkedIn profile URL
def get_linkedin_url(name, organization):
    """Look up a person's LinkedIn profile URL through the search endpoint.

    Args:
        name: Person's name; interpolated into the search query.
        organization: Organization name; interpolated into the search query.

    Returns:
        The first result link containing ``linkedin.com/in/``, or ``None``
        when the request fails, the API reports an error, or no result
        matches.
    """
    query = f"{name} {organization} LinkedIn site:linkedin.com"

    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(
            GOOGLE_CSE_ENDPOINT,
            params={"q": query, "key": API_KEY, "cx": CSE_ID},
            timeout=10,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failures and non-JSON bodies are reported rather than
        # raised so one bad lookup does not abort a batch of lookups.
        print(f"Error fetching LinkedIn URL for {name} at {organization}: {exc}")
        return None

    error = data.get('error')
    if error:
        message = error.get('message', 'Unknown error')
        print(f"Error fetching LinkedIn URL for {name} at {organization}: {message}")
        return None

    # A response without 'items' is treated as "no results", not as an error.
    for item in data.get('items', []):
        link = item.get('link', '')
        if 'linkedin.com/in/' in link:
            return link

    return None  # Return None if no match found


# Function to retrieve organization's official website URL
def get_organization_website(organization):
    """Look up an organization's website through the search endpoint.

    Args:
        organization: Organization name; interpolated into the search query.

    Returns:
        The link of the first search result, or ``None`` when the request
        fails, the API reports an error, or there are no results.
    """
    query = f"{organization} official website"

    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(
            GOOGLE_CSE_ENDPOINT,
            params={"q": query, "key": API_KEY, "cx": CSE_ID},
            timeout=10,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failures and non-JSON bodies are reported rather than
        # raised so one bad lookup does not abort a batch of lookups.
        print(f"Error fetching website for {organization}: {exc}")
        return None

    error = data.get('error')
    if error:
        message = error.get('message', 'Unknown error')
        print(f"Error fetching website for {organization}: {message}")
        return None

    # A missing or empty 'items' list is treated as "no results".
    items = data.get('items')
    if not items:
        return None

    return items[0].get('link')

# Load the CSV file
df = pd.read_csv('contacts.csv')  # Replace with the path to your CSV file


def _hyperlink_formula(url, label):
    """Build a spreadsheet HYPERLINK formula that shows *label* and opens *url*."""
    # Inside a formula's string literal a double quote is written as two
    # double quotes; an unescaped one would end the literal early and
    # corrupt the formula.
    safe_url = str(url).replace('"', '""')
    safe_label = str(label).replace('"', '""')
    return f'=HYPERLINK("{safe_url}", "{safe_label}")'


# Iterate over each row in the DataFrame
for index, row in df.iterrows():
    linkedin_url = get_linkedin_url(row['Name'], row['Organization'])
    organization_website = get_organization_website(row['Organization'])

    # Only update the DataFrame if URLs are found; otherwise the cell keeps
    # its original plain-text value.
    if linkedin_url:
        df.at[index, 'Name'] = _hyperlink_formula(linkedin_url, row['Name'])
    if organization_website:
        df.at[index, 'Organization'] = _hyperlink_formula(organization_website, row['Organization'])

# Save the updated DataFrame back to 'contacts.csv', i.e. the same file that
# was loaded above is overwritten in place. Cells that received a link now
# hold HYPERLINK formulas, so any later read of this file sees formulas
# rather than the plain names.
# NOTE(review): the next line is an IPython/Jupyter magic, not Python syntax,
# so it only runs inside a notebook kernel. It upgrades the certifi package;
# its position right before the save looks incidental - confirm it is needed.
%pip install --upgrade certifi
df.to_csv('contacts.csv', index=False)  # Change the filename here to keep the input file untouched

import pandas as pd
import requests
from bs4 import BeautifulSoup


# Function to retrieve organization's industry
def get_organization_industry(organization):
    """Look up an organization's industry from its Wikipedia page.

    Searches for the organization, takes the first result whose link
    contains ``wikipedia.org``, downloads that page and reads the table
    cell that follows the ``Industry`` header cell.

    Args:
        organization: Organization name; used verbatim as the search query.

    Returns:
        The industry text with surrounding whitespace removed, or ``None``
        when a request fails, the API reports an error, no Wikipedia result
        exists, or the page has no Industry row.
    """
    params = {
        "key": API_KEY,
        "cx": CSE_ID,
        "q": organization,
    }

    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(GOOGLE_CSE_ENDPOINT, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching Industry for {organization}: {exc}")
        return None

    error = data.get('error')
    if error:
        message = error.get('message', 'Unknown error')
        print(f"Error fetching Industry for {organization}: {message}")
        return None

    # Pick the first Wikipedia hit. Resolving it up front (instead of
    # assigning inside a loop) avoids using an unbound name when the search
    # returns no Wikipedia link at all.
    url = next(
        (
            item['link']
            for item in data.get('items', [])
            if 'wikipedia.org' in item.get('link', '')
        ),
        None,
    )
    if url is None:
        print(f"Error fetching Industry for {organization}: no Wikipedia result found")
        return None

    try:
        page = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching Industry for {organization}: {exc}")
        return None

    soup = BeautifulSoup(page.content, 'html.parser')
    header = soup.find('th', text='Industry')
    cell = header.find_next_sibling('td') if header is not None else None
    if cell is None:
        print(f"Error fetching Industry for {organization}: no Industry entry on {url}")
        return None

    return cell.text.strip()

import re

# Matches a cell of the form =HYPERLINK("url", "label") and captures the label.
_HYPERLINK_FORMULA = re.compile(r'^=HYPERLINK\(".*?", "(.*)"\)$', re.DOTALL)


def _plain_label(value):
    """Return the display label of a HYPERLINK formula, or *value* unchanged."""
    if isinstance(value, str):
        match = _HYPERLINK_FORMULA.match(value)
        if match:
            # Undo the doubled-quote escaping used inside formula strings.
            return match.group(1).replace('""', '"')
    return value


# Load the CSV file
df = pd.read_csv('contacts.csv')  # Replace with the path to your CSV file

# Iterate over each row in the DataFrame
for index, row in df.iterrows():
    # 'Organization' may already hold a HYPERLINK formula written by the
    # hyperlink pass earlier in this file; search with the plain name, not
    # with the formula text.
    industry = get_organization_industry(_plain_label(row['Organization']))
    # Only update the DataFrame if an industry was found
    if industry:
        df.at[index, 'Industry'] = industry

# Save the updated DataFrame, overwriting the file that was loaded above
df.to_csv('contacts.csv', index=False)  # Change the filename here to keep the input file untouched
