In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape(url, timeout=30):
    """Download a page and return its funding-table rows as a DataFrame.

    Every ``<tr>`` element carrying a ``data-row_id`` attribute is treated as
    one record. The whitespace-stripped text of its first seven ``<td>`` cells
    is mapped, in order, to the columns Name, Website, Industry, Country,
    Funding Amount (USD), Funding Type and Last Funding Date. Rows with fewer
    than seven cells are skipped; cells beyond the seventh are ignored.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive host.

    Returns:
        A DataFrame with one row per accepted table row. An empty DataFrame
        is returned when the request fails (the error is printed) or when no
        row qualifies.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return pd.DataFrame()  # Callers test `.empty` to detect failure

    soup = BeautifulSoup(response.content, 'html.parser')

    # Output column names, in the same order as the table's cells.
    field_names = (
        'Name',
        'Website',
        'Industry',
        'Country',
        'Funding Amount (USD)',
        'Funding Type',
        'Last Funding Date',
    )

    records = []
    for row in soup.select('tr[data-row_id]'):
        cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
        if len(cells) >= len(field_names):
            # zip stops at the shorter input, so surplus cells are dropped.
            records.append(dict(zip(field_names, cells)))

    return pd.DataFrame(records)

# URLs to scrape, laid out as {category: {subcategory: url}}.
urls = {
    'Location': {
        '': '',
    }
}

# Scrape every URL and tag the resulting rows with where they came from.
# Only non-empty results are kept, so failed requests are silently skipped
# here (scrape() has already printed the error).
all_dataframes = []
for category, category_urls in urls.items():
    for key, url in category_urls.items():
        df = scrape(url)
        if not df.empty:
            df['Category'] = category
            df['Subcategory'] = key
            all_dataframes.append(df)

if all_dataframes:
    combined_df = pd.concat(all_dataframes, ignore_index=True)
else:
    # pd.concat raises ValueError when given an empty list, which happens
    # whenever every scrape fails. Fall back to an empty frame with the
    # expected columns so the rest of the cell still runs.
    combined_df = pd.DataFrame(columns=[
        'Name',
        'Website',
        'Industry',
        'Country',
        'Funding Amount (USD)',
        'Funding Type',
        'Last Funding Date',
        'Category',
        'Subcategory',
    ])

# The same company may appear under several URLs; keep its first occurrence.
combined_df = combined_df.drop_duplicates(subset=['Name'])

# Keep US companies whose industry list mentions B2B.
is_us_b2b = (
    (combined_df['Country'] == 'United States')
    & combined_df['Industry'].str.contains('B2B', na=False)
)

# Build the final frame as one non-mutating chain: this avoids writing into a
# filtered slice (SettingWithCopyWarning) and keeps the cell re-runnable.
combined_df = (
    combined_df[is_us_b2b]
    .drop(columns=['Country'])
    .assign(**{
        # Dates that do not match the "Mon YYYY" format become NaT.
        'Last Funding Date': lambda frame: pd.to_datetime(
            frame['Last Funding Date'], errors='coerce', format='%b %Y'
        ),
    })
    .sort_values(by='Last Funding Date', ascending=False)
)

# Save to CSV
# combined_df.to_csv('Growthlist_Startups.csv', index=False)
combined_df.head(5)


Unnamed: 0,Name,Website,Industry,Funding Amount (USD),Funding Type,Last Funding Date,Category,Subcategory
0,Glui,www.glui.io,"Advertising, Marketing, B2B Software","$1,874,851",Seed,2024-03-01,Location,ATL
3,SmartPM Technologies,smartpmtech.com,"Data, Analytics, B2B Software","$5,499,977",Venture - Series Unknown,2024-03-01,Location,ATL
5,The Zero Proof,thezeroproof.com,"Food and Beverage, B2B Software",,Series A,2024-02-01,Location,ATL
6,Insignum AgTech,www.insignumagtech.com,"Agriculture, B2B Software","$600,000",Venture - Series Unknown,2024-01-01,Location,ATL
8,Canza Finance,canza.io,"Blockchain, Finance, B2B Software","$2,300,000",Seed,2024-01-01,Location,ATL


In [None]:
import json

# Function to append log messages to a text file (currently disabled)
# def write_log(message):
#     f = open("logs.txt", "a")
#     f.write(message)
#     f.close()

# Function to insert data in Airtable
def insert_in_airtable(fields=None, base_id=None, table_id=None, auth_token=None, timeout=30):
    """Insert a single record into an Airtable table.

    Configuration is taken from the arguments or, when an argument is omitted,
    from the environment variables ``AIRTABLE_BASE_ID``, ``AIRTABLE_TABLE_ID``
    and ``AIRTABLE_API_TOKEN``. Keeping these out of the notebook prevents
    credentials from being saved or shared with it.

    Args:
        fields: Mapping of column name to value for the new record. Defaults
            to an empty mapping.
        base_id: Airtable base identifier.
        table_id: Airtable table identifier.
        auth_token: Token sent as the ``Bearer`` credential.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON body of the API response, or ``None`` when the
        configuration is incomplete or the request fails (a message is
        printed in both cases).
    """
    import os  # local import: the notebook's import cells do not provide os

    base_id = base_id or os.environ.get("AIRTABLE_BASE_ID")
    table_id = table_id or os.environ.get("AIRTABLE_TABLE_ID")
    auth_token = auth_token or os.environ.get("AIRTABLE_API_TOKEN")

    if not (base_id and table_id and auth_token):
        print(
            "Airtable configuration missing: set AIRTABLE_BASE_ID, "
            "AIRTABLE_TABLE_ID and AIRTABLE_API_TOKEN or pass them as arguments"
        )
        return None

    # NOTE(review): the original URL assignments were blank in this notebook;
    # this follows Airtable's documented REST pattern -- confirm it matches
    # the endpoint that was intended.
    url = f"https://api.airtable.com/v0/{base_id}/{table_id}"

    # Data to be inserted ("fields" is the JSON object of column values).
    myobj = {
        "records": [
            {
                "fields": fields or {}
            }
        ]
    }
    headers = {'Authorization': f'Bearer {auth_token}'}

    try:
        x = requests.post(url, json=myobj, headers=headers, timeout=timeout)
        x.raise_for_status()  # Surface 4xx/5xx responses instead of ignoring them
        return x.json()
    except requests.RequestException as e:
        print(f"Airtable insert failed: {e}")
        return None

# Call the function to insert data
insert_in_airtable()