<a href="https://colab.research.google.com/github/riddhi365/NGO_Grant_DataCollector/blob/main/NGOBox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import urllib3
from urllib.parse import urljoin
# Suppress urllib3's InsecureRequestWarning so it is not emitted for the
# requests this notebook makes with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


In [None]:
# Function to extract the grant title
def get_title(soup):
    """Return the title text of a parsed announcement page.

    Looks for an ``<h1 class="card-title">`` first and falls back to the
    first ``<h1>`` on the page.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped heading text, or "" when no heading is found or the
        lookup fails.
    """
    try:
        title_tag = soup.find("h1", class_="card-title") or soup.find("h1")
        if title_tag:
            return title_tag.text.strip()
    except Exception:
        # Best-effort scrape: a malformed page yields an empty title.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still
        # propagate and the run can be interrupted.
        pass
    return ""

# Function to extract organisation name
def extract_organization(soup):
    """Return the organisation name from a parsed announcement page.

    Reads the text of the first ``<h4 class="card-title">`` and removes the
    "Organization: " label from it.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The organisation name, or "" when the heading is missing or the
        lookup fails.
    """
    try:
        org_tag = soup.find("h4", class_="card-title")
        if org_tag is None:
            # A missing heading is an expected case, not an error.
            return ""
        return org_tag.text.strip().replace("Organization: ", "").strip()
    except Exception:
        # Best-effort scrape; KeyboardInterrupt/SystemExit are not swallowed.
        return ""

# Function to extract the application deadline (date)
def extract_date(soup):
    """Return the application deadline from a parsed announcement page.

    Reads the text of the first ``<h2 class="card-text">`` and removes the
    "Apply By: " label from it. The date text itself is returned as found on
    the page; it is not parsed or normalised.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The deadline text, or "" when the heading is missing or the lookup
        fails.
    """
    try:
        deadline_tag = soup.find("h2", class_="card-text")
        if deadline_tag is None:
            # A missing heading is an expected case, not an error.
            return ""
        return deadline_tag.text.strip().replace("Apply By: ", "").strip()
    except Exception:
        # Best-effort scrape; KeyboardInterrupt/SystemExit are not swallowed.
        return ""

# Function to extract the grant amount
def extract_amount(soup):
    """Return the grant amount from a parsed announcement page.

    Scans the page's ``<p>`` elements in document order and uses the first
    one whose text contains "Grant Amount:".

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        The paragraph text with the "Grant Amount:" label removed and
        surrounding whitespace stripped, or "Not mentioned" when no such
        paragraph exists or the lookup fails.
    """
    label = "Grant Amount:"
    try:
        for paragraph in soup.find_all("p"):
            text = paragraph.text
            if label in text:
                return text.replace(label, "").strip()
    except Exception:
        # Best-effort scrape; KeyboardInterrupt/SystemExit are not swallowed.
        pass
    return "Not mentioned"


In [None]:
if __name__ == '__main__':
    # Scrape the NGOBox grant listing, visit every linked announcement page,
    # collect title / organisation / deadline / amount / link into `d`, and
    # write the rows that yielded a title to amazon_data.csv.

    HEADERS = ({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    })

    # Seconds to wait for a response. Without a timeout a stalled connection
    # would block the cell indefinitely.
    REQUEST_TIMEOUT = 30

    URL = "https://ngobox.org/grant_announcement_listing.php"
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept as in the original; confirm whether the site really requires it.
    webpage = requests.get(URL, headers=HEADERS, verify=False, timeout=REQUEST_TIMEOUT)
    # Fail loudly if the listing itself cannot be fetched, rather than
    # silently producing an empty CSV from an error page.
    webpage.raise_for_status()
    soup = BeautifulSoup(webpage.content, "html.parser")

    links = soup.find_all("a", attrs={'class': 'card-title'})
    # Skip anchors without an href: urljoin(base, None) returns the bare base
    # URL, which would be scraped as if it were a grant page.
    links_list = [
        urljoin("https://www.ngobox.org", link.get('href'))
        for link in links
        if link.get('href')
    ]

    d = {"title": [], "organisation": [], "Deadline": [], "Amount": [], "Link": []}

    for link in links_list:
        try:
            new_webpage = requests.get(link, headers=HEADERS, verify=False, timeout=REQUEST_TIMEOUT)
            # Treat HTTP error responses as failures instead of parsing them.
            new_webpage.raise_for_status()
            new_soup = BeautifulSoup(new_webpage.content, "html.parser")

            row = (
                get_title(new_soup),
                extract_organization(new_soup),
                extract_date(new_soup),
                extract_amount(new_soup),
            )
        except Exception as e:
            print(f"❌ Error scraping {link}: {e}")
            row = ("", "", "", "")

        # Append once per link, after the try/except, so every list in `d`
        # always has the same length even when a page fails midway.
        d['title'].append(row[0])
        d['organisation'].append(row[1])
        d['Deadline'].append(row[2])
        d['Amount'].append(row[3])
        d['Link'].append(link)

    amazon_df = pd.DataFrame.from_dict(d)
    # Keep only rows that produced a title. This replaces the chained
    # `amazon_df['title'].replace(..., inplace=True)` + dropna, which emits a
    # FutureWarning and no longer modifies the frame in pandas 3.0.
    amazon_df = amazon_df[amazon_df['title'] != '']
    # The `amazon_df` / "amazon_data.csv" names are unchanged so anything that
    # relies on them keeps working.
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)
    print("\n✅ CSV saved as 'amazon_data.csv'")


✅ CSV saved as 'amazon_data.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  amazon_df['title'].replace('', np.nan, inplace=True)


In [None]:
# Show every scraped grant as one labelled block per index in `d`.
for i, grant_title in enumerate(d["title"]):
    entry = "\n".join([
        f"\nGrant {i+1}",
        f"Title       : {grant_title}",
        f"Organization: {d['organisation'][i]}",
        f"Deadline    : {d['Deadline'][i]}",
        f"Amount      : {d['Amount'][i]}",
        f"Link        : {d['Link'][i]}",
        "-" * 60,
    ])
    print(entry)

# Write the collected data to CSV. The frame is built straight from `d`,
# so entries without a title are included.
df = pd.DataFrame(data=d)
df.to_csv("grants_ngobox.csv", index=False)
print("\n✅ All grants printed and saved to grants_ngobox.csv")


Grant 1
Title       : Applications Open for HCLTech Grant Edition XI
Organization: HCLTech Grant India
Deadline    : 25 Jun  2025
Amount      : 240000000 INR
Link        : https://www.ngobox.org/full_grant_announcement_Applications-Open-for-HCLTech-Grant-Edition-XI-HCLTech-Grant-India_12911
------------------------------------------------------------

Grant 2
Title       : Applications Invited for GATE Membership Grants
Organization: Global Action for Trans Equality (GATE)
Deadline    : 30 Jun  2025
Amount      : 20000 USD
Link        : https://www.ngobox.org/full_grant_announcement_Applications-Invited-for-GATE-Membership-Grants-Global-Action-for-Trans-Equality-(GATE)-_12935
------------------------------------------------------------

Grant 3
Title       : Applications Invited for Innovation Challenge Version 2: Remote-Sensing Water Quality with Satellites
Organization: Fish Welfare Initiative (FWI)
Deadline    : 20 Aug  2025
Amount      : 100000 USD
Link        : https://www.ngobox