In [2]:
!pip install wikipedia
!pip insall pydantic

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11679 sha256=4369fcf535ca418563910a3b380a154ba18d850205ba9c456311a34b8305da96
  Stored in directory: /root/.cache/pip/wheels/8f/ab/cb/45ccc40522d3a1c41e1d2ad53b8f33a62f394011ec38cd71c6
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0
ERROR: unknown command "insall" - maybe you meant "install"


In [7]:
from pydantic import BaseModel
import wikipedia
import re

def extract_info(page_content: str, keyword: str) -> str:
    """Extract relevant information based on keyword search."""
    pattern = re.compile(rf'({keyword}.*?)\n', re.IGNORECASE)
    match = pattern.search(page_content)
    return match.group(1) if match else "Not Available"

class InstitutionInfo(BaseModel):
    name: str
    founder: str
    founded_year: str
    branches: str
    employees: str
    summary: str

def fetch_institution_details(institution_name: str) -> InstitutionInfo:
    try:
        page = wikipedia.page(institution_name)
        content = page.content

        founder = extract_info(content, "Founder")
        founded_year = extract_info(content, "Founded")
        branches = extract_info(content, "Campuses|Branches|Locations")
        employees = extract_info(content, "Employees|Staff")
        summary = " ".join(page.summary.split(".")[:2])  # First two sentences

        return InstitutionInfo(
            name=institution_name,
            founder=founder,
            founded_year=founded_year,
            branches=branches,
            employees=employees,
            summary=summary
        )
    except wikipedia.exceptions.PageError:
        return InstitutionInfo(
            name=institution_name,
            founder="Not Available",
            founded_year="Not Available",
            branches="Not Available",
            employees="Not Available",
            summary="No information found on Wikipedia."
        )

if __name__ == "__main__":
    institution_name = input("Enter Institution Name: ")
    result = fetch_institution_details(institution_name)
    print(result.model_dump_json(indent=4))



Enter Institution Name: Birla Institute of Technology and Science Pilani
{
    "name": "Birla Institute of Technology and Science Pilani",
    "founder": "founder, G.D. Birla, was chancellor from the college's inception until his death in 1983. He was followed by his son, Krishna Kumar Birla, who was chancellor until his death in 2008. Currently, Kumar Mangalam Birla is chancellor and Shobhana Bhartia is pro-chancellor.",
    "founded_year": "Founded in 1929, the Birla Education Trust established an intermediate college, followed by the addition of Science and Pharmacy colleges in 1943 and 1950 respectively. The Birla College of Engineering, offering degree programs in electrical and mechanical engineering was started in 1946. The master's program in electronics was introduced in 1955. In 1964, the Birla Colleges of Science, Engineering and Pharmacy were incorporated to form the Birla Institute of Technology and Science (BITS).",
    "branches": "Not Available",
    "employees": "Not A