In [3]:
pip install requests beautifulsoup4 pandas


Note: you may need to restart the kernel to use updated packages.


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd



In [9]:
# Fetch the webpage

url = "https://en.wikipedia.org/wiki/List_of_mountains_in_India"
response = requests.get(url)
response.raise_for_status()  # Ensure we got a valid response
print(response)
print(url)

<Response [200]>
https://en.wikipedia.org/wiki/List_of_mountains_in_India


In [10]:
# Parse the HTML
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)

<!DOCTYPE html>

<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-enabled vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-disabled skin-theme-clientpref-day vector-toc-available" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of mountains in India - Wikipedia</title>
<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled ve

In [11]:
# Find the table
table = soup.find('table', class_='wikitable')
print(table)

<table class="wikitable sortable sort-under-center col1center col2center col5center col4center col7center hover-highlight">
<caption><big>Summits of India with at least 500 meters of topographic prominence</big><br/><br/>
</caption>
<tbody><tr>
<th colspan="2">Ranks
</th>
<th rowspan="2">Name  / Short name (if applicable)
</th>
<th colspan="2">Height
</th>
<th rowspan="2">Range
</th>
<th rowspan="2">Prominence (m)<sup class="reference" id="cite_ref-1"><a href="#cite_note-1">[a]</a></sup><sup class="reference" id="cite_ref-2"><a href="#cite_note-2">[b]</a></sup>
</th>
<th class="unsortable" rowspan="2">Coordinates
</th>
<th rowspan="2">State
</th></tr>
<tr>
<th>National
</th>
<th>Global
</th>
<th>In meter <sup class="reference" id="cite_ref-3"><a href="#cite_note-3">[c]</a></sup>
</th>
<th>In ft
</th></tr>
<tr>
<td>1
</td>
<td>3
</td>
<td><a href="/wiki/Kangchenjunga" title="Kangchenjunga">Kangchenjunga</a>
</td>
<td align="right">8,586
</td>
<td align="right">28,169
</td>
<td><a href="

In [13]:
# Extract headers (limit to 4 columns)
headers = []
for th in table.find_all('th')[:4]:  # Limit to 4 columns
    headers.append(th.get_text(strip=True))

print(headers)


['Ranks', 'Name  / Short name (if applicable)', 'Height', 'Range']


In [14]:
# Extract rows (limit to 25 rows and 4 columns)
rows = []
for tr in table.find_all('tr')[1:26]:  # Limit to 25 rows
    cells = tr.find_all('td')[:4]  # Limit to 4 columns
    row = [cell.get_text(strip=True) for cell in cells]
    rows.append(row)
print(rows)

[[], ['1', '3', 'Kangchenjunga', '8,586'], ['2', '23', 'Nanda Devi', '7,816'], ['3', '29', 'Kamet', '7,756'], ['4', '31', 'Saltoro Kangri/ K10', '7,742'], ['5', '35', 'Saser KangriI / K22', '7,672'], ['6', '48', 'Mamostong Kangri/ K35', '7,516'], ['7', '49', 'Saser KangriII E', '7,513'], ['8', '51', 'Saser KangriIII', '7,495'], ['9', '56', 'Teram KangriI', '7,462'], ['10', '57', 'Jongsong Peak', '7,462'], ['11', '61', 'K12', '7,428'], ['12', '65', 'KabruN', '7,412'], ['13', '69', 'Ghent Kangri', '7,401'], ['14', '71', 'Rimo I', '7,385'], ['15', '73', 'Teram Kangri III', '7,382'], ['16', '76', 'Kirat Chuli', '7,362'], ['17', '92', 'Mana Peak', '7,272'], ['18', '96', 'Apsarasas Kangri', '7,245'], ['19', '97', 'Mukut Parbat', '7,242'], ['20', '98', 'Rimo III', '7,233'], ['21', '108', 'Singhi Kangri', '7,202'], ['22', '', 'Hardeol', '7,161'], ['23', '', 'Chaukhamba I', '7,138'], ['24', '', 'Nun-Kun', '7,135']]


In [15]:
# Create DataFrame
mountains_df = pd.DataFrame(rows, columns=headers)
print(mountains_df)

   Ranks Name  / Short name (if applicable)                 Height  Range
0   None                               None                   None   None
1      1                                  3          Kangchenjunga  8,586
2      2                                 23             Nanda Devi  7,816
3      3                                 29                  Kamet  7,756
4      4                                 31    Saltoro Kangri/ K10  7,742
5      5                                 35    Saser KangriI / K22  7,672
6      6                                 48  Mamostong Kangri/ K35  7,516
7      7                                 49       Saser KangriII E  7,513
8      8                                 51        Saser KangriIII  7,495
9      9                                 56          Teram KangriI  7,462
10    10                                 57          Jongsong Peak  7,462
11    11                                 61                    K12  7,428
12    12                              

In [18]:
# Example: Remove empty rows
mountains_df.dropna(how='all', inplace=True)

# Save to CSV
mountains_df.to_csv('mountains_in_india.csv', index=False)

print("Data saved to mountains_in_india.csv")


Data saved to mountains_in_india.csv
