**Disclaimer:** Scraping data is often considered illegal and unethical if done without proper permissions. This project and its content are intended **solely for educational purposes** to demonstrate technical concepts. Data is scraped from https://seismonepal.gov.np/earthquakes

In [1]:
#scraping data

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Listing endpoint of the earthquake catalogue. Individual pages are requested
# by appending a `?page=N` query string to this URL.
base_url = "https://seismonepal.gov.np/earthquakes/index"

# Number of listing pages to request, counting from page 1.
max_pages = 62  # Update as needed
# max_pages = 5  # smaller value for a quick test run

# Function to scrape data from a single page
def scrape_page(page_url, timeout=30):
    """Download one listing page and return its earthquake rows.

    Args:
        page_url: Full URL of the listing page to fetch.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection fails instead of hanging the notebook cell.

    Returns:
        A list of dicts with the keys ``Date``, ``Time``, ``Latitude``,
        ``Longitude``, ``Magnitude`` and ``Epicenter``. Every value is the
        stripped cell text (a string). The list is empty when the page has
        no results table.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout,
            or (via ``raise_for_status``) a 4xx/5xx response.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on HTTP errors rather than parsing an error page as data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the results table and its body; either may be absent.
    table = soup.find('table', {'class': 'table table-striped table-bordered'})
    body = table.find('tbody') if table is not None else None
    if body is None:
        return []

    data = []
    for row in body.find_all('tr'):
        cols = [col.text.strip() for col in row.find_all('td')]
        # Cells 1..6 are read below, so a usable row needs at least 7 cells.
        # Cell 0 is not used (presumably a serial number -- TODO confirm).
        if len(cols) < 7:
            continue
        data.append({
            'Date': cols[1],
            'Time': cols[2],
            'Latitude': cols[3],
            'Longitude': cols[4],
            'Magnitude': cols[5],
            'Epicenter': cols[6]
        })
    return data

# Main function to scrape multiple pages
def scrape_all_pages(base_url, max_pages):
    """Scrape listing pages 1 through ``max_pages`` and merge their rows.

    Each page URL is built as ``{base_url}?page={n}`` and handed to
    ``scrape_page``. A progress line is printed after every page because
    the full run can take a while.

    Args:
        base_url: Listing URL without the ``page`` query parameter.
        max_pages: Number of the last page to fetch (inclusive).

    Returns:
        One flat list holding the rows of all pages, in page order.
    """
    collected = []
    page = 1
    while page <= max_pages:
        collected += scrape_page(f"{base_url}?page={page}")
        print(f"Scraped page {page}")
        page += 1
    return collected

# Fetch every listing page (one HTTP request per page) and collect the rows
# as a single list of dicts, one dict per table row.
data = scrape_all_pages(base_url, max_pages)
# print(data)  # uncomment to inspect the raw records
print('data scraped successfully')

Scraped page 1
Scraped page 2
Scraped page 3
Scraped page 4
Scraped page 5
Scraped page 6
Scraped page 7
Scraped page 8
Scraped page 9
Scraped page 10
Scraped page 11
Scraped page 12
Scraped page 13
Scraped page 14
Scraped page 15
Scraped page 16
Scraped page 17
Scraped page 18
Scraped page 19
Scraped page 20
Scraped page 21
Scraped page 22
Scraped page 23
Scraped page 24
Scraped page 25
Scraped page 26
Scraped page 27
Scraped page 28
Scraped page 29
Scraped page 30
Scraped page 31
Scraped page 32
Scraped page 33
Scraped page 34
Scraped page 35
Scraped page 36
Scraped page 37
Scraped page 38
Scraped page 39
Scraped page 40
Scraped page 41
Scraped page 42
Scraped page 43
Scraped page 44
Scraped page 45
Scraped page 46
Scraped page 47
Scraped page 48
Scraped page 49
Scraped page 50
Scraped page 51
Scraped page 52
Scraped page 53
Scraped page 54
Scraped page 55
Scraped page 56
Scraped page 57
Scraped page 58
Scraped page 59
Scraped page 60
Scraped page 61
Scraped page 62
data scraped successfully

In [2]:
# Build a DataFrame from the scraped records: one row per record, one column
# per dict key. The values are still the raw scraped text at this point.
df = pd.DataFrame(data)
# Save the raw table to CSV before any cleaning; index=False leaves the
# DataFrame's row index out of the file.
df.to_csv('uncleaned_earthquake_data_nepal.csv', index=False)
print("Data saved to uncleaned_earthquake_data_nepal.csv")

Data saved to uncleaned_earthquake_data_nepal.csv


In [3]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1237 entries, 0 to 1236
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       1237 non-null   object
 1   Time       1237 non-null   object
 2   Latitude   1237 non-null   object
 3   Longitude  1237 non-null   object
 4   Magnitude  1237 non-null   object
 5   Epicenter  1237 non-null   object
dtypes: object(6)
memory usage: 58.1+ KB
None


In [4]:
# Leave the summary as the cell's last expression so the notebook renders it
# as a table instead of line-wrapped plain text.
df.describe()

                                    Date                    Time Latitude  \
count                               1237                    1237     1237   
unique                               824                     981      310   
top     B.S.: 2072-01-12A.D.: 2015-04-25  Local: 00:24UTC: 18:39    27.79   
freq                                  40                       4       25   

       Longitude Magnitude Epicenter  
count       1237      1237      1237  
unique       518        30       133  
top        86.07       4.0   Dolakha  
freq          11       463       188  


In [5]:
# Leave the preview as the cell's last expression so the notebook renders it
# as a table instead of line-wrapped plain text.
df.head()

                               Date                    Time Latitude  \
0  B.S.: 2081-09-23A.D.: 2025-01-07  Local: 06:50UTC: 01:05    28.31   
1  B.S.: 2081-09-19A.D.: 2025-01-03  Local: 08:03UTC: 02:18    29.57   
2  B.S.: 2081-09-19A.D.: 2025-01-03  Local: 08:29UTC: 02:44    28.54   
3  B.S.: 2081-09-18A.D.: 2025-01-02  Local: 13:02UTC: 07:17    27.77   
4  B.S.: 2081-09-16A.D.: 2024-12-31  Local: 07:54UTC: 02:09    29.45   

  Longitude Magnitude        Epicenter  
0     87.37       7.0  Dinggye, China*  
1     82.19       4.4             Mugu  
2     84.13       4.1           Manang  
3     85.57       4.8    Sindhupalchok  
4     80.86       4.6          Baitadi  
