### Scraping Electricity Consumption Data for Delhi from SLDC Website

In [1]:
import requests
import csv
import os
import time
from bs4 import BeautifulSoup

# Scrape Delhi SLDC load data day by day and store the 15-minute readings
# in a single CSV file (columns: date, time, load_consumption).

# Base URL for scraping; the requested date (DD/MM/YYYY) is appended to it
BASE_URL = 'http://www.delhisldc.org/Loaddata.aspx?mode='

# Define Year Range (Strictly from 2020 to 2024)
year_range = range(2020, 2025)

# Seconds to wait for the server before abandoning a single request.
# Without a timeout one stalled connection would hang the whole scrape.
REQUEST_TIMEOUT = 30

# Seconds to pause after every request (successful or not)
REQUEST_DELAY = 1

# Directory for saving the scraped data
SAVE_DIR = 'SLDC_Data'
os.makedirs(SAVE_DIR, exist_ok=True)

# Define CSV filename
csv_filename = os.path.join(SAVE_DIR, 'Delhi_Data_15min.csv')

# Open the CSV file and write the header row
with open(csv_filename, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['date', 'time', 'load_consumption'])  # Header

    # Iterate through years, months, and days
    for year in year_range:
        for month in range(1, 13):
            # Get the max number of days in the current month
            if month in (4, 6, 9, 11):  # April, June, September, November
                max_day = 30
            elif month == 2:  # February (Gregorian leap-year rule)
                is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
                max_day = 29 if is_leap else 28
            else:  # All other months
                max_day = 31

            # Iterate over each day
            for day in range(1, max_day + 1):
                date_str = f'{day:02d}/{month:02d}/{year}'
                print(f'Scraping data for {date_str}...')

                try:
                    # Send GET request; HTTP error statuses are reported as
                    # errors instead of being parsed as if they were data pages
                    response = requests.get(BASE_URL + date_str, timeout=REQUEST_TIMEOUT)
                    response.raise_for_status()
                    soup = BeautifulSoup(response.text, 'lxml')

                    # Find the required table
                    table = soup.find('table', {'id': 'ContentPlaceHolder3_DGGridAv'})
                    if table is None:
                        print(f"No data found for {date_str}. Skipping...")
                        continue

                    # Process each data row (the first <tr> is the header)
                    for row in table.find_all('tr')[1:]:
                        cols = row.find_all('font')
                        if len(cols) < 2:
                            continue

                        time_text = cols[0].text.strip()  # Extract time
                        load_text = cols[1].text.strip()  # Extract load consumption

                        # Keep only readings whose time ends in 00/15/30/45
                        if time_text.endswith(('00', '15', '30', '45')):
                            writer.writerow([date_str, time_text, load_text])

                except Exception as e:
                    # Best effort: report the failed day and carry on with the next
                    print(f"Error scraping {date_str}: {e}")

                finally:
                    # Respect server rate limits on every path, including the
                    # "no data" skip above and failed requests
                    time.sleep(REQUEST_DELAY)

print(f"\n Data scraping completed. All data saved in: {csv_filename}")


Scraping data for 01/01/2020...
Scraping data for 02/01/2020...
Scraping data for 03/01/2020...
Scraping data for 04/01/2020...
Scraping data for 05/01/2020...
Scraping data for 06/01/2020...
Scraping data for 07/01/2020...
Scraping data for 08/01/2020...
Scraping data for 09/01/2020...
Scraping data for 10/01/2020...
Scraping data for 11/01/2020...
Scraping data for 12/01/2020...
Scraping data for 13/01/2020...
Scraping data for 14/01/2020...
Scraping data for 15/01/2020...
Scraping data for 16/01/2020...
Scraping data for 17/01/2020...
Scraping data for 18/01/2020...
Scraping data for 19/01/2020...
Scraping data for 20/01/2020...
Scraping data for 21/01/2020...
Scraping data for 22/01/2020...
Scraping data for 23/01/2020...
Scraping data for 24/01/2020...
Scraping data for 25/01/2020...
Scraping data for 26/01/2020...
Scraping data for 27/01/2020...
Scraping data for 28/01/2020...
Scraping data for 29/01/2020...
Scraping data for 30/01/2020...
Scraping data for 31/01/2020...
Scraping

### Fetching Weather Data for Delhi from the Weatherbit API

In [None]:
import requests
import csv
import time
from datetime import date, datetime

# Download sub-hourly historical weather for Delhi from the Weatherbit API,
# one calendar month per request, and store it in a single CSV file.
import os  # only needed here, to read the API key from the environment

# Weatherbit API key: taken from the WEATHERBIT_API_KEY environment variable
# when it is set, otherwise the literal below is used (replace it with your key).
# NOTE(review): a literal key is committed in this file; it should be rotated
# and removed from version control.
API_KEY = os.environ.get('WEATHERBIT_API_KEY', '6f0e0438564e4955bc382b241af71fd3')

# Endpoint for sub-hourly historical data
API_URL = 'https://api.weatherbit.io/v2.0/history/subhourly'

# Seconds to wait for the server before abandoning a single request
REQUEST_TIMEOUT = 60

# Coordinates for Delhi
LAT = 28.6139
LON = 77.2090

# CSV file to save weather data
CSV_FILE = 'delhi_weather_15min.csv'

# Keys read from each API record, in the order of the CSV columns that
# follow 'date' and 'time'
WEATHER_FIELDS = ['temp', 'app_temp', 'rh', 'wind_spd', 'ghi', 'slp', 'dewpt']

# Open CSV file and write header
with open(CSV_FILE, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['date', 'time', 'temperature', 'apparent_temp', 'rh', 'wind_speed', 'ghi', 'sea_lvl_pressure', 'dew_point'])

    # Loop over years and months
    for year in range(2020, 2025):
        for month in range(1, 13):
            # Request window: first day of this month up to the first day of
            # the following month (rolling over into January after December)
            start_date = date(year, month, 1)
            if month == 12:
                end_date = date(year + 1, 1, 1)
            else:
                end_date = date(year, month + 1, 1)

            start_date_str = start_date.strftime('%Y-%m-%d')
            end_date_str = end_date.strftime('%Y-%m-%d')

            # Query parameters; requests takes care of building the URL
            params = {
                'lat': LAT,
                'lon': LON,
                'start_date': start_date_str,
                'end_date': end_date_str,
                'key': API_KEY,
                'tz': 'local',
            }

            try:
                response = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Only the exception type is printed: the full message can
                # contain the request URL, which includes the API key
                print(f"Failed to fetch data for {start_date_str} to {end_date_str}: {type(exc).__name__}")
            else:
                if response.status_code == 200:
                    # A payload without a 'data' entry yields no rows
                    records = response.json().get('data') or []
                    for item in records:
                        timestamp = item['timestamp_local']
                        date_yyyymmdd, time_hhmmss = timestamp.split('T')
                        date_obj = datetime.strptime(date_yyyymmdd, '%Y-%m-%d')
                        date_str = date_obj.strftime('%d/%m/%Y')
                        time_str = time_hhmmss[:5]  # 'HH:MM'
                        writer.writerow([date_str, time_str] + [item[key] for key in WEATHER_FIELDS])
                else:
                    print(f"Failed to fetch data for {start_date_str} to {end_date_str}: {response.status_code}")

            # Sleep to respect rate limits
            time.sleep(10)

print(f"Weather data saved to {CSV_FILE}")

### Scraping Holiday Data from the timeanddate.com Website

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Function to fetch holidays from timeanddate.com
def fetch_holidays(year):
    """Fetch the India holiday table for *year* from timeanddate.com.

    Args:
        year: Calendar year placed in the page URL and stored in the
            'Year' column of the result.

    Returns:
        A DataFrame with the columns 'Date', 'Holiday Type', 'Holiday Name'
        and 'Year', built from the first HTML table found on the page.
    """
    url = f"https://www.timeanddate.com/holidays/india/{year}"
    tables = pd.read_html(url)
    holiday_table = tables[0]
    # Print the available columns to check if they match expectations
    print(holiday_table.columns)

    # Use the named columns when all of them exist; otherwise fall back to the
    # first three columns, which are then relabelled purely by position.
    wanted = ['Date', 'Type', 'Name']
    if all(col in holiday_table.columns for col in wanted):
        selected = wanted
    else:
        selected = holiday_table.columns[:3]

    # Take an explicit copy so the renaming and the new 'Year' column act on
    # an independent frame rather than on a slice of the parsed table.
    holiday_table = holiday_table[selected].copy()
    holiday_table.columns = ['Date', 'Holiday Type', 'Holiday Name']
    holiday_table['Year'] = year
    return holiday_table

# Compile holidays from 2020 to 2024.
# The yearly tables are collected first and concatenated once, instead of
# repeatedly concatenating onto a DataFrame that starts out empty.
all_holidays = pd.concat(
    [fetch_holidays(year) for year in range(2020, 2025)],
    ignore_index=True,
)

# Convert 'Date' and 'Year' to a single datetime column.
# Parenthesised numeric suffixes such as " (1)" are stripped from the date
# text before the year is appended and the result is parsed.
all_holidays['Date'] = all_holidays['Date'].str.replace(r'\s*\(\d+\)', '', regex=True)
all_holidays['Full Date'] = all_holidays['Date'] + ' ' + all_holidays['Year'].astype(str)
all_holidays['Full Date'] = pd.to_datetime(all_holidays['Full Date'], format='%b %d %Y')

# Create a complete date range from 2020-01-01 to 2024-12-31
start_date = datetime(2020, 1, 1)
end_date = datetime(2024, 12, 31)
date_range = pd.date_range(start_date, end_date)

# Initialize the dataset: one row per calendar day, no holiday by default
dataset = pd.DataFrame(date_range, columns=['Date'])
dataset['Holiday Name'] = ''
dataset['Holiday Flag'] = 0
# Sunday as weekly holiday (dayofweek == 6)
dataset['Weekly Holiday'] = (dataset['Date'].dt.dayofweek == 6).astype(int)

# Populate the dataset with holiday information.
# Dates outside the range (or unparsed ones) match no row, so the assignment
# is a no-op for them; when several holidays share a date the last one wins.
for holiday_date, holiday_name in zip(all_holidays['Full Date'], all_holidays['Holiday Name']):
    on_holiday = dataset['Date'] == holiday_date
    dataset.loc[on_holiday, 'Holiday Name'] = holiday_name
    dataset.loc[on_holiday, 'Holiday Flag'] = 1

# Format the 'Date' column as DD-MM-YYYY
# NOTE(review): the load and weather CSVs written earlier in this notebook use
# DD/MM/YYYY; confirm the different separator is intended before joining on date.
dataset['Date'] = dataset['Date'].dt.strftime('%d-%m-%Y')

# Save to CSV
dataset.to_csv('Delhi_Holidays_2020_2024.csv', index=False)

print("Holiday dataset created successfully.")