In [None]:
import pandas as pd
import requests

hospital_data = pd.read_csv("hospital_admissions.csv") 

In [None]:

hospital_data['ResidentDate'] = pd.to_datetime(
    hospital_data.rename(columns={
        'ResidentDate_year': 'year',
        'ResidentDate_month': 'month',
        'ResidentDate_day': 'day'
    })[['year', 'month', 'day']]
)

# Check if the date looks correct
print(hospital_data[['ResidentDate_year', 'ResidentDate_month', 'ResidentDate_day', 'ResidentDate']].head())


   ResidentDate_year  ResidentDate_month  ResidentDate_day ResidentDate
0               2017                   3                21   2017-03-21
1               2017                   3                21   2017-03-21
2               2017                   3                21   2017-03-21
3               2017                   3                21   2017-03-21
4               2017                   3                21   2017-03-21


In [None]:
import pandas as pd
import requests

# CONFIG
LAT = 13.0827
LON = 80.2707
START_DATE = "2017-03-21"
END_DATE = "2022-03-20"
API_KEY = '8TEpIGk3AuzzHb0vNIBlWsfq0aeSp6H9' 

# Fetch Weather from Open-Meteo
def fetch_weather(start_date, end_date):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        'latitude': LAT,
        'longitude': LON,
        'start_date': start_date,
        'end_date': end_date,
        'daily': 'temperature_2m_max,temperature_2m_min,precipitation_sum',
        'timezone': 'Asia/Kolkata'
    }
    response = requests.get(url, params=params)
    data = response.json()
    return pd.DataFrame({
        'ResidentDate': pd.to_datetime(data['daily']['time']).date,
        'temperature_max': data['daily']['temperature_2m_max'],
        'temperature_min': data['daily']['temperature_2m_min'],
        'precipitation': data['daily']['precipitation_sum']
    })

# Fetch Holidays using Calendarific
def fetch_holidays(year, api_key):
    url = "https://calendarific.com/api/v2/holidays"
    params = {
        'api_key': api_key,
        'country': 'IN',
        'year': year,
        'location': 'Tamil Nadu',
    }
    response = requests.get(url, params=params).json()
    holidays = []
    for item in response['response']['holidays']:
        holidays.append({
            'ResidentDate': pd.to_datetime(item['date']['iso']).date(),
            'event_type': 'Holiday'
        })
    return pd.DataFrame(holidays)

# Covid Waves
def generate_covid_waves():
    covid_periods = [
        ('2020-03-20', '2020-06-30'),
        ('2021-04-15', '2021-06-15'),
        ('2022-01-01', '2022-03-01')
    ]
    covid_list = []
    for start, end in covid_periods:
        date_range = pd.date_range(start, end)
        covid_list.extend([{'ResidentDate': d.date(), 'event_type': 'Covid_Wave'} for d in date_range])
    return pd.DataFrame(covid_list)

# Placeholder for Disasters, Elections, Festivals
def fetch_placeholder_events():
    sample_events = [
        {'ResidentDate': pd.to_datetime('2018-11-15').date(), 'event_type': 'Disaster'},
        {'ResidentDate': pd.to_datetime('2019-05-23').date(), 'event_type': 'Election'},
        {'ResidentDate': pd.to_datetime('2023-01-14').date(), 'event_type': 'Festival'},
    ]
    return pd.DataFrame(sample_events)

# Merge all events with priority logic
def merge_all_events(start_date, end_date, api_key):
    weather = fetch_weather(start_date, end_date)

    holiday_frames = [fetch_holidays(year, api_key) for year in range(
        pd.to_datetime(start_date).year, pd.to_datetime(end_date).year + 1)]
    holidays = pd.concat(holiday_frames, ignore_index=True)

    covid = generate_covid_waves()
    manual_events = fetch_placeholder_events()

    # Mark Sundays
    date_range = pd.DataFrame({'ResidentDate': pd.date_range(start_date, end_date)})
    date_range['weekday'] = date_range['ResidentDate'].dt.weekday
    sundays = date_range[date_range['weekday'] == 6][['ResidentDate']].copy()
    sundays['event_type'] = 'Sunday'
    sundays['ResidentDate'] = sundays['ResidentDate'].dt.date

    # Combine all event types
    all_events = pd.concat([manual_events, holidays, covid, sundays], ignore_index=True)

    # Priority map
    priority_map = {
        'Disaster': 1,
        'Election': 2,
        'Festival': 3,
        'Holiday': 4,
        'Covid_Wave': 5,
        'Sunday': 6
    }
    all_events['priority'] = all_events['event_type'].map(priority_map)

    # Keep only highest priority per date
    all_events = all_events.sort_values(['ResidentDate', 'priority']).drop_duplicates('ResidentDate')

    # Merge with weather
    merged = pd.merge(weather, all_events[['ResidentDate', 'event_type']], on='ResidentDate', how='left')
    merged['event_type'] = merged['event_type'].fillna('None')

    return merged

# Merge with Hospital Data
def merge_with_hospital_data(hospital_data, external_factors):
    # Rename for datetime creation
    hospital_data = hospital_data.rename(columns={
        'ResidentDate_year': 'year',
        'ResidentDate_month': 'month',
        'ResidentDate_day': 'day'
    })

    # Construct ResidentDate
    hospital_data['ResidentDate'] = pd.to_datetime(
        hospital_data[['year', 'month', 'day']]
    ).dt.date

    # Rename back
    hospital_data = hospital_data.rename(columns={
        'year': 'ResidentDate_year',
        'month': 'ResidentDate_month',
        'day': 'ResidentDate_day'
    })

    # Merge external factors
    merged_data = pd.merge(hospital_data, external_factors, on='ResidentDate', how='left')
    return merged_data

# Usage Example
# Replace this with your actual hospital dataset CSV
hospital_data = pd.read_csv('hospital_admissions.csv')

# Fetch enriched external factors
external_factors = merge_all_events(START_DATE, END_DATE, API_KEY)

# Merge the two
merged_hospital_data = merge_with_hospital_data(hospital_data, external_factors)

# Save both original and enriched
hospital_data.to_csv('original_hospital_data.csv', index=False)
merged_hospital_data.to_csv('hospital_data_with_external_factors.csv', index=False)

print("✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'")


✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'


In [2]:
import pandas as pd
import requests

# CONFIG
LAT = 13.0827
LON = 80.2707
START_DATE = "2017-03-21"
END_DATE = "2022-03-20"
API_KEY = '8TEpIGk3AuzzHb0vNIBlWsfq0aeSp6H9'  # Calendarific API Key

# Fetch Weather from Open-Meteo
def fetch_weather(start_date, end_date):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        'latitude': LAT,
        'longitude': LON,
        'start_date': start_date,
        'end_date': end_date,
        'daily': 'temperature_2m_max,temperature_2m_min,precipitation_sum',
        'timezone': 'Asia/Kolkata'
    }
    response = requests.get(url, params=params)
    data = response.json()
    return pd.DataFrame({
        'ResidentDate': pd.to_datetime(data['daily']['time']).date,
        'temperature_max': data['daily']['temperature_2m_max'],
        'temperature_min': data['daily']['temperature_2m_min'],
        'precipitation': data['daily']['precipitation_sum']
    })

# Fetch Holidays using Calendarific
def fetch_holidays(year, api_key):
    url = "https://calendarific.com/api/v2/holidays"
    params = {
        'api_key': api_key,
        'country': 'IN',
        'year': year,
        'location': 'Tamil Nadu',
    }
    response = requests.get(url, params=params).json()
    holidays = []
    for item in response['response']['holidays']:
        holidays.append({
            'ResidentDate': pd.to_datetime(item['date']['iso']).date(),
            'event_type': 'Holiday'
        })
    return pd.DataFrame(holidays)

# Covid Waves
def generate_covid_waves():
    covid_periods = [
        ('2020-03-20', '2020-06-30'),
        ('2021-04-15', '2021-06-15'),
        ('2022-01-01', '2022-03-01')
    ]
    covid_list = []
    for start, end in covid_periods:
        date_range = pd.date_range(start, end)
        covid_list.extend([{'ResidentDate': d.date(), 'event_type': 'Covid_Wave'} for d in date_range])
    return pd.DataFrame(covid_list)

# Load Disaster Events from CSV
def fetch_disaster_events():
    df = pd.read_csv('disaster_data_chennai_2016_2022_gemini.csv')
    df['ResidentDate'] = pd.to_datetime(df['ResidentDate']).dt.date
    return df

# Merge all events with priority logic
def merge_all_events(start_date, end_date, api_key):
    weather = fetch_weather(start_date, end_date)

    holiday_frames = [fetch_holidays(year, api_key) for year in range(
        pd.to_datetime(start_date).year, pd.to_datetime(end_date).year + 1)]
    holidays = pd.concat(holiday_frames, ignore_index=True)

    covid = generate_covid_waves()
    disaster_events = fetch_disaster_events()

    # Mark Sundays
    date_range = pd.DataFrame({'ResidentDate': pd.date_range(start_date, end_date)})
    date_range['weekday'] = date_range['ResidentDate'].dt.weekday
    sundays = date_range[date_range['weekday'] == 6][['ResidentDate']].copy()
    sundays['event_type'] = 'Sunday'
    sundays['ResidentDate'] = sundays['ResidentDate'].dt.date

    # Combine all event types
    all_events = pd.concat([disaster_events, holidays, covid, sundays], ignore_index=True)

    # Priority Map
    priority_map = {
        'Cyclone Vardah': 1,
        'Heavy Rains & Flooding': 1,
        'Disaster': 1,
        'Election': 2,
        'Festival': 3,
        'Holiday': 4,
        'Covid_Wave': 5,
        'Sunday': 6
    }
    all_events['priority'] = all_events['event_type'].map(lambda x: priority_map.get(x, 7))

    # Keep highest priority event for each date
    all_events = all_events.sort_values(['ResidentDate', 'priority']).drop_duplicates('ResidentDate')

    # Merge with weather data
    merged = pd.merge(weather, all_events[['ResidentDate', 'event_type']], on='ResidentDate', how='left')
    merged['event_type'] = merged['event_type'].fillna('None')

    return merged

# Merge with Hospital Data
def merge_with_hospital_data(hospital_data, external_factors):
    hospital_data = hospital_data.rename(columns={
        'ResidentDate_year': 'year',
        'ResidentDate_month': 'month',
        'ResidentDate_day': 'day'
    })

    hospital_data['ResidentDate'] = pd.to_datetime(
        hospital_data[['year', 'month', 'day']]
    ).dt.date

    hospital_data = hospital_data.rename(columns={
        'year': 'ResidentDate_year',
        'month': 'ResidentDate_month',
        'day': 'ResidentDate_day'
    })

    merged_data = pd.merge(hospital_data, external_factors, on='ResidentDate', how='left')
    return merged_data

# Usage Example
hospital_data = pd.read_csv('hospital_admissions.csv')

# Fetch enriched external factors
external_factors = merge_all_events(START_DATE, END_DATE, API_KEY)

# Merge hospital data with external factors
merged_hospital_data = merge_with_hospital_data(hospital_data, external_factors)

# Save to CSV
hospital_data.to_csv('original_hospital_data.csv', index=False)
merged_hospital_data.to_csv('hospital_data_with_external_factors.csv', index=False)

print("✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'")


  df['ResidentDate'] = pd.to_datetime(df['ResidentDate']).dt.date


✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'


In [3]:
import pandas as pd
import requests

# CONFIG
LAT = 13.0827
LON = 80.2707
START_DATE = "2017-03-21"
END_DATE = "2022-03-20"
API_KEY = '8TEpIGk3AuzzHb0vNIBlWsfq0aeSp6H9'  # Calendarific API Key

# Fetch Weather from Open-Meteo
def fetch_weather(start_date, end_date):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        'latitude': LAT,
        'longitude': LON,
        'start_date': start_date,
        'end_date': end_date,
        'daily': 'temperature_2m_max,temperature_2m_min,precipitation_sum',
        'timezone': 'Asia/Kolkata'
    }
    response = requests.get(url, params=params)
    data = response.json()
    return pd.DataFrame({
        'ResidentDate': pd.to_datetime(data['daily']['time']).date,
        'temperature_max': data['daily']['temperature_2m_max'],
        'temperature_min': data['daily']['temperature_2m_min'],
        'precipitation': data['daily']['precipitation_sum']
    })

# Fetch Holidays using Calendarific
def fetch_holidays(year, api_key):
    url = "https://calendarific.com/api/v2/holidays"
    params = {
        'api_key': api_key,
        'country': 'IN',
        'year': year,
        'location': 'Tamil Nadu',
    }
    response = requests.get(url, params=params).json()
    holidays = []
    for item in response['response']['holidays']:
        holidays.append({
            'ResidentDate': pd.to_datetime(item['date']['iso']).date(),
            'event_type': 'Holiday'
        })
    return pd.DataFrame(holidays)

# Covid Waves — returns a dataframe
def generate_covid_waves():
    covid_periods = [
        ('2020-03-20', '2020-06-30'),
        ('2021-04-15', '2021-06-15'),
        ('2022-01-01', '2022-03-01')
    ]
    covid_list = []
    for start, end in covid_periods:
        date_range = pd.date_range(start, end)
        covid_list.extend([{'ResidentDate': d.date(), 'pandemic': 'Covid_Wave'} for d in date_range])
    return pd.DataFrame(covid_list)

# Disaster Events — returns a dataframe
def fetch_disaster_events():
    df = pd.read_csv('disaster_data_chennai_2016_2022_gemini.csv')
    df['ResidentDate'] = pd.to_datetime(df['ResidentDate']).dt.date
    df = df[['ResidentDate', 'event_type']]
    df = df.rename(columns={'event_type': 'disaster'})
    return df

# Merge all external features
def merge_all_events(start_date, end_date, api_key):
    weather = fetch_weather(start_date, end_date)

    # Fetch holidays
    holiday_frames = [fetch_holidays(year, api_key) for year in range(
        pd.to_datetime(start_date).year, pd.to_datetime(end_date).year + 1)]
    holidays = pd.concat(holiday_frames, ignore_index=True)

    # Fetch pandemic and disaster data
    covid = generate_covid_waves()
    disasters = fetch_disaster_events()

    # Sundays
    date_range = pd.DataFrame({'ResidentDate': pd.date_range(start_date, end_date)})
    date_range['weekday'] = date_range['ResidentDate'].dt.weekday
    sundays = date_range[date_range['weekday'] == 6][['ResidentDate']].copy()
    sundays['event_type'] = 'Sunday'
    sundays['ResidentDate'] = sundays['ResidentDate'].dt.date

    # Combine general event types (Holiday + Sunday)
    general_events = pd.concat([holidays, sundays], ignore_index=True)
    general_events = general_events.drop_duplicates(subset=['ResidentDate'])

    # Merge all on ResidentDate
    merged = pd.merge(weather, general_events, on='ResidentDate', how='left')
    merged = pd.merge(merged, covid, on='ResidentDate', how='left')
    merged = pd.merge(merged, disasters, on='ResidentDate', how='left')

    # Fill missing
    merged['event_type'] = merged['event_type'].fillna('None')
    merged['pandemic'] = merged['pandemic'].fillna('None')
    merged['disaster'] = merged['disaster'].fillna('None')

    return merged

# Merge with Hospital Data
def merge_with_hospital_data(hospital_data, external_factors):
    hospital_data = hospital_data.rename(columns={
        'ResidentDate_year': 'year',
        'ResidentDate_month': 'month',
        'ResidentDate_day': 'day'
    })

    hospital_data['ResidentDate'] = pd.to_datetime(
        hospital_data[['year', 'month', 'day']]
    ).dt.date

    hospital_data = hospital_data.rename(columns={
        'year': 'ResidentDate_year',
        'month': 'ResidentDate_month',
        'day': 'ResidentDate_day'
    })

    merged_data = pd.merge(hospital_data, external_factors, on='ResidentDate', how='left')
    return merged_data

# Usage Example
hospital_data = pd.read_csv('hospital_admissions.csv')

# Fetch enriched external factors
external_factors = merge_all_events(START_DATE, END_DATE, API_KEY)

# Merge hospital data with external factors
merged_hospital_data = merge_with_hospital_data(hospital_data, external_factors)

# Save to CSV
hospital_data.to_csv('original_hospital_data.csv', index=False)
merged_hospital_data.to_csv('hospital_data_with_external_factors.csv', index=False)

print("✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'")


  df['ResidentDate'] = pd.to_datetime(df['ResidentDate']).dt.date


✅ Saved: 'original_hospital_data.csv' and 'hospital_data_with_external_factors.csv'


In [11]:
import pandas as pd

# Wikipedia URL
url = "https://en.wikipedia.org/wiki/List_of_epidemics_and_pandemics"

# Read all tables from the page
tables = pd.read_html(url)

# Combine all tables (some are grouped by century)
pandemic_data = pd.concat(tables, ignore_index=True)

# Display a preview
pandemic_data.to_csv('pandemic_data.csv', index=False)

SyntaxError: invalid non-printable character U+00A0 (775136401.py, line 13)

In [7]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.
