Adding normal days in the prediction dataset as well

In [2]:
import openmeteo_requests
import numpy as np
import requests_cache
import pandas as pd
from retry_requests import retry
import random
from datetime import datetime, timedelta

In [3]:

df = pd.read_csv('combined.csv')
print(df)

            date  weather_code  temperature_2m_max  temperature_2m_min  \
0     1941-05-20          51.0               29.55               26.80   
1     1947-10-20           1.0               28.35               24.60   
2     1958-10-19          51.0               29.55               24.10   
3     1960-10-08          63.0               27.65               25.70   
4     1960-10-29          61.0               28.90               23.25   
...          ...           ...                 ...                 ...   
1853  2021-07-30          61.0               29.50               25.30   
1854  2021-07-31          63.0               28.55               24.75   
1855  2021-08-01          61.0               28.05               24.55   
1856  2021-08-02          65.0               27.65               24.75   
1857  2021-05-26          51.0               34.95               27.30   

      temperature_2m_mean  precipitation_sum   rain_sum  wind_speed_10m_max  \
0               28.243750       

In [5]:
existing_dates = set(pd.to_datetime(df['date']))

In [6]:
start_date = datetime(1940, 1, 1)
end_date = datetime(2024, 7, 16)

total_days = (end_date - start_date).days
all_dates = [start_date + timedelta(days=i) for i in range(total_days + 1)]


remaining_dates = [date for date in all_dates if date not in existing_dates]

if len(remaining_dates) >= 215:
    random_dates = random.sample(remaining_dates, 215)
else:
    raise ValueError("Not enough remaining dates to choose 215 unique dates")


In [7]:
new_dates = random_dates

# Convert the new dates to string format
new_dates_str = [date.strftime('%Y-%m-%d') for date in new_dates]
print(new_dates_str)

['1996-10-20', '1960-05-19', '1954-03-28', '1991-07-27', '1950-03-01', '2015-03-04', '1975-09-18', '2015-01-09', '1962-07-06', '1996-10-27', '2019-10-17', '1989-12-28', '1963-04-03', '1999-09-08', '2012-06-12', '1994-08-09', '1943-08-15', '1950-02-27', '1943-08-01', '1964-08-01', '2021-12-10', '1966-05-31', '1974-04-15', '1958-01-14', '2012-08-18', '1964-10-03', '1961-03-25', '1998-04-27', '2019-08-12', '1986-03-09', '1962-01-19', '1976-10-15', '2013-09-10', '2017-01-22', '1987-01-17', '1958-09-24', '1971-02-03', '1978-07-13', '1988-10-22', '1975-05-31', '1988-08-10', '1947-08-06', '2001-03-19', '1961-01-12', '2008-05-11', '1969-09-22', '1954-05-01', '1945-12-24', '2004-05-13', '2013-01-05', '1945-06-18', '2015-01-24', '1946-01-09', '1990-10-26', '1967-03-04', '2005-05-02', '2022-01-03', '1940-07-29', '1946-10-07', '1999-12-11', '1940-04-27', '1961-11-03', '2004-06-03', '1973-12-06', '2012-01-13', '1946-07-23', '1954-01-06', '1957-08-08', '2015-06-03', '2006-03-06', '1973-10-09', '1957

In [8]:
def get_weather_data(start_date, end_date, disaster):
    cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
	    "latitude": 22.335,
	    "longitude": 91.8325,
	    "start_date": start_date,
	    "end_date": end_date,
	    "daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant", "shortwave_radiation_sum"],
	    "hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "weather_code", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "soil_temperature_0_to_7cm", "soil_temperature_7_to_28cm", "soil_temperature_28_to_100cm", "soil_temperature_100_to_255cm", "soil_moisture_0_to_7cm", "soil_moisture_7_to_28cm", "soil_moisture_28_to_100cm", "soil_moisture_100_to_255cm"],
        "timezone": "Asia/Dhaka",
	    "elevation": "NaN"
    }
    responses = openmeteo.weather_api(url, params=params)


    response = responses[0]
    """
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
    """

    daily = response.Daily()
    daily_weather_code = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
    daily_temperature_2m_mean = daily.Variables(3).ValuesAsNumpy()
    daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
    daily_rain_sum = daily.Variables(5).ValuesAsNumpy()
    daily_wind_speed_10m_max = daily.Variables(6).ValuesAsNumpy()
    daily_wind_gusts_10m_max = daily.Variables(7).ValuesAsNumpy()
    daily_wind_direction_10m_dominant = daily.Variables(8).ValuesAsNumpy()
    daily_shortwave_radiation_sum = daily.Variables(9).ValuesAsNumpy()

    daily_data = {"date": pd.date_range(
	    start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	    end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	    freq = pd.Timedelta(seconds = daily.Interval()),
	    inclusive = "left"
    )}
    daily_data["start_date"] = start_date
    daily_data["end_date"] = end_date
    daily_data["disaster_type"] = disaster
    daily_data["weather_code"] = (daily_weather_code)
    daily_data["temperature_2m_max"] = (daily_temperature_2m_max)
    daily_data["temperature_2m_min"] = (daily_temperature_2m_min)
    daily_data["temperature_2m_mean"] = (daily_temperature_2m_mean)
    daily_data["precipitation_sum"] = (daily_precipitation_sum)
    daily_data["rain_sum"] = (daily_rain_sum)
    daily_data["wind_speed_10m_max"] = (daily_wind_speed_10m_max)
    daily_data["wind_gusts_10m_max"] = (daily_wind_gusts_10m_max)
    daily_data["wind_direction_10m_dominant"] = (daily_wind_direction_10m_dominant)
    daily_data["shortwave_radiation_sum"] = (daily_shortwave_radiation_sum)
    

    #print(daily_data)
    
    hourly = response.Hourly()
    hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
    hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
    hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
    hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
    hourly_rain = hourly.Variables(5).ValuesAsNumpy()
    hourly_weather_code = hourly.Variables(6).ValuesAsNumpy()
    hourly_pressure_msl = hourly.Variables(7).ValuesAsNumpy()
    hourly_surface_pressure = hourly.Variables(8).ValuesAsNumpy()
    hourly_cloud_cover = hourly.Variables(9).ValuesAsNumpy()
    hourly_cloud_cover_low = hourly.Variables(10).ValuesAsNumpy()
    hourly_cloud_cover_mid = hourly.Variables(11).ValuesAsNumpy()
    hourly_cloud_cover_high = hourly.Variables(12).ValuesAsNumpy()
    hourly_et0_fao_evapotranspiration = hourly.Variables(13).ValuesAsNumpy()
    hourly_vapour_pressure_deficit = hourly.Variables(14).ValuesAsNumpy()
    hourly_wind_speed_10m = hourly.Variables(15).ValuesAsNumpy()
    hourly_wind_speed_100m = hourly.Variables(16).ValuesAsNumpy()
    hourly_wind_direction_10m = hourly.Variables(17).ValuesAsNumpy()
    hourly_wind_direction_100m = hourly.Variables(18).ValuesAsNumpy()
    hourly_wind_gusts_10m = hourly.Variables(19).ValuesAsNumpy()
    hourly_soil_temperature_0_to_7cm = hourly.Variables(20).ValuesAsNumpy()
    hourly_soil_temperature_7_to_28cm = hourly.Variables(21).ValuesAsNumpy()
    hourly_soil_temperature_28_to_100cm = hourly.Variables(22).ValuesAsNumpy()
    hourly_soil_temperature_100_to_255cm = hourly.Variables(23).ValuesAsNumpy()
    hourly_soil_moisture_0_to_7cm = hourly.Variables(24).ValuesAsNumpy()
    hourly_soil_moisture_7_to_28cm = hourly.Variables(25).ValuesAsNumpy()
    hourly_soil_moisture_28_to_100cm = hourly.Variables(26).ValuesAsNumpy()
    hourly_soil_moisture_100_to_255cm = hourly.Variables(27).ValuesAsNumpy()

    hourly_data = {"date": pd.date_range(
	    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	    freq = pd.Timedelta(seconds = hourly.Interval()),
	    inclusive = "left"
    )}
    hourly_data["temperature_2m"] = hourly_temperature_2m
    hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
    hourly_data["dew_point_2m"] = hourly_dew_point_2m
    hourly_data["apparent_temperature"] = hourly_apparent_temperature
    hourly_data["precipitation"] = hourly_precipitation
    hourly_data["rain"] = hourly_rain
    hourly_data["weather_code"] = hourly_weather_code
    hourly_data["pressure_msl"] = hourly_pressure_msl
    hourly_data["surface_pressure"] = hourly_surface_pressure
    hourly_data["cloud_cover"] = hourly_cloud_cover
    hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
    hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
    hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
    hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
    hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
    hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
    hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
    hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
    hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
    hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
    hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
    hourly_data["soil_temperature_7_to_28cm"] = hourly_soil_temperature_7_to_28cm
    hourly_data["soil_temperature_28_to_100cm"] = hourly_soil_temperature_28_to_100cm
    hourly_data["soil_temperature_100_to_255cm"] = hourly_soil_temperature_100_to_255cm
    hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm
    hourly_data["soil_moisture_7_to_28cm"] = hourly_soil_moisture_7_to_28cm
    hourly_data["soil_moisture_28_to_100cm"] = hourly_soil_moisture_28_to_100cm
    hourly_data["soil_moisture_100_to_255cm"] = hourly_soil_moisture_100_to_255cm
    
    return daily_data, hourly_data

In [9]:
date_list = []
days_in_month = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ]

for date in range(len(new_dates_str)):
    
    start_date = new_dates_str[date]
    end_date = new_dates_str[date]

    daily_data, hourly_data = get_weather_data(start_date, end_date, 'Normal')

    for i in range(len(daily_data['weather_code'])):
        temp={}
        temp["date"] = daily_data["date"][i].date()
        temp["weather_code"] =  daily_data["weather_code"][i]
        temp["temperature_2m_max"] = daily_data["temperature_2m_max"][i]
        temp["temperature_2m_min"] = daily_data["temperature_2m_min"][i]
        temp["temperature_2m_mean"] = daily_data["temperature_2m_mean"][i]
        temp["precipitation_sum"] = daily_data["precipitation_sum"][i]
        temp["rain_sum"] = daily_data["rain_sum"][i]
        temp["wind_speed_10m_max"] = daily_data["wind_speed_10m_max"][i]
        temp["wind_gusts_10m_max"] = daily_data["wind_gusts_10m_max"][i]
        temp["wind_direction_10m_dominant"] = daily_data["wind_direction_10m_dominant"][i]
        temp["shortwave_radiation_sum"] = daily_data["shortwave_radiation_sum"][i]
        
        k=i*24
        for j in range(24):
            temp2={}
            temp2[f"temperature_2m_{j}"] = hourly_data["temperature_2m"][k+j]
            temp2[f"relative_humidity_2m_{j}"] = hourly_data["relative_humidity_2m"][k+j]
            temp2[f"dew_point_2m_{j}"] = hourly_data["dew_point_2m"][k+j]
            temp2[f"apparent_temperature_{j}"] = hourly_data["apparent_temperature"][k+j]
            temp2[f"precipitation_{j}"] = hourly_data["precipitation"][k+j]
            temp2[f"rain_{j}"] = hourly_data["rain"][k+j]
            temp2[f"weather_code_{j}"] = hourly_data["weather_code"][k+j]
            temp2[f"pressure_msl_{j}"] = hourly_data["pressure_msl"][k+j]
            temp2[f"surface_pressure_{j}"] = hourly_data["surface_pressure"][k+j]
            temp2[f"cloud_cover_{j}"] = hourly_data["cloud_cover"][k+j]
            temp2[f"cloud_cover_low_{j}"] = hourly_data["cloud_cover_low"][k+j]
            temp2[f"cloud_cover_mid_{j}"] = hourly_data["cloud_cover_mid"][k+j]
            temp2[f"cloud_cover_high_{j}"] =  hourly_data["cloud_cover_high"][k+j]
            temp2[f"et0_fao_evapotranspiration_{j}"] = hourly_data["et0_fao_evapotranspiration"][k+j]
            temp2[f"vapour_pressure_deficit_{j}"] = hourly_data["vapour_pressure_deficit"][k+j]
            temp2[f"wind_speed_10m_{j}"] = hourly_data["wind_speed_10m"][k+j]
            temp2[f"wind_speed_100m_{j}"] = hourly_data["wind_speed_100m"][k+j]
            temp2[f"wind_direction_10m_{j}"] = hourly_data["wind_direction_10m"][k+j]
            temp2[f"wind_direction_100m_{j}"] = hourly_data["wind_direction_100m"][k+j]
            temp2[f"wind_gusts_10m_{j}"] = hourly_data["wind_gusts_10m"][k+j]
            temp2[f"soil_temperature_0_to_7cm_{j}"] = hourly_data["soil_temperature_0_to_7cm"][k+j]
            temp2[f"soil_temperature_7_to_28cm_{j}"] = hourly_data["soil_temperature_7_to_28cm"][k+j]
            temp2[f"soil_temperature_28_to_100cm_{j}"] = hourly_data["soil_temperature_28_to_100cm"][k+j]
            temp2[f"soil_temperature_100_to_255cm_{j}"] = hourly_data["soil_temperature_100_to_255cm"][k+j]
            temp2[f"soil_moisture_0_to_7cm_{j}"] = hourly_data["soil_moisture_0_to_7cm"][k+j]
            temp2[f"soil_moisture_7_to_28cm_{j}"] = hourly_data["soil_moisture_7_to_28cm"][k+j]
            temp2[f"soil_moisture_28_to_100cm_{j}"] = hourly_data["soil_moisture_28_to_100cm"][k+j]
            temp2[f"soil_moisture_100_to_255cm_{j}"] = hourly_data["soil_moisture_100_to_255cm"][k+j]
            temp.update(temp2)
        
        temp["disaster_type"]= 'Normal'
            
        date_list.append(temp)
    
print(date_list)

[{'date': datetime.date(1996, 10, 19), 'weather_code': 0.0, 'temperature_2m_max': 30.5, 'temperature_2m_min': 23.15, 'temperature_2m_mean': 26.429169, 'precipitation_sum': 0.0, 'rain_sum': 0.0, 'wind_speed_10m_max': 11.525623, 'wind_gusts_10m_max': 20.519999, 'wind_direction_10m_dominant': 113.89879, 'shortwave_radiation_sum': 20.8, 'temperature_2m_0': 24.35, 'relative_humidity_2m_0': 86.49427, 'dew_point_2m_0': 21.95, 'apparent_temperature_0': 27.690304, 'precipitation_0': 0.0, 'rain_0': 0.0, 'weather_code_0': 0.0, 'pressure_msl_0': 1010.3, 'surface_pressure_0': 1009.9519, 'cloud_cover_0': 0.0, 'cloud_cover_low_0': 0.0, 'cloud_cover_mid_0': 0.0, 'cloud_cover_high_0': 0.0, 'et0_fao_evapotranspiration_0': 0.010723392, 'vapour_pressure_deficit_0': 0.41129565, 'wind_speed_10m_0': 10.661107, 'wind_speed_100m_0': 15.034041, 'wind_direction_10m_0': 101.68935, 'wind_direction_100m_0': 106.699326, 'wind_gusts_10m_0': 15.119999, 'soil_temperature_0_to_7cm_0': 25.0, 'soil_temperature_7_to_28cm_0

In [10]:
new_dates_df = pd.DataFrame(date_list)
print(new_dates_df)

           date  weather_code  temperature_2m_max  temperature_2m_min  \
0    1996-10-19           0.0           30.500000           23.150000   
1    1960-05-18          51.0           32.099998           27.600000   
2    1954-03-27          51.0           30.500000           24.600000   
3    1991-07-26          51.0           31.650000           26.549999   
4    1950-02-28           0.0           29.100000           19.850000   
..          ...           ...                 ...                 ...   
210  2011-05-20          63.0           28.950001           25.000000   
211  1948-01-15           1.0           25.299999           18.900000   
212  1984-04-30          61.0           30.500000           25.850000   
213  1995-07-20          63.0           26.799999           24.500000   
214  2001-09-06          61.0           28.049999           25.600000   

     temperature_2m_mean  precipitation_sum   rain_sum  wind_speed_10m_max  \
0              26.429169           0.000000  

In [11]:
df_updated = pd.concat([df, new_dates_df], ignore_index=True)
print(df_updated)

            date  weather_code  temperature_2m_max  temperature_2m_min  \
0     1941-05-20          51.0           29.550000               26.80   
1     1947-10-20           1.0           28.350000               24.60   
2     1958-10-19          51.0           29.550000               24.10   
3     1960-10-08          63.0           27.650000               25.70   
4     1960-10-29          61.0           28.900000               23.25   
...          ...           ...                 ...                 ...   
2068  2011-05-20          63.0           28.950001               25.00   
2069  1948-01-15           1.0           25.299999               18.90   
2070  1984-04-30          61.0           30.500000               25.85   
2071  1995-07-20          63.0           26.799999               24.50   
2072  2001-09-06          61.0           28.049999               25.60   

      temperature_2m_mean  precipitation_sum   rain_sum  wind_speed_10m_max  \
0               28.243750       

In [12]:
df_updated.to_csv('combined_with_normal.csv', index=False)