**Checking API data**

In [9]:
import openmeteo_requests
import numpy as np
import requests_cache
import pandas as pd
from retry_requests import retry


In [3]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [4]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": [21.427222, 22.335, 23.763889, 22.71],
	"longitude": [92.005, 91.8325, 90.388889, 90.363],
	"start_date": ["2018-04-28","2018-05-01", "2018-05-01", "2018-05-01"],       #actual start date: 04-29 -> 2 days later
	"end_date": ["2018-05-02","2018-05-02", "2018-05-02", "2018-05-02"],         #actual end date: 04-30
	"daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant", "shortwave_radiation_sum"],
	"timezone": ["Asia/Dhaka", "Asia/Dhaka", "Asia/Dhaka", "Asia/Dhaka"],
	"elevation": ["NaN", "NaN", "NaN", "NaN"]
}
responses = openmeteo.weather_api(url, params=params)

In [12]:
# Process first location. Add a for-loop for multiple locations or weather models
response = responses[1]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
print(f"response length {len(responses)}")
print(f" response: {response}")

Coordinates 22.31985855102539°N 91.8614273071289°E
Elevation 3.0 m asl
Timezone b'Asia/Dhaka' b'+06'
Timezone difference to GMT+0 21600 s
response length 4
 response: <openmeteo_sdk.WeatherApiResponse.WeatherApiResponse object at 0x000001864B6DFD00>


In [13]:
# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
daily_temperature_2m_mean = daily.Variables(3).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
daily_rain_sum = daily.Variables(5).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(6).ValuesAsNumpy()
daily_wind_gusts_10m_max = daily.Variables(7).ValuesAsNumpy()
daily_wind_direction_10m_dominant = daily.Variables(8).ValuesAsNumpy()
daily_shortwave_radiation_sum = daily.Variables(9).ValuesAsNumpy()

In [14]:
daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["weather_code"] =  daily_weather_code
daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_gusts_10m_max"] = daily_wind_gusts_10m_max
daily_data["wind_direction_10m_dominant"] = daily_wind_direction_10m_dominant
daily_data["shortwave_radiation_sum"] = daily_shortwave_radiation_sum
print(daily_data)

{'date': DatetimeIndex(['2018-04-30 18:00:00+00:00', '2018-05-01 18:00:00+00:00'], dtype='datetime64[ns, UTC]', freq='D'), 'weather_code': array([63., 63.], dtype=float32), 'temperature_2m_max': array([30.65, 31.35], dtype=float32), 'temperature_2m_min': array([22.85, 22.45], dtype=float32), 'temperature_2m_mean': array([26.616669, 27.295832], dtype=float32), 'precipitation_sum': array([8.1, 4.4], dtype=float32), 'rain_sum': array([8.1, 4.4], dtype=float32), 'wind_speed_10m_max': array([13.783817, 16.575644], dtype=float32), 'wind_gusts_10m_max': array([32.760002, 37.079998], dtype=float32), 'wind_direction_10m_dominant': array([162.22856, 205.59842], dtype=float32), 'shortwave_radiation_sum': array([19.68, 23.98], dtype=float32)}


In [104]:
daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)

                       date  weather_code  temperature_2m_max  \
0 2018-04-30 18:00:00+00:00          63.0           30.299999   
1 2018-05-01 18:00:00+00:00           1.0           30.350000   

   temperature_2m_min  temperature_2m_mean  precipitation_sum  rain_sum  \
0           24.400000            27.456247                6.5       6.5   
1           24.549999            27.554167                0.0       0.0   

   wind_speed_10m_max  wind_gusts_10m_max  wind_direction_10m_dominant  \
0           18.837322           39.599998                   219.876755   
1           19.642281           38.880001                   221.061829   

   shortwave_radiation_sum  
0                    21.08  
1                    23.98  


**Loading Disaster Data CSV and cleaning the data**

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [17]:
df= pd.read_csv('disaster.csv')
df.head()

Unnamed: 0,Year,Seq,Glide,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Country,...,No Affected,No Homeless,Total Affected,Insured Damages ('000 US$),Total Damages ('000 US$),CPI,Adm Level,Admin1 Code,Admin2 Code,Geo Locations
0,1900,9002,,Natural,Climatological,Drought,Drought,,,Cabo Verde,...,,,,,,3.221647,,,,
1,1900,9001,,Natural,Climatological,Drought,Drought,,,India,...,,,,,,3.221647,,,,
2,1902,12,,Natural,Geophysical,Earthquake,Ground movement,,,Guatemala,...,,,,,25000.0,3.350513,,,,
3,1902,3,,Natural,Geophysical,Volcanic activity,Ash fall,,Santa Maria,Guatemala,...,,,,,,3.350513,,,,
4,1902,10,,Natural,Geophysical,Volcanic activity,Ash fall,,Santa Maria,Guatemala,...,,,,,,3.350513,,,,


In [18]:
n_df = df[df['Country']=='Bangladesh']
print(n_df)

       Year  Seq           Glide Disaster Group Disaster Subgroup  \
7      1904    3             NaN        Natural    Meteorological   
18     1909   10             NaN        Natural    Meteorological   
19     1909   13             NaN        Natural    Meteorological   
32     1911    4             NaN        Natural    Meteorological   
47     1918    1             NaN        Natural        Biological   
...     ...  ...             ...            ...               ...   
15105  2019  655             NaN        Natural    Meteorological   
15150  2020  323  FL-2020-000166        Natural      Hydrological   
15191  2020  211   TC-2020-00135        Natural    Meteorological   
15243  2021  455             NaN        Natural      Hydrological   
15288  2021  295  TC-2021-000058        Natural    Meteorological   

              Disaster Type  Disaster Subtype Disaster Subsubtype  \
7                     Storm  Tropical cyclone                 NaN   
18                    Storm  Trop

In [19]:
n_df.describe()

Unnamed: 0,Year,Seq,Aid Contribution,Dis Mag Value,Start Year,Start Month,Start Day,End Year,End Month,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Insured Damages ('000 US$),Total Damages ('000 US$),CPI
count,356.0,356.0,19.0,110.0,356.0,353.0,274.0,356.0,348.0,273.0,318.0,127.0,180.0,62.0,265.0,1.0,53.0,354.0
mean,1991.755618,416.643258,34894.368421,7083.027273,1991.758427,6.31728,16.094891,1991.775281,6.29023,16.168498,9415.057,8432.661417,2493596.0,232853.7,1752284.0,80000.0,413086.1,55.5939
std,20.166891,1255.896241,72407.02647,17373.800454,20.165598,2.817591,8.888716,20.175988,2.861695,8.871271,110241.8,55345.775663,6558631.0,726407.4,5637644.0,,795845.5,25.968809
min,1904.0,1.0,161.0,-7.0,1904.0,1.0,1.0,1904.0,1.0,1.0,1.0,2.0,12.0,225.0,6.0,80000.0,201.0,3.479379
25%,1983.75,76.0,534.5,57.0,1983.75,4.0,9.0,1983.75,4.0,9.0,15.0,70.5,18843.75,7575.0,1500.0,80000.0,14000.0,38.897403
50%,1995.0,190.0,1551.0,120.0,1995.0,6.0,15.5,1995.0,6.0,15.0,41.0,200.0,169230.0,24471.5,40200.0,80000.0,100000.0,58.878183
75%,2005.0,373.0,8828.5,556.25,2005.0,8.0,24.0,2005.0,8.25,23.0,165.75,500.0,1498934.0,100000.0,500000.0,80000.0,500000.0,75.4572
max,2021.0,9350.0,234100.0,91120.0,2021.0,12.0,31.0,2021.0,12.0,31.0,1900000.0,600000.0,45000000.0,5000000.0,45000000.0,80000.0,4300000.0,100.0


We'll be dropping rows relating to earthquake data as well as earthquakes have no relation to weather data

In [20]:
n_df['Disaster Subgroup'].unique()  


array(['Meteorological', 'Biological', 'Climatological', 'Hydrological',
       'Geophysical'], dtype=object)

In [21]:
nn_df = n_df[n_df['Disaster Subgroup']!='Geophysical']   #geophysical incidents are earthquakes
print(nn_df)

       Year  Seq           Glide Disaster Group Disaster Subgroup  \
7      1904    3             NaN        Natural    Meteorological   
18     1909   10             NaN        Natural    Meteorological   
19     1909   13             NaN        Natural    Meteorological   
32     1911    4             NaN        Natural    Meteorological   
47     1918    1             NaN        Natural        Biological   
...     ...  ...             ...            ...               ...   
15105  2019  655             NaN        Natural    Meteorological   
15150  2020  323  FL-2020-000166        Natural      Hydrological   
15191  2020  211   TC-2020-00135        Natural    Meteorological   
15243  2021  455             NaN        Natural      Hydrological   
15288  2021  295  TC-2021-000058        Natural    Meteorological   

              Disaster Type  Disaster Subtype Disaster Subsubtype  \
7                     Storm  Tropical cyclone                 NaN   
18                    Storm  Trop

The api we're using also only has data available from 1970 onwards, so we'll be filtering our data accordingly

In [22]:
nnn_df = nn_df[nn_df['Year'] > 1939]
print(nnn_df)


       Year   Seq           Glide Disaster Group Disaster Subgroup  \
143    1941     3             NaN        Natural    Meteorological   
146    1942     8             NaN        Natural    Meteorological   
152    1943  9002             NaN        Natural    Climatological   
170    1947    12             NaN        Natural    Meteorological   
171    1947  9005             NaN        Natural    Climatological   
...     ...   ...             ...            ...               ...   
15105  2019   655             NaN        Natural    Meteorological   
15150  2020   323  FL-2020-000166        Natural      Hydrological   
15191  2020   211   TC-2020-00135        Natural    Meteorological   
15243  2021   455             NaN        Natural      Hydrological   
15288  2021   295  TC-2021-000058        Natural    Meteorological   

              Disaster Type  Disaster Subtype Disaster Subsubtype  \
143                   Storm  Tropical cyclone                 NaN   
146                  

We'll be dropping rows where there is no starting and ending date, as these are the parameters we'll be using to fetch weather data

In [23]:
df_cleaned = nnn_df.dropna(subset=['Start Day', 'Start Month', 'End Day', 'End Month' ])
null_checker = df_cleaned.apply(lambda x: sum(x.isnull())).to_frame(name='count')
print(null_checker)


                            count
Year                            0
Seq                             0
Glide                         231
Disaster Group                  0
Disaster Subgroup               0
Disaster Type                   0
Disaster Subtype               66
Disaster Subsubtype           220
Event Name                    232
Country                         0
ISO                             0
Region                          0
Continent                       0
Location                       17
Origin                        208
Associated Dis                215
Associated Dis2               251
OFDA Response                 242
Appeal                        208
Declaration                   210
Aid Contribution              241
Dis Mag Value                 170
Dis Mag Scale                   6
Latitude                      244
Longitude                     244
Local Time                    256
River Basin                   227
Start Year                      0
Start Month   

As we can see, all the null values from start and end month have been removed.Disaster types also didn't have any null values in the first place. Now we'll be iterating over these values to collect weather related data from these dates

In [39]:
def get_weather_data(start_date, end_date):
    cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
	    "latitude": 22.335,
	    "longitude": 91.8325,
	    "start_date": start_date,
	    "end_date": end_date,
	    "daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant", "shortwave_radiation_sum"],
	    "timezone": "Asia/Dhaka",
	    "elevation": "NaN"
    }
    responses = openmeteo.weather_api(url, params=params)


    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()
    daily_weather_code = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
    daily_temperature_2m_mean = daily.Variables(3).ValuesAsNumpy()
    daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
    daily_rain_sum = daily.Variables(5).ValuesAsNumpy()
    daily_wind_speed_10m_max = daily.Variables(6).ValuesAsNumpy()
    daily_wind_gusts_10m_max = daily.Variables(7).ValuesAsNumpy()
    daily_wind_direction_10m_dominant = daily.Variables(8).ValuesAsNumpy()
    daily_shortwave_radiation_sum = daily.Variables(9).ValuesAsNumpy()

    daily_data = {"date": pd.date_range(
	    start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	    end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	    freq = pd.Timedelta(seconds = daily.Interval()),
	    inclusive = "left"
    )}
    daily_data["start_date"] = start_date
    daily_data["end_date"] = end_date
    daily_data["weather_code"] = np.average(daily_weather_code)
    daily_data["temperature_2m_max"] = np.average(daily_temperature_2m_max)
    daily_data["temperature_2m_min"] = np.average(daily_temperature_2m_min)
    daily_data["temperature_2m_mean"] = np.average(daily_temperature_2m_mean)
    daily_data["precipitation_sum"] = np.average(daily_precipitation_sum)
    daily_data["rain_sum"] = np.average(daily_rain_sum)
    daily_data["wind_speed_10m_max"] = np.average(daily_wind_speed_10m_max)
    daily_data["wind_gusts_10m_max"] = np.average(daily_wind_gusts_10m_max)
    daily_data["wind_direction_10m_dominant"] = np.average(daily_wind_direction_10m_dominant)
    daily_data["shortwave_radiation_sum"] = np.average(daily_shortwave_radiation_sum)
    

    print(daily_data)
    return daily_data

In [41]:
date_list = []
days_in_month = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ]

for index, row in df_cleaned.iterrows():
    
    st_day = int(row['Start Day'])
    st_mon = int(row['Start Month'])
    
    fin_day = int(row['End Day'])
    fin_mon = int(row['End Month'])
    
    #print(st_mon, st_day, " ", fin_mon, fin_day)
    
    if(st_day > days_in_month[st_mon-1]):
        st_day += (st_day % days_in_month[st_mon-1])
        st_mon+=1
        
    if(fin_day > days_in_month[fin_mon-1]):
        fin_day += (fin_day % days_in_month[fin_mon-1])
        fin_mon+=1
    
    start_date = (f"{int(row['Start Year']):04d}-{int(st_mon)  :02d}-{int(st_day):02d}")
    end_date = (f"{int(row['End Year']):04d}-{int(fin_mon):02d}-{int(fin_day):02d}")
    
    #print(start_date, end_date)
    results = get_weather_data(start_date, end_date)
    print(results)
    date_list.append(results)
    
print(date_list)

Coordinates 22.31985855102539°N 91.8614273071289°E
Elevation 3.0 m asl
Timezone b'Asia/Dhaka' b'+06'
Timezone difference to GMT+0 21600 s
{'date': DatetimeIndex(['1941-05-20 18:00:00+00:00'], dtype='datetime64[ns, UTC]', freq='D'), 'start_date': '1941-05-21', 'end_date': '1941-05-21', 'weather_code': 51.0, 'temperature_2m_max': 29.55, 'temperature_2m_min': 26.8, 'temperature_2m_mean': 28.24375, 'precipitation_sum': 1.6000001, 'rain_sum': 1.6000001, 'wind_speed_10m_max': 13.32, 'wind_gusts_10m_max': 21.599998, 'wind_direction_10m_dominant': 260.71222, 'shortwave_radiation_sum': 24.21}
{'date': DatetimeIndex(['1941-05-20 18:00:00+00:00'], dtype='datetime64[ns, UTC]', freq='D'), 'start_date': '1941-05-21', 'end_date': '1941-05-21', 'weather_code': 51.0, 'temperature_2m_max': 29.55, 'temperature_2m_min': 26.8, 'temperature_2m_mean': 28.24375, 'precipitation_sum': 1.6000001, 'rain_sum': 1.6000001, 'wind_speed_10m_max': 13.32, 'wind_gusts_10m_max': 21.599998, 'wind_direction_10m_dominant': 2

In [42]:
weather_data_df = pd.DataFrame(date_list)
weather_data_df.index.name = 'Row_Number'
print(weather_data_df)

                                                         date  start_date  \
Row_Number                                                                  
0           DatetimeIndex(['1941-05-20 18:00:00+00:00'], d...  1941-05-21   
1           DatetimeIndex(['1947-10-20 18:00:00+00:00'], d...  1947-10-21   
2           DatetimeIndex(['1958-10-19 18:00:00+00:00'], d...  1958-10-20   
3           DatetimeIndex(['1960-10-08 18:00:00+00:00'], d...  1960-10-09   
4           DatetimeIndex(['1960-10-29 18:00:00+00:00'], d...  1960-10-30   
...                                                       ...         ...   
252         DatetimeIndex(['2019-11-08 18:00:00+00:00', '2...  2019-11-09   
253         DatetimeIndex(['2020-06-14 18:00:00+00:00', '2...  2020-06-15   
254         DatetimeIndex(['2020-05-19 18:00:00+00:00'], d...  2020-05-20   
255         DatetimeIndex(['2021-07-26 18:00:00+00:00', '2...  2021-07-27   
256         DatetimeIndex(['2021-05-26 18:00:00+00:00'], d...  2021-05-27   

In [43]:
temp_disaster = pd.DataFrame(df_cleaned['Disaster Type'])
temp_disaster_reset = temp_disaster.reset_index(drop=True)
temp_disaster_reset.index.name = 'Row_Number'
temp_disaster_reset.rename(columns={None: 'disaster_type'}, inplace=True)
print(temp_disaster_reset)

           Disaster Type
Row_Number              
0                  Storm
1                  Storm
2                  Storm
3                  Storm
4                  Storm
...                  ...
252                Storm
253                Flood
254                Storm
255                Flood
256                Storm

[257 rows x 1 columns]


In [44]:
combined_data = pd.merge(weather_data_df, temp_disaster_reset, on='Row_Number', how='inner')
print(combined_data)

                                                         date  start_date  \
Row_Number                                                                  
0           DatetimeIndex(['1941-05-20 18:00:00+00:00'], d...  1941-05-21   
1           DatetimeIndex(['1947-10-20 18:00:00+00:00'], d...  1947-10-21   
2           DatetimeIndex(['1958-10-19 18:00:00+00:00'], d...  1958-10-20   
3           DatetimeIndex(['1960-10-08 18:00:00+00:00'], d...  1960-10-09   
4           DatetimeIndex(['1960-10-29 18:00:00+00:00'], d...  1960-10-30   
...                                                       ...         ...   
252         DatetimeIndex(['2019-11-08 18:00:00+00:00', '2...  2019-11-09   
253         DatetimeIndex(['2020-06-14 18:00:00+00:00', '2...  2020-06-15   
254         DatetimeIndex(['2020-05-19 18:00:00+00:00'], d...  2020-05-20   
255         DatetimeIndex(['2021-07-26 18:00:00+00:00', '2...  2021-07-27   
256         DatetimeIndex(['2021-05-26 18:00:00+00:00'], d...  2021-05-27   

In [45]:
combined_data.to_csv('combined_disaster_weather_data.csv', index=False)