In [107]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
pd.set_option('display.max_columns',None)
sns.set_theme()

In [108]:
flood_events = pd.read_csv('Indofloods Dataset/floodevents_indofloods.csv')

In [109]:
flood_events.head()

Unnamed: 0,EventID,Start Date,End Date,Peak Flood Level (m),Peak FL Date,Num Peak FL,Peak Discharge Q (cumec),Peak Discharge Date,Flood Volume (cumec),Event Duration (days),Time to Peak (days),Recession Time (day),Flood Type
0,INDOFLOODS-gauge-1010-1,2010-07-21,2010-07-21,47.95,2010-07-21,1,,,,1,1,1,Flood
1,INDOFLOODS-gauge-1010-2,2016-07-23,2016-07-23,48.05,2016-07-23,1,,,,1,1,1,Flood
2,INDOFLOODS-gauge-1010-3,2016-07-26,2016-07-26,48.0,2016-07-26,1,,,,1,1,1,Flood
3,INDOFLOODS-gauge-1010-4,2017-08-11,2017-08-13,48.95,2017-08-12,1,,,,3,2,2,Severe Flood
4,INDOFLOODS-gauge-1012-1,2010-07-21,2010-07-21,48.1,2010-07-21,1,,,,1,1,1,Flood


In [110]:
flood_events.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4548 entries, 0 to 4547
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   EventID                   4548 non-null   object 
 1   Start Date                4548 non-null   object 
 2   End Date                  4548 non-null   object 
 3   Peak Flood Level (m)      4548 non-null   float64
 4   Peak FL Date              4548 non-null   object 
 5   Num Peak FL               4548 non-null   int64  
 6   Peak Discharge Q (cumec)  3993 non-null   float64
 7   Peak Discharge Date       3993 non-null   object 
 8   Flood Volume (cumec)      3971 non-null   float64
 9   Event Duration (days)     4548 non-null   int64  
 10  Time to Peak (days)       4548 non-null   int64  
 11  Recession Time (day)      4548 non-null   int64  
 12  Flood Type                4548 non-null   object 
dtypes: float64(3), int64(4), object(6)
memory usage: 462.0+ KB


In [111]:
## check how many unique gauge stations are present
flood_events['gauge_id'] = flood_events['EventID'].str.split('-').str[:-1].str.join('-')

In [112]:
flood_events.head()

Unnamed: 0,EventID,Start Date,End Date,Peak Flood Level (m),Peak FL Date,Num Peak FL,Peak Discharge Q (cumec),Peak Discharge Date,Flood Volume (cumec),Event Duration (days),Time to Peak (days),Recession Time (day),Flood Type,gauge_id
0,INDOFLOODS-gauge-1010-1,2010-07-21,2010-07-21,47.95,2010-07-21,1,,,,1,1,1,Flood,INDOFLOODS-gauge-1010
1,INDOFLOODS-gauge-1010-2,2016-07-23,2016-07-23,48.05,2016-07-23,1,,,,1,1,1,Flood,INDOFLOODS-gauge-1010
2,INDOFLOODS-gauge-1010-3,2016-07-26,2016-07-26,48.0,2016-07-26,1,,,,1,1,1,Flood,INDOFLOODS-gauge-1010
3,INDOFLOODS-gauge-1010-4,2017-08-11,2017-08-13,48.95,2017-08-12,1,,,,3,2,2,Severe Flood,INDOFLOODS-gauge-1010
4,INDOFLOODS-gauge-1012-1,2010-07-21,2010-07-21,48.1,2010-07-21,1,,,,1,1,1,Flood,INDOFLOODS-gauge-1012


In [113]:
## lets see how many unique gauge stations are there 
len(flood_events['gauge_id'].unique())

155

In [114]:
gauge_flood = flood_events.groupby('gauge_id')['Flood Type'].size().sort_values(ascending=False)

In [115]:
## get Info about this gauge id
meta_data = pd.read_csv('Indofloods Dataset/metadata_indofloods.csv')

In [116]:
meta_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 18 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   GaugeID                              214 non-null    object 
 2   Danger Level                         212 non-null    float64
 3   Station                              214 non-null    object 
 4   Latitude                             214 non-null    float64
 5   Longitude                            214 non-null    float64
 6   River Name/ Tributory/ SubTributory  214 non-null    object 
 7   Basin                                212 non-null    object 
 8   State                                214 non-null    object 
 9   Start_date                           214 non-null    object 
 10  End_date                             214 non-null    object 
 11  Level_Entries                        214 non-null    int64  
 12  Streamflow_Entries              

In [117]:
cautious_gauges = meta_data[meta_data['Reliability'] == 'Caution']

In [118]:
cautious_gauges.head()

Unnamed: 0,GaugeID,Warning Level,Danger Level,Station,Latitude,Longitude,River Name/ Tributory/ SubTributory,Basin,State,Start_date,End_date,Level_Entries,Streamflow_Entries,Privacy,Source Catchment Area,Catchment Area,Area variation (%),Reliability
21,INDOFLOODS-gauge-115,47.68,48.68,Benibad,26.1453,85.6922,Ganga/Kosi/ Bagmati,Ganga - Brahmaputra - Meghna/Barak,Bihar,1985-04-01,2010-12-30,9405,9384,Restricted,6160.0,1793.517334,70.884459,Caution
22,INDOFLOODS-gauge-164,204.5,205.33,delhi rly bridge,28.6625,77.2467,Ganga/Yamuna,Ganga - Brahmaputra - Meghna/Barak,Delhi,1963-06-06,2011-12-31,17741,17740,Restricted,18552.0,36437.1875,96.405711,Caution
30,INDOFLOODS-gauge-125,51.32,52.32,Saulighat,26.4089,85.8597,Ganga/Kosi/\nBagmati/Adhwara/ Dhaus,Ganga - Brahmaputra - Meghna/Barak,Bihar,1985-03-22,2010-12-31,9416,9371,Restricted,1932.0,3314.956299,71.581589,Caution
33,INDOFLOODS-gauge-124,44.72,45.72,Hayaghat,26.0333,85.9,Ganga/Kosi/Bagmati,Ganga - Brahmaputra - Meghna/Barak,Bihar,1970-01-01,2010-01-31,14641,14611,Restricted,12973.0,8762.385742,32.456751,Caution
49,INDOFLOODS-gauge-116,45.94,46.94,Ekmighat,26.1167,85.8589,Ganga/Kosi/ Bagmati/Adhwara,Ganga - Brahmaputra - Meghna/Barak,Bihar,1985-04-01,2010-12-31,9405,9312,Restricted,4197.0,516.797852,87.686494,Caution


In [119]:
for idx, row in cautious_gauges.iterrows():
    gauge_id = row['GaugeID']
    flood_count = gauge_flood.get(gauge_id, 'Not Found')
    print(f'{gauge_id} : {flood_count}')


INDOFLOODS-gauge-115 : Not Found
INDOFLOODS-gauge-164 : Not Found
INDOFLOODS-gauge-125 : Not Found
INDOFLOODS-gauge-124 : Not Found
INDOFLOODS-gauge-116 : Not Found
INDOFLOODS-gauge-1050 : 41
INDOFLOODS-gauge-509 : 39
INDOFLOODS-gauge-1003 : Not Found
INDOFLOODS-gauge-370 : 24
INDOFLOODS-gauge-506 : Not Found
INDOFLOODS-gauge-833 : 6
INDOFLOODS-gauge-1010 : 4
INDOFLOODS-gauge-665 : 3
INDOFLOODS-gauge-422 : 2
INDOFLOODS-gauge-1084 : 2
INDOFLOODS-gauge-406 : 2
INDOFLOODS-gauge-421 : 2
INDOFLOODS-gauge-943 : 2
INDOFLOODS-gauge-367 : 1
INDOFLOODS-gauge-426 : 1
INDOFLOODS-gauge-445 : 1
INDOFLOODS-gauge-434 : 1
INDOFLOODS-gauge-485 : 1
INDOFLOODS-gauge-705 : 1
INDOFLOODS-gauge-1074 : 1
INDOFLOODS-gauge-1012 : 1
INDOFLOODS-gauge-626 : 1
INDOFLOODS-gauge-877 : 1


In [120]:
## gauge 1050 has 41 records and its cautious
meta_data[meta_data['GaugeID'] == 'INDOFLOODS-gauge-1050']

Unnamed: 0,GaugeID,Warning Level,Danger Level,Station,Latitude,Longitude,River Name/ Tributory/ SubTributory,Basin,State,Start_date,End_date,Level_Entries,Streamflow_Entries,Privacy,Source Catchment Area,Catchment Area,Area variation (%),Reliability
51,INDOFLOODS-gauge-1050,34.22,35.3,Tufanganj,26.3089,89.6756,Brahmaputra/ Torsa/\nRaidak-i,Ganga - Brahmaputra -Meghna/Barak,West Bengal,2012-06-02,2020-10-31,1023,0,Open,520.0,4924.346191,846.989652,Caution


In [121]:
## lets see the flood records at this place
tufanganj_floods = flood_events[flood_events['gauge_id'] == 'INDOFLOODS-gauge-1050']

In [122]:
tufanganj_floods

Unnamed: 0,EventID,Start Date,End Date,Peak Flood Level (m),Peak FL Date,Num Peak FL,Peak Discharge Q (cumec),Peak Discharge Date,Flood Volume (cumec),Event Duration (days),Time to Peak (days),Recession Time (day),Flood Type,gauge_id
29,INDOFLOODS-gauge-1050-1,2012-06-02,2012-06-02,65.8,2012-06-02,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
30,INDOFLOODS-gauge-1050-2,2012-06-07,2012-06-07,57.4,2012-06-07,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
31,INDOFLOODS-gauge-1050-3,2012-06-12,2012-06-17,131.4,2012-06-17,1,,,,6,6,1,Severe Flood,INDOFLOODS-gauge-1050
32,INDOFLOODS-gauge-1050-4,2012-06-23,2012-06-23,95.6,2012-06-23,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
33,INDOFLOODS-gauge-1050-5,2012-06-25,2012-06-28,175.6,2012-06-27,1,,,,4,3,2,Severe Flood,INDOFLOODS-gauge-1050
34,INDOFLOODS-gauge-1050-6,2012-07-05,2012-07-07,175.6,2012-07-06,1,,,,3,2,2,Severe Flood,INDOFLOODS-gauge-1050
35,INDOFLOODS-gauge-1050-7,2012-07-11,2012-07-11,36.8,2012-07-11,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
36,INDOFLOODS-gauge-1050-8,2012-07-14,2012-07-14,42.6,2012-07-14,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
37,INDOFLOODS-gauge-1050-9,2012-07-16,2012-07-16,125.6,2012-07-16,1,,,,1,1,1,Severe Flood,INDOFLOODS-gauge-1050
38,INDOFLOODS-gauge-1050-10,2012-07-23,2012-07-24,54.6,2012-07-23,1,,,,2,1,2,Severe Flood,INDOFLOODS-gauge-1050


In [123]:
gauge_flood

gauge_id
INDOFLOODS-gauge-394    419
INDOFLOODS-gauge-560    268
INDOFLOODS-gauge-571    242
INDOFLOODS-gauge-361    219
INDOFLOODS-gauge-364    175
                       ... 
INDOFLOODS-gauge-705      1
INDOFLOODS-gauge-877      1
INDOFLOODS-gauge-832      1
INDOFLOODS-gauge-913      1
INDOFLOODS-gauge-889      1
Name: Flood Type, Length: 155, dtype: int64

In [124]:
df = flood_events[flood_events['gauge_id'] == 'INDOFLOODS-gauge-394']

In [125]:
## gather the rainfall data
df['Start Date'].min()
df['End Date'].max()

'2018-07-31'

In [126]:
meta_data[meta_data['GaugeID'] == 'INDOFLOODS-gauge-394']

Unnamed: 0,GaugeID,Warning Level,Danger Level,Station,Latitude,Longitude,River Name/ Tributory/ SubTributory,Basin,State,Start_date,End_date,Level_Entries,Streamflow_Entries,Privacy,Source Catchment Area,Catchment Area,Area variation (%),Reliability
0,INDOFLOODS-gauge-394,259.2,262.0,Biligundulu,12.18,77.73,Cauvery,Cauvery,Karnataka,1971-08-30,2020-05-27,17715,17273,Open,36682.0,36889.80078,0.566493,Safe


In [None]:
import requests
import pandas as pd

# Define parameters
latitude = 12.18
longitude = 77.73
start_date = "1970-01-01"
end_date = "2020-12-31"

# Open-Meteo historical API endpoint
url = "https://archive-api.open-meteo.com/v1/archive"

# API parameters
params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": start_date,
    "end_date": end_date,
    "daily": "precipitation_sum",
    "timezone": "Asia/Kolkata"  # optional, for local time
}

# Make request
response = requests.get(url, params=params)
data = response.json()

# Convert to DataFrame
rainfall_data = pd.DataFrame({
    "date": data["daily"]["time"],
    "precipitation_mm": data["daily"]["precipitation_sum"]
})

# Display first few rows
print(rainfall_data.head())

         date  precipitation_mm
0  1970-01-01               0.5
1  1970-01-02               0.0
2  1970-01-03               0.0
3  1970-01-04               0.0
4  1970-01-05               0.0
