In [1]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# DigitalForecast XML feed for JawaTimur served from data.bmkg.go.id.
BMKG_FORECAST_URL = 'https://data.bmkg.go.id/DataMKG/MEWS/DigitalForecast/DigitalForecast-JawaTimur.xml'

# requests applies no timeout by default, so an unresponsive server would
# block this cell indefinitely; give up after 30 seconds instead.
bmkg = requests.get(BMKG_FORECAST_URL, timeout=30)
# Stop here on an HTTP error rather than parsing an error page as forecast XML.
bmkg.raise_for_status()

In [3]:
# Parse the raw response bytes with the XML parser so the feed's tags are
# handled as XML rather than HTML.
bs = BeautifulSoup(markup=bmkg.content, features='xml')

In [4]:
def get_last_date():
    """Return yesterday's date as a ``YYYYMMDD`` string.

    "Last date" means the day before the current system date as reported by
    ``datetime.now()``; the result is formatted with ``%Y%m%d``.
    """
    one_day = timedelta(days=1)
    yesterday = datetime.now().date() - one_day
    return format(yesterday, "%Y%m%d")

In [40]:
def get_area_detail(area,index):
    """Build a one-row DataFrame describing a single ``<area>`` element.

    Parameters
    ----------
    area : bs4 Tag
        Element whose ``id``, ``latitude`` and ``longitude`` attributes are
        read, together with the second ``<name>`` element found under it.
    index : hashable
        Row label used for the single row of the returned frame.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``date``, ``id``, ``name``, ``latitude`` and
        ``longitude``. ``date`` is the day given by ``get_last_date()`` parsed
        into a datetime; attribute values are stored as read from the element
        (``None`` when an attribute is absent).

    Raises
    ------
    IndexError
        If fewer than two ``<name>`` elements are found under ``area``.
    """
    attributes = area.attrs
    # The second <name> is the one used for the area label
    # (presumably the Indonesian-language name -- verify against the feed).
    name_tag = area.select('name')[1]
    area_dict = {
        'date': datetime.strptime(get_last_date(), "%Y%m%d"),
        'id': attributes.get('id'),
        # Store the element's text explicitly. Putting the Tag object itself in
        # the dict only produced text because pandas iterated over the Tag's
        # children, which breaks as soon as the element has zero or several
        # child nodes.
        'name': name_tag.get_text(),
        'latitude': attributes.get('latitude'),
        'longitude': attributes.get('longitude'),
    }
    return pd.DataFrame(area_dict, index=[index])
    

In [27]:
def get_city_data(bmkg):
    """Collect the details of every land area into a single DataFrame.

    Parameters
    ----------
    bmkg : parsed document
        Object exposing ``select`` (e.g. a BeautifulSoup document); every
        element matching ``area[type="land"]`` is processed.

    Returns
    -------
    pandas.DataFrame
        The one-row frames produced by ``get_area_detail`` stacked in document
        order, each row labelled with the area's position. An empty DataFrame
        is returned when no area matches.
    """
    areas = bmkg.select('area[type="land"]')
    # Gather the per-area frames first and concatenate once: growing a frame
    # with pd.concat inside the loop re-copies every earlier row each time.
    frames = [get_area_detail(area, i) for i, area in enumerate(areas)]
    if not frames:
        # pd.concat raises ValueError on an empty sequence.
        return pd.DataFrame()
    return pd.concat(frames)

In [41]:
# Area details (date, id, name, latitude, longitude) for every land area in the parsed document.
city_data = get_city_data(bs)

In [42]:
# Bare expression: shows the frame as this cell's output.
city_data

Unnamed: 0,date,id,name,latitude,longitude
0,2023-05-08,501272,Kab. Bangkalan,-7.033333333,112.7667
1,2023-05-08,501273,Kab. Banyuwangi,-8.216666667,114.35
2,2023-05-08,501274,Kota Batu,-7.83708,112.53144
3,2023-05-08,501277,Kab. Bojonegoro,-7.183333334,111.9
4,2023-05-08,501278,Kab. Bondowoso,-7.916666669,113.8167
5,2023-05-08,501279,Kab. Gresik,-7.158125,112.651062
6,2023-05-08,501280,Kab. Jember,-8.183333334,113.6833
7,2023-05-08,501281,Kab. Jombang,-7.55994,112.25719
8,2023-05-08,5002271,Kab. Blitar,-8.129929,112.252808
9,2023-05-08,5002268,Kab. Kediri,-7.806242,112.180023


In [43]:
def get_temp_hum_data(area,index):
    """Read the tmax/tmin/humax/humin values of one area for the last date.

    For each parameter the first ``<value>`` found under the ``timerange``
    whose ``day`` equals ``get_last_date()`` is taken; the temperature
    parameters additionally require ``unit="C"``. Values are kept as the text
    read from the element.

    Parameters
    ----------
    area : bs4 Tag
        The ``<area>`` element to search.
    index : hashable
        Row label for the returned one-row frame.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``tmax``, ``tmin``, ``humax`` and ``humin``.

    Raises
    ------
    IndexError
        If any of the four selectors matches nothing.
    """
    day_filter = f'timerange[day="{get_last_date()}"]'
    # Parameter id (also the output column) -> selector for its value element.
    value_selectors = {
        'tmax': 'value[unit="C"]',
        'tmin': 'value[unit="C"]',
        'humax': 'value',
        'humin': 'value',
    }
    readings = {}
    for param_id, value_selector in value_selectors.items():
        matches = area.select(f'parameter[id="{param_id}"] {day_filter} {value_selector}')
        readings[param_id] = matches[0].text
    return pd.DataFrame(readings, index=[index])

In [47]:
def get_t_hum_monitoring_data(bmkg):
    """Build the temperature/humidity table for every land area.

    Parameters
    ----------
    bmkg : parsed document
        Object exposing ``select`` (e.g. a BeautifulSoup document); every
        element matching ``area[type="land"]`` is processed.

    Returns
    -------
    pandas.DataFrame
        One row per area, labelled with the area's position: the columns from
        ``get_area_detail`` (without ``latitude`` and ``longitude``) followed
        by the columns from ``get_temp_hum_data``. An empty DataFrame is
        returned when no area matches.
    """
    areas = bmkg.select('area[type="land"]')
    rows = []
    for i, area in enumerate(areas):
        res_detail = get_temp_hum_data(area, i)
        area_detail = get_area_detail(area, i)
        # Both frames carry the same single row label, so axis=1 puts the
        # readings next to the area details on one row.
        rows.append(pd.concat((area_detail, res_detail), axis=1))
    if not rows:
        # No matching area: dropping latitude/longitude from a frame that has
        # no columns would raise KeyError, and pd.concat rejects an empty list.
        return pd.DataFrame()
    # Concatenate once instead of growing the frame inside the loop, and drop
    # the coordinates without mutating in place.
    return pd.concat(rows).drop(columns=['latitude', 'longitude'])

In [48]:
# Temperature/humidity table for every land area in the parsed document.
temp_humid_data = get_t_hum_monitoring_data(bs)

In [52]:
# Second extraction from the same parsed document `bs`, so these rows repeat
# the ones already held in temp_humid_data.
new_data = get_t_hum_monitoring_data(bs)

# Stack both tables and renumber the rows 0..n-1; the result is only displayed.
pd.concat([temp_humid_data, new_data], ignore_index=True)

Unnamed: 0,date,id,name,tmax,tmin,humax,humin
0,2023-05-08,501272,Kab. Bangkalan,33,25,95,65
1,2023-05-08,501273,Kab. Banyuwangi,34,24,95,60
2,2023-05-08,501274,Kota Batu,23,17,100,70
3,2023-05-08,501277,Kab. Bojonegoro,32,25,95,65
4,2023-05-08,501278,Kab. Bondowoso,30,20,100,65
...,...,...,...,...,...,...,...
71,2023-05-08,501305,Kab. Sumenep,33,25,95,65
72,2023-05-08,501306,Kota Surabaya,33,25,95,60
73,2023-05-08,501307,Kab. Trenggalek,31,22,95,75
74,2023-05-08,501308,Kab. Tuban,32,25,95,65
