# **1. Instalasi dan Import Library**

In [1]:
pip install bs4



In [2]:
import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re

# **2. Koneksi ke web page**

In [3]:
# Address of the page that gets downloaded and parsed below.
my_url = "https://warning.bmkg.go.id/"

In [4]:
# Download the raw HTML bytes. The context manager closes the connection
# even when read() raises, so the socket is never leaked.
with uReq(my_url) as uClient:
    page_html = uClient.read()

In [5]:
# Parse the downloaded markup with Python's built-in HTML parser.
page_soup = soup(page_html, features="html.parser")

In [6]:
# Sanity check: display the first <h1> element of the parsed page.
page_soup.h1

<h1><img alt="BMKG" height="98" src="img/logo-bmkg.png" width="80"/></h1>

# **3. Mengonversi Data dari HTML**

In [7]:
# Collect every <div class="col-8"> on the page. find_all is the current
# Beautiful Soup method name; findAll is only a legacy camelCase alias.
scrap = page_soup.find_all("div", {"class":"col-8"})

In [8]:
# Show how many matching <div> elements were found.
len(scrap)

1

In [9]:
# Keep the first matching <div>; the scraper cells below read from it.
page = scrap[0]
# Display the element so its structure can be inspected.
page

<div class="col-8"> <div class="lindu">
<h3 class="margin-bottom-10 center m5">Gempabumi M ≥ 5.0</h3> <h5 class="center">05 Oktober 2023, 11:25:47 WIB</h5>
<div id="map"></div>
<ul class="infolindu">
<li><img alt="Magnitudo" height="26" src="img/magnitude.png" width="26"/>5.0<span>Magnitudo</span></li>
<li><img alt="Kedalaman" height="26" src="img/kedalaman.png" width="26"/>10 km<span>Kedalaman</span></li>
<li><img alt="Koordinat" height="26" src="img/koordinat.png" width="26"/>4.46 LS<br/>127.09 BT</li>
</ul>
<div class="infoext">
<p class="par"><span>Lokasi Gempa</span>99 km BaratDaya BURUSELATAN-MALUKU</p>
<p class="par"><span>Arahan</span>tidak berpotensi TSUNAMI</p>
<p class="par"><span>Saran BMKG</span>Hati-hati terhadap gempabumi susulan yang mungkin terjadi</p>
<p><a class="tombol shakemap" href="https://bmkg-content-inatews.storage.googleapis.com/20231005112828.mmi.jpg" rel="noopener" target="_blank">Peta Guncangan (<em>Shakemap</em>)</a></p>
<p class="alert-lindu">Info Gempa 

# **4. Membangun Scraper**

### A. Magnitudo

In [10]:
# select() expects a CSS selector string; an attribute dict passed as the second
# argument is not applied as a filter, so the previous call simply returned every
# <li>. Select the <li> that actually contains the "Magnitudo" icon instead, so the
# result no longer depends on the order of the list items.
magnitudo = page.div.select('li:has(img[alt="Magnitudo"])')
# Text of the matched item: the numeric value immediately followed by its label.
mag = magnitudo[0].text
mag

'5.0Magnitudo'

In [11]:
# Keep only digits and the decimal point. The previous class [\d\W] also matched
# every non-word character (spaces, commas, ...), which would leak into the number.
ang_mag = r'[\d.]'
ang = re.findall(ang_mag, mag)
ang

['5', '.', '0']

In [12]:
# Glue the matched characters back together into the magnitude value.
an = "".join(ang)
an

'5.0'

In [13]:
# Pull out every ASCII letter of the magnitude text, one match per character.
hur_mag = r'[a-zA-Z]'
hur = re.compile(hur_mag).findall(mag)
hur

['M', 'a', 'g', 'n', 'i', 't', 'u', 'd', 'o']

In [14]:
# Reassemble the single letters into the label word.
hu = "".join(hur)
hu

'Magnitudo'

In [15]:
# Final magnitude string: value and label separated by one space.
magni = f"{an} {hu}"
magni

'5.0 Magnitudo'

### B. Waktu Kejadian

In [16]:
# All <h5> elements inside the first child <div>; the first one is used.
waktu = page.div.select("h5")
# get_text() returns the same string as the .text property.
wkt = waktu[0].get_text()
wkt

'05 Oktober 2023, 11:25:47 WIB'

### C. Lokasi

In [17]:
# select() expects a CSS selector string; the attribute dict passed before was not
# applied as a filter, so every <p> was returned. "p.par" keeps only the
# paragraphs whose class is "par".
lokasi = page.div.select("p.par")
# Split the first paragraph's text on whitespace. The <span> label is glued to the
# text that follows it (no space in the markup), which the next cells repair.
lok = lokasi[0].text.split()
lok

['Lokasi', 'Gempa99', 'km', 'BaratDaya', 'BURUSELATAN-MALUKU']

In [18]:
# Inspect the second token, where the label and the distance are stuck together.
lok[1]

'Gempa99'

In [19]:
# Extract the letters of the fused token.
ke = lok[1]
# The upper bound must be 'Z': the range A-z also spans the ASCII characters
# between 'Z' and 'a' ([ \ ] ^ _ `), which are not letters.
pat = r'[a-zA-Z]'
ge = re.findall(pat, ke)
ge

['G', 'e', 'm', 'p', 'a']

In [20]:
# Join the letters back into the word part of the token.
tem = "".join(ge)
tem

'Gempa'

In [21]:
# Pull out the digits of the fused token, one match per digit.
pola = r'[\d]'
km = re.compile(pola).findall(ke)
km

['9', '9']

In [22]:
# Join the digits back into the distance number.
kilo = "".join(km)
kilo

'99'

In [23]:
# Rebuild the second token with a space between the word and the distance,
# then write it back into the token list.
lok_pas = f"{tem} {kilo}"
lok[1] = lok_pas
lok

['Lokasi', 'Gempa 99', 'km', 'BaratDaya', 'BURUSELATAN-MALUKU']

In [24]:
# Join the repaired tokens into one space-separated location string.
pos = str.join(" ", lok)
pos

'Lokasi Gempa 99 km BaratDaya BURUSELATAN-MALUKU'

# **5. Export ke CSV**

In [25]:
import pandas as pd

In [26]:
# Print the three scraped fields, one per line.
print(magni, wkt, pos, sep="\n")

5.0 Magnitudo
05 Oktober 2023, 11:25:47 WIB
Lokasi Gempa 99 km BaratDaya BURUSELATAN-MALUKU


In [27]:
# Colab-only helper module for attaching Google Drive to the runtime.
from google.colab import drive
# Mount Drive at /content/drive; the CSV export below writes under this path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
import os

# One-row table holding the scraped values (each value wrapped in a list so
# that pandas builds a single row).
data = {'Magnitudo':[an],
        'Waktu Kejadian':[wkt],
        'Lokasi':[pos]
        }

df = pd.DataFrame(data)

# Change this path to wherever you want the CSV to be written.
csv_path = '/content/drive/My Drive/WebScrap/CSV/gempa.csv'
# to_csv fails if the target folder does not exist, so create it first.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df.to_csv(csv_path, index=False)