In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [4]:
df = pd.read_csv("kecamatanforecast-jawabarat.csv", delimiter=";")

In [5]:
df

Unnamed: 0,501212,2024-12-11 00:00:00,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,85,22,4,W,10
0,501212,2024-12-11 01:00:00,,,,,79,24,4,W,10
1,501212,2024-12-11 02:00:00,,,,,74,25,4,W,10
2,501212,2024-12-11 03:00:00,,,,,81,26,4,W,10
3,501212,2024-12-11 04:00:00,,,,,73,28,4,W,10
4,501212,2024-12-11 05:00:00,,,,,67,27,5,W,10
...,...,...,...,...,...,...,...,...,...,...,...
76278,5009846,2024-12-20 00:00:00,,,,,91,24,4,W,11
76279,5009846,2024-12-20 06:00:00,,,,,61,31,1,W,11
76280,5009846,2024-12-20 12:00:00,24.0,31.0,61.0,91.0,80,27,4,NW,7
76281,5009846,2024-12-20 18:00:00,,,,,86,25,4,NW,7


# Clean Data

## Petunjuk

<h3>Urutan Parameter Data CSV (kiri ke kanan, <em>separator</em> ";")</h3>
<ul>
    <li>ID Lokasi (<a href="../DataMKG/MEWS/DigitalForecast/CSV/kecamatan_geofeatures.csv" target="_blank">Referensi Lokasi</a>)</li>
    <li>Waktu dalam UTC YYYY-MM-DD HH:mm:ss</pre></li>
    <li>Suhu Udara (rata-rata) Minimum dalam °C</li>
    <li>Suhu Udara (rata-rata) Maksimum dalam °C</li>
    <li>Kelembapan Udara (rata-rata) Minimum dalam %</li>
    <li>Kelembapan Udara (rata-rata) Maksimum dalam %</li>
    <li>Kelembapan Udara dalam %</li>
    <li>Suhu Udara dalam °C</li>
    <li>Cuaca berupa <a href="#kodecuaca">kode cuaca</a></li>
    <li>Arah Angin berupa <a href="#card">CARD</a></li>
    <li>Kecepatan Angin dalam km/jam</li>
</ul>

<h3 id="kodecuaca">Kode Cuaca</h3>
<ul>
    <li>0 / 100: Cerah / <em>Clear Skies</em></li>
    <li>1 / 101: Cerah Berawan / <em>Partly Cloudy</em></li>
    <li>2 / 102: Cerah Berawan / <em>Partly Cloudy</em></li>
    <li>3 / 103: Berawan / <em>Mostly Cloudy</em></li>
    <li>4 / 104: Berawan Tebal / <em>Overcast</em></li>
    <li>5: Udara Kabur / <em>Haze</em></li>
    <li>10: Asap / <em>Smoke</em></li>
    <li>45: Kabut / <em>Fog</em></li>
    <li>60: Hujan Ringan / <em>Light Rain</em></li>
    <li>61: Hujan Sedang / <em>Rain</em></li>
    <li>63: Hujan Lebat / <em>Heavy Rain</em></li>
    <li>80: Hujan Lokal / <em>Isolated Shower</em></li>
    <li>95: Hujan Petir / <em>Severe Thunderstorm</em></li>
    <li>97: Hujan Petir / <em>Severe Thunderstorm</em></li>
</ul>

<h3 id="card">Kode Arah Angin (CARD) (dibaca: dari arah ...)</h3>
<ul>
    <li>N: (North)</li>
    <li>NE: (Northeast)</li>
    <li>E:(East)</li>
    <li>SE: (Southeast)</li>
    <li>S: (South)</li>
    <li>SW:(Southwest)</li>
    <li>W: (West)</li>
    <li>NW: (Northwest)</li>
</ul>

In [6]:
try:
    df = df.rename(columns={
        "501212": "ID Lokasi",
        "2024-12-11 00:00:00": "Waktu",
        "Unnamed: 2": "Suhu Udara (rata-rata) Minimum",
        "Unnamed: 3": "Suhu Udara (rata-rata) Maksimum",
        "Unnamed: 4": "Kelembapan Udara (rata-rata) Minimum",
        "Unnamed: 5": "Kelembapan Udara (rata-rata) Maksimum",
        "85": "Kelembapan Udara",
        "22": "Suhu",
        "4": "Cuaca",
        "W": "Arah Angin",
        "10": "Kecepatan Angin"
    })
except: None

In [9]:
df

Unnamed: 0,ID Lokasi,Waktu,Suhu Udara (rata-rata) Minimum,Suhu Udara (rata-rata) Maksimum,Kelembapan Udara (rata-rata) Minimum,Kelembapan Udara (rata-rata) Maksimum,Kelembapan Udara,Suhu,Cuaca,Arah Angin,Kecepatan Angin
0,501212,2024-12-11 01:00:00,,,,,79,24,4,W,10
1,501212,2024-12-11 02:00:00,,,,,74,25,4,W,10
2,501212,2024-12-11 03:00:00,,,,,81,26,4,W,10
3,501212,2024-12-11 04:00:00,,,,,73,28,4,W,10
4,501212,2024-12-11 05:00:00,,,,,67,27,5,W,10
...,...,...,...,...,...,...,...,...,...,...,...
76278,5009846,2024-12-20 00:00:00,,,,,91,24,4,W,11
76279,5009846,2024-12-20 06:00:00,,,,,61,31,1,W,11
76280,5009846,2024-12-20 12:00:00,24.0,31.0,61.0,91.0,80,27,4,NW,7
76281,5009846,2024-12-20 18:00:00,,,,,86,25,4,NW,7


In [7]:
# Pilih fitur yang relevan untuk klasifikasi
features = ['Suhu Udara (rata-rata) Minimum', 'Suhu Udara (rata-rata) Maksimum',
            'Kelembapan Udara (rata-rata) Minimum', 'Kelembapan Udara (rata-rata) Maksimum',
            'Kecepatan Angin']

In [10]:
X = df[features]
y = df['Cuaca']  # Label klasifikasi berdasarkan kode cuaca BMKG


In [11]:
# Split data menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
# Inisialisasi dan latih model Decision Tree
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

In [13]:
# Prediksi dan evaluasi model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Akurasi Model: {accuracy * 100:.2f}%')

Akurasi Model: 62.71%


In [14]:
# Prediksi cuaca dua hari ke depan
data_prediksi = pd.DataFrame({
    'Suhu Udara (rata-rata) Minimum': [24, 25],
    'Suhu Udara (rata-rata) Maksimum': [32, 33],
    'Kelembapan Udara (rata-rata) Minimum': [60, 58],
    'Kelembapan Udara (rata-rata) Maksimum': [90, 88],
    'Kecepatan Angin': [10, 12]
})

In [17]:
prediksi_cuaca = model.predict(data_prediksi)
prediksi_cuaca

array([4, 4])

In [16]:
# Mapping kode cuaca ke deskripsi
kode_cuaca = {
    0: 'Cerah', 100: 'Cerah',
    1: 'Cerah Berawan', 101: 'Cerah Berawan', 2: 'Cerah Berawan', 102: 'Cerah Berawan',
    3: 'Berawan', 103: 'Berawan',
    4: 'Berawan Tebal', 104: 'Berawan Tebal',
    5: 'Udara Kabur', 10: 'Asap', 45: 'Kabut',
    60: 'Hujan Ringan', 61: 'Hujan Sedang', 63: 'Hujan Lebat',
    80: 'Hujan Lokal', 95: 'Hujan Petir', 97: 'Hujan Petir'
}

prediksi_cuaca_deskripsi = [kode_cuaca.get(kode, 'Tidak Diketahui') for kode in prediksi_cuaca]
print(f'Prediksi Cuaca untuk 2 Hari ke Depan: {prediksi_cuaca_deskripsi}')

Prediksi Cuaca untuk 2 Hari ke Depan: ['Berawan Tebal', 'Berawan Tebal']
