In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Modelling
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

# Notebook-wide configuration
SEED = 42  # seed constant; not referenced in the cells visible here

# Display / plotting / warning behaviour for the rest of the notebook.
sns.set_style('whitegrid')
pd.set_option('display.max_columns', None)  # show every column when a frame is displayed
warnings.simplefilter('ignore')             # suppress all warnings

In [7]:
# Load the ISPU dataset: one DataFrame per CSV file found in the ISPU directory.
from pathlib import Path

ISPU_DIR = Path("ISPU")  # relative path: resolved against the current working directory

# Sort the paths so the load order is deterministic across runs and platforms.
files = sorted(ISPU_DIR.glob("*.csv"))
if not files:
    # Fail here with an actionable message; otherwise the empty list only
    # surfaces later as pandas' "No objects to concatenate" error.
    raise FileNotFoundError(
        f"No CSV files found in '{ISPU_DIR.resolve()}'. "
        "Run the notebook from the directory that contains the ISPU folder."
    )

dataframes = [pd.read_csv(file) for file in files]


In [8]:
# Combine all per-file ISPU frames row-wise into a single DataFrame,
# discarding the per-file indexes in favour of a fresh 0..n-1 index.
df_ispu = pd.concat(objs=dataframes, axis=0, ignore_index=True)
print(df_ispu.shape)  # (rows, columns)

(16902, 23)


In [9]:
# Show basic information about the dataset (column names, non-null counts, dtypes)
df_ispu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16902 entries, 0 to 16901
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   periode_data               16902 non-null  int64  
 1   tanggal                    16902 non-null  object 
 2   stasiun                    14000 non-null  object 
 3   pm_sepuluh                 4684 non-null   object 
 4   pm_duakomalima             5182 non-null   object 
 5   sulfur_dioksida            4817 non-null   object 
 6   karbon_monoksida           4826 non-null   object 
 7   ozon                       4827 non-null   object 
 8   nitrogen_dioksida          4804 non-null   object 
 9   max                        16895 non-null  object 
 10  parameter_pencemar_kritis  4795 non-null   object 
 11  kategori                   4870 non-null   object 
 12  bulan                      3045 non-null   float64
 13  pm10                       11667 non-null  obj

In [None]:
# Show the first 10 rows of the dataset
df_ispu.head(10)

Unnamed: 0,periode_data,tanggal,stasiun,pm_sepuluh,pm_duakomalima,sulfur_dioksida,karbon_monoksida,ozon,nitrogen_dioksida,max,parameter_pencemar_kritis,kategori,bulan,pm10,so2,co,o3,no2,critical,categori,lokasi_spku,pm25,pm_10
0,202302,2023-02-25,DKI5 Kebon Jeruk Jakarta Barat,35,-,13,12,31,18,35,PM10,BAIK,,,,,,,,,,,
1,202302,2023-02-26,DKI5 Kebon Jeruk Jakarta Barat,23,-,14,9,32,11,32,O3,BAIK,,,,,,,,,,,
2,202302,2023-02-27,DKI5 Kebon Jeruk Jakarta Barat,20,-,13,8,33,13,33,O3,BAIK,,,,,,,,,,,
3,202302,2023-02-28,DKI5 Kebon Jeruk Jakarta Barat,30,-,21,11,28,18,30,PM10,BAIK,,,,,,,,,,,
4,202303,2023-03-01,DKI1 Bunderan HI,38,44,50,8,19,27,50,3,BAIK,,,,,,,,,,,
5,202303,2023-03-02,DKI1 Bunderan HI,29,33,47,11,21,27,47,3,BAIK,,,,,,,,,,,
6,202303,2023-03-03,DKI1 Bunderan HI,38,46,49,9,16,25,49,3,BAIK,,,,,,,,,,,
7,202303,2023-03-04,DKI1 Bunderan HI,31,40,50,---,18,21,50,3,BAIK,,,,,,,,,,,
8,202303,2023-03-05,DKI1 Bunderan HI,33,41,47,11,21,22,47,3,BAIK,,,,,,,,,,,
9,202303,2023-03-06,DKI1 Bunderan HI,31,44,46,9,20,19,46,3,BAIK,,,,,,,,,,,
