# Kode Fungsi

In [None]:
import pandas as pd

def data_imputer(strategy='mean'):
    """Build a function that fills missing values in numeric pandas data.

    Args:
        strategy: Statistic used as the fill value. One of ``'mean'``,
            ``'median'`` or ``'mode'``. For ``'mode'`` the first value
            returned by ``Series.mode()`` is used.

    Returns:
        A callable ``impute(data)`` that accepts a ``pd.DataFrame`` or a
        ``pd.Series`` and returns the imputed data. Non-numeric columns
        (or a non-numeric Series) are passed through unchanged. The input
        object is never modified; a DataFrame input yields a new DataFrame.

    Raises:
        ValueError: Raised by the returned callable when ``data`` is not a
            DataFrame/Series, or when ``strategy`` is not supported.
    """
    supported = ('mean', 'median', 'mode')

    def fill_value(series):
        # Compute the replacement value for one numeric Series.
        if strategy == 'mean':
            return series.mean()
        if strategy == 'median':
            return series.median()
        # 'mode': mode() is empty when every entry is missing, so indexing
        # it blindly would raise; signal "nothing to fill with" instead.
        modes = series.mode()
        return None if modes.empty else modes.iloc[0]

    def impute_series(series):
        # Return the Series with NaNs filled; non-numeric data is untouched.
        if not pd.api.types.is_numeric_dtype(series):
            return series
        value = fill_value(series)
        # An all-missing Series has no usable statistic: leave it as is.
        if value is None or pd.isna(value):
            return series
        return series.fillna(value)

    def impute(data):
        if not isinstance(data, (pd.DataFrame, pd.Series)):
            raise ValueError("Unsupported data type. Expecting Pandas DataFrame or Series.")
        # Validate up front so a bad strategy is reported even when the
        # data happens to contain no numeric columns.
        if strategy not in supported:
            raise ValueError("Unsupported imputation strategy")

        if isinstance(data, pd.Series):
            return impute_series(data)

        # Work on a copy so the caller's DataFrame keeps its missing values.
        result = data.copy()
        for column in result.columns:
            result[column] = impute_series(result[column])
        return result

    return impute

# Penerapan pada dataset

In [None]:
# Load the dataset from an Excel workbook into `data`.
# NOTE(review): the absolute "/content/..." path looks environment-specific
# (presumably a hosted notebook) — confirm the file exists where this runs.
import pandas as pd
data = pd.read_excel("/content/pbf.xlsx")

In [None]:
# Display the loaded dataset as the cell output.
data

Unnamed: 0,Bulan,Jumlah Curah Hujan,Rata-rata Suhu Udara,Rata-rata Kelembaban Udara
0,Januari,Tinggi,34.2,100.0
1,Februari,Menengah,34.4,100.0
2,Maret,Menengah,34.6,100.0
3,April,Menengah,,98.0
4,Mei,Menengah,35.0,100.0
5,Juni,Menengah,34.2,
6,Juli,Rendah,33.4,98.0
7,Agustus,Menengah,34.2,98.0
8,September,Rendah,34.0,96.0
9,Oktober,Menengah,34.4,98.0


In [None]:
# Count the missing values in each column before imputing.
data.isnull().sum()

Bulan                         0
Jumlah Curah Hujan            0
Rata-rata Suhu Udara          1
Rata-rata Kelembaban Udara    1
dtype: int64

In [None]:
# Build an imputer with the default strategy ('mean') and apply it to the
# dataset, keeping the returned result in `data_imputed`.
impute_mean = data_imputer()
data_imputed = impute_mean(data)

In [None]:
# Display the result of the mean-based imputation.
data_imputed

Unnamed: 0,Bulan,Jumlah Curah Hujan,Rata-rata Suhu Udara,Rata-rata Kelembaban Udara
0,Januari,Tinggi,34.2,100.0
1,Februari,Menengah,34.4,100.0
2,Maret,Menengah,34.6,100.0
3,April,Menengah,34.272727,98.0
4,Mei,Menengah,35.0,100.0
5,Juni,Menengah,34.2,98.818182
6,Juli,Rendah,33.4,98.0
7,Agustus,Menengah,34.2,98.0
8,September,Rendah,34.0,96.0
9,Oktober,Menengah,34.4,98.0


In [None]:
# Build a median-based imputer and apply it to the dataset; the returned
# value is shown as the cell output.
impute_median = data_imputer('median')
impute_median(data)

Unnamed: 0,Bulan,Jumlah Curah Hujan,Rata-rata Suhu Udara,Rata-rata Kelembaban Udara
0,Januari,Tinggi,34.2,100.0
1,Februari,Menengah,34.4,100.0
2,Maret,Menengah,34.6,100.0
3,April,Menengah,34.2,98.0
4,Mei,Menengah,35.0,100.0
5,Juni,Menengah,34.2,99.0
6,Juli,Rendah,33.4,98.0
7,Agustus,Menengah,34.2,98.0
8,September,Rendah,34.0,96.0
9,Oktober,Menengah,34.4,98.0


In [None]:
# Build a mode-based imputer and apply it to the dataset; the returned
# value is shown as the cell output.
impute_mode = data_imputer('mode')
impute_mode(data)

Unnamed: 0,Bulan,Jumlah Curah Hujan,Rata-rata Suhu Udara,Rata-rata Kelembaban Udara
0,Januari,Tinggi,34.2,100.0
1,Februari,Menengah,34.4,100.0
2,Maret,Menengah,34.6,100.0
3,April,Menengah,34.2,98.0
4,Mei,Menengah,35.0,100.0
5,Juni,Menengah,34.2,100.0
6,Juli,Rendah,33.4,98.0
7,Agustus,Menengah,34.2,98.0
8,September,Rendah,34.0,96.0
9,Oktober,Menengah,34.4,98.0
