In [1]:
import pandas as pd
import numpy as np

from tabulate import tabulate

# Read ../vgsales.csv (path relative to the notebook's working directory) into a DataFrame
df = pd.read_csv('../vgsales.csv')
# Bare last expression: the notebook renders the frame as this cell's output
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [2]:
# Column to summarise: the NA_Sales series
data = df['NA_Sales']

# Number of classes from Sturges' rule, k = 1 + 3.322 * log10(n), truncated to an integer
num_classes = int(1 + 3.322 * np.log10(len(data)))

# Bin the values into left-closed classes, then count the observations per class
# in interval order and turn the result into a two-column frame
binned = pd.cut(data, bins=num_classes, right=False, precision=2)
class_counts = binned.value_counts().sort_index()
frequency_table = class_counts.reset_index()

# Name the columns (interval label, frequency)
frequency_table.columns = ['Interval', 'Frekuensi']

print(tabulate(frequency_table, headers='keys', tablefmt='pretty', showindex=False))

# Store the interval labels as plain strings so later cells can parse them as text
frequency_table['Interval'] = frequency_table['Interval'].astype(str)

+----------------+-----------+
|    Interval    | Frekuensi |
+----------------+-----------+
|  [0.0, 2.77)   |   16427   |
|  [2.77, 5.53)  |    117    |
|  [5.53, 8.3)   |    29     |
|  [8.3, 11.06)  |    13     |
| [11.06, 13.83) |     3     |
| [13.83, 16.6)  |     5     |
| [16.6, 19.36)  |     0     |
| [19.36, 22.13) |     0     |
| [22.13, 24.89) |     1     |
| [24.89, 27.66) |     1     |
| [27.66, 30.43) |     1     |
| [30.43, 33.19) |     0     |
| [33.19, 35.96) |     0     |
| [35.96, 38.72) |     0     |
| [38.72, 41.53) |     1     |
+----------------+-----------+


In [3]:
# Preparation: lower/upper class limits, class midpoints and the weighted frequency (f * m)

# Pull the two numeric limits out of labels shaped like "[a, b)"
limits = frequency_table['Interval'].str.extract(r'\[([\d.]+),\s*([\d.]+)\)').astype(float)
frequency_table[['Batas Bawah', 'Batas Atas']] = limits

# Midpoint of each class
lower = frequency_table['Batas Bawah']
upper = frequency_table['Batas Atas']
frequency_table['Titik Tengah'] = (lower + upper) / 2

# Frequency times midpoint, used by the grouped mean
frequency_table['f_m'] = frequency_table['Frekuensi'] * frequency_table['Titik Tengah']

frequency_table

Unnamed: 0,Interval,Frekuensi,Batas Bawah,Batas Atas,Titik Tengah,f_m
0,"[0.0, 2.77)",16427,0.0,2.77,1.385,22751.395
1,"[2.77, 5.53)",117,2.77,5.53,4.15,485.55
2,"[5.53, 8.3)",29,5.53,8.3,6.915,200.535
3,"[8.3, 11.06)",13,8.3,11.06,9.68,125.84
4,"[11.06, 13.83)",3,11.06,13.83,12.445,37.335
5,"[13.83, 16.6)",5,13.83,16.6,15.215,76.075
6,"[16.6, 19.36)",0,16.6,19.36,17.98,0.0
7,"[19.36, 22.13)",0,19.36,22.13,20.745,0.0
8,"[22.13, 24.89)",1,22.13,24.89,23.51,23.51
9,"[24.89, 27.66)",1,24.89,27.66,26.275,26.275


In [4]:
# Arithmetic mean of grouped data: sum(f * m) / sum(f)

total_f = frequency_table['Frekuensi'].sum()
total_fm = frequency_table['f_m'].sum()
mean = total_fm / total_f

print(f"Rata-rata NA_Sales (berkelompok): {mean:.2f}")

Rata-rata NA_Sales (berkelompok): 1.43


In [5]:
# Preparation for the grouped-data median: cumulative frequencies and the median class

frequency_table['Frekuensi Kumulatif'] = frequency_table['Frekuensi'].cumsum()
n = frequency_table['Frekuensi'].sum()

# The median class is the first row whose cumulative frequency reaches n / 2
reaches_half = frequency_table['Frekuensi Kumulatif'] >= n / 2
median_class_idx = frequency_table.loc[reaches_half].index[0]

frequency_table

Unnamed: 0,Interval,Frekuensi,Batas Bawah,Batas Atas,Titik Tengah,f_m,Frekuensi Kumulatif
0,"[0.0, 2.77)",16427,0.0,2.77,1.385,22751.395,16427
1,"[2.77, 5.53)",117,2.77,5.53,4.15,485.55,16544
2,"[5.53, 8.3)",29,5.53,8.3,6.915,200.535,16573
3,"[8.3, 11.06)",13,8.3,11.06,9.68,125.84,16586
4,"[11.06, 13.83)",3,11.06,13.83,12.445,37.335,16589
5,"[13.83, 16.6)",5,13.83,16.6,15.215,76.075,16594
6,"[16.6, 19.36)",0,16.6,19.36,17.98,0.0,16594
7,"[19.36, 22.13)",0,19.36,22.13,20.745,0.0,16594
8,"[22.13, 24.89)",1,22.13,24.89,23.51,23.51,16595
9,"[24.89, 27.66)",1,24.89,27.66,26.275,26.275,16596


In [6]:
# Median of grouped data: Me = L + ((n / 2 - F) / f) * c

# Parameters of the median class
l_m = frequency_table.at[median_class_idx, 'Batas Bawah']  # lower boundary of the median class
f_m = frequency_table.at[median_class_idx, 'Frekuensi']  # frequency of the median class
c = frequency_table.at[median_class_idx, 'Batas Atas'] - l_m  # class width
# Cumulative frequency before the median class (0 when the median class is the first row).
# The column is named 'Frekuensi Kumulatif' (with a space); looking it up with an
# underscore raises KeyError as soon as the median class is not the first row.
F = 0 if median_class_idx == 0 else frequency_table.at[median_class_idx - 1, 'Frekuensi Kumulatif']

# Compute the median
median = l_m + ((n / 2 - F) / f_m) * c

print(f"Median NA_Sales (berkelompok): {median:.2f}")

Median NA_Sales (berkelompok): 1.40


In [7]:
# Mode of grouped data: Mo = L + (d1 / (d1 + d2)) * c

# Modal class: the row with the highest frequency
modus_class = frequency_table.loc[frequency_table['Frekuensi'].idxmax()]

# Parameters of the modal class
l_mo = modus_class['Batas Bawah']  # lower boundary of the modal class
f_modus = modus_class['Frekuensi']  # frequency of the modal class
c = modus_class['Batas Atas'] - l_mo  # class width

# Frequencies of the neighbouring classes; a missing neighbour counts as 0
if modus_class.name > 0:
    f_before = frequency_table.loc[modus_class.name - 1, 'Frekuensi']
else:
    f_before = 0

if modus_class.name < len(frequency_table) - 1:
    f_after = frequency_table.loc[modus_class.name + 1, 'Frekuensi']
else:
    f_after = 0

# d1: excess over the previous class, d2: excess over the next class
d1 = f_modus - f_before
d2 = f_modus - f_after

# Compute the mode
modus = l_mo + (d1 / (d1 + d2)) * c

print(f"Modus data kelompok: {modus:.2f}")


Modus data kelompok: 1.39


In [8]:
# Normalise the interval labels: drop the brackets and turn "a, b" into "a - b"
frequency_table['Interval'] = (
    frequency_table['Interval']
    .astype(str)
    .str.replace(r'[\[\]()]', '', regex=True)
    .str.replace(', ', ' - ')
)

# Plain Python lists of class labels and frequencies for the helper functions below
kelas = frequency_table['Interval'].tolist()
frekuensi = frequency_table['Frekuensi'].tolist()

In [9]:
# Quartiles of grouped data
def hitung_kuartil(kelas, frekuensi, kuartil_ke):
    """Return the kuartil_ke-th quartile of a grouped frequency distribution.

    Applies Q_k = L + ((k * n / 4 - F) / f) * c to the first class whose
    cumulative frequency reaches the position k * n / 4.

    Args:
        kelas: class labels formatted as "lower - upper", one per class.
        frekuensi: class frequencies, in the same order as ``kelas``.
        kuartil_ke: which quartile to compute (the notebook uses 1, 2 and 3).

    Returns:
        The interpolated quartile, or None when no class reaches the position.
    """
    posisi = kuartil_ke * sum(frekuensi) / 4

    kumulatif = 0
    for i, f_kelas in enumerate(frekuensi):
        kumulatif += f_kelas
        if kumulatif >= posisi:
            batas = kelas[i].split(' - ')
            bawah = float(batas[0])
            lebar = float(batas[1]) - bawah
            # Cumulative frequency of all classes before this one
            f_sebelum = kumulatif - f_kelas

            return bawah + ((posisi - f_sebelum) / f_kelas) * lebar

# Evaluate the first, second (median) and third quartiles
q1, q2, q3 = (hitung_kuartil(kelas, frekuensi, k) for k in (1, 2, 3))

print(f"Kuartil ke-1 (Q1): {q1:.2f}")
print(f"Kuartil ke-2 (Q2): {q2:.2f}")
print(f"Kuartil ke-3 (Q3): {q3:.2f}")


Kuartil ke-1 (Q1): 0.70
Kuartil ke-2 (Q2): 1.40
Kuartil ke-3 (Q3): 2.10


In [10]:
# Deciles of grouped data
def hitung_desil(kelas, frekuensi, desil_ke):
    """Return the desil_ke-th decile of a grouped frequency distribution.

    Applies D_k = L + ((k * n / 10 - F) / f) * c to the first class whose
    cumulative frequency reaches the position k * n / 10.

    Args:
        kelas: class labels formatted as "lower - upper", one per class.
        frekuensi: class frequencies, in the same order as ``kelas``.
        desil_ke: which decile to compute.

    Returns:
        The interpolated decile, or None when no class reaches the position.
    """
    posisi = desil_ke * sum(frekuensi) / 10

    kumulatif = 0
    for i, f_kelas in enumerate(frekuensi):
        kumulatif += f_kelas
        if kumulatif >= posisi:
            bawah, atas = (float(batas) for batas in kelas[i].split(' - '))
            # Cumulative frequency of all classes before this one
            f_sebelum = kumulatif - f_kelas

            return bawah + ((posisi - f_sebelum) / f_kelas) * (atas - bawah)

# Evaluate deciles D1 through D3, keyed by the decile number as a string
desil_values = dict((f"{i}", hitung_desil(kelas, frekuensi, i)) for i in range(1, 4))

for desil in desil_values:
    value = desil_values[desil]
    print(f"Desil ke-{desil} (D{desil}): {value:.2f}")


Desil ke-1 (D1): 0.28
Desil ke-2 (D2): 0.56
Desil ke-3 (D3): 0.84


In [11]:
# Percentiles of grouped data
def hitung_persentil(kelas, frekuensi, persentil_ke):
    """Return the persentil_ke-th percentile of a grouped frequency distribution.

    Applies P_k = L + ((k * n / 100 - F) / f) * c to the first non-empty class
    whose cumulative frequency reaches the position k * n / 100.

    The position is based on n, not n + 1, which matches the grouped median,
    quartile and decile calculations in this notebook (so P50 equals Q2) and
    lets P100 resolve to the upper boundary of the last class instead of
    overshooting the total frequency.

    Args:
        kelas: class labels formatted as "lower - upper", one per class.
        frekuensi: class frequencies, in the same order as ``kelas``.
        persentil_ke: which percentile to compute (0 to 100).

    Returns:
        The interpolated percentile, or None when no class reaches the
        position (for example persentil_ke > 100 or all frequencies are 0).
    """
    total_frekuensi = sum(frekuensi)
    posisi_persentil = persentil_ke * total_frekuensi / 100
    frekuensi_kumulatif = 0

    for i, f in enumerate(frekuensi):
        frekuensi_kumulatif += f
        # Skip empty classes: they cannot contain the percentile and would
        # cause a division by zero in the interpolation below.
        if f > 0 and frekuensi_kumulatif >= posisi_persentil:
            tepi_bawah, tepi_atas = map(float, kelas[i].split(' - '))
            panjang_interval = tepi_atas - tepi_bawah
            # Cumulative frequency of all classes before this one
            sigma_f = frekuensi_kumulatif - f
            return tepi_bawah + ((posisi_persentil - sigma_f) / f) * panjang_interval
    return None

# Compute the 25th, 50th and 75th percentiles
kelas = frequency_table['Interval'].tolist()
frekuensi = frequency_table['Frekuensi'].tolist()
persentil = dict((f"{p}", hitung_persentil(kelas, frekuensi, p)) for p in (25, 50, 75))

for key in persentil:
    value = persentil[key]
    print(f"Persentil ke-{key} (P{key}): {value:.2f}")


Persentil ke-25 (P25): 0.70
Persentil ke-50 (P50): 1.40
Persentil ke-75 (P75): 2.10
