In [1]:
import pandas as pd
import numpy as np

from tabulate import tabulate

# Load the sales dataset from the CSV located one directory above the notebook.
csv_path = '../vgsales.csv'
df = pd.read_csv(csv_path)

# A bare trailing expression makes Jupyter render the frame as the cell output.
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [2]:
# Column under study.
data = df['JP_Sales']

# Number of classes from Sturges' rule: k = 1 + 3.322 * log10(n), truncated.
num_classes = int(1 + 3.322 * np.log10(len(data)))

# Bin the values into equal-width, left-closed classes ...
binned = pd.cut(data, bins=num_classes, right=False, precision=2)

# ... then count the observations per class, ordered by class interval.
class_counts = binned.value_counts().sort_index()
frequency_table = class_counts.reset_index()
frequency_table.columns = ['Interval', 'Frekuensi']

print(tabulate(frequency_table, headers='keys', tablefmt='pretty', showindex=False))

# Store the interval labels as plain strings; later cells parse the class
# edges back out of this text.
frequency_table['Interval'] = frequency_table['Interval'].astype(str)

+---------------+-----------+
|   Interval    | Frekuensi |
+---------------+-----------+
|  [0.0, 0.68)  |   16202   |
| [0.68, 1.36)  |    251    |
| [1.36, 2.04)  |    65     |
| [2.04, 2.73)  |    32     |
| [2.73, 3.41)  |    16     |
| [3.41, 4.09)  |    12     |
| [4.09, 4.77)  |    10     |
| [4.77, 5.45)  |     4     |
| [5.45, 6.13)  |     2     |
| [6.13, 6.81)  |     2     |
| [6.81, 7.49)  |     1     |
| [7.49, 8.18)  |     0     |
| [8.18, 8.86)  |     0     |
| [8.86, 9.54)  |     0     |
| [9.54, 10.23) |     1     |
+---------------+-----------+


In [3]:
# Extremes of the JP_Sales column.
# The names deliberately avoid `max`, `min` and `range`: binding those at
# notebook scope shadows the Python built-ins, so any later cell calling
# max(...), min(...) or range(...) would fail with "object is not callable".
jp_sales_max = df['JP_Sales'].max()
jp_sales_min = df['JP_Sales'].min()

# Range = largest observation minus smallest observation.
jp_sales_range = jp_sales_max - jp_sales_min

print(f"min: {jp_sales_min}")
print(f"max: {jp_sales_max}")
print(f"Range JP_Sales: {jp_sales_range}")

min: 0.0
max: 10.22
Range JP_Sales: 10.22


In [4]:
def titik_tengah(interval):
    """Return the midpoint (class mark) of a class interval.

    Parameters
    ----------
    interval : str or interval-like
        Either a label such as ``'[0.0, 0.68)'`` -- two numbers separated by
        a comma, optionally wrapped in square/round brackets, with any amount
        of surrounding whitespace -- or an object exposing numeric ``left``
        and ``right`` attributes.

    Returns
    -------
    float
        ``(lower edge + upper edge) / 2``.

    Raises
    ------
    ValueError
        If a string label does not consist of exactly two numeric edges.
    """
    if hasattr(interval, 'left') and hasattr(interval, 'right'):
        # Interval-like object: read the edges directly, no text parsing.
        tepi_bawah, tepi_atas = float(interval.left), float(interval.right)
    else:
        # Drop outer whitespace and brackets, then split on the comma alone so
        # that '[0.68,1.36)' and '[0.68, 1.36)' are both accepted; float()
        # ignores the whitespace left around each edge.
        bagian = str(interval).strip().strip('[]()').split(',')
        if len(bagian) != 2:
            raise ValueError(f"Invalid class interval: {interval!r}")
        tepi_bawah, tepi_atas = float(bagian[0]), float(bagian[1])
    return (tepi_bawah + tepi_atas) / 2

# Total number of observations = sum of all class frequencies.
total_frekuensi = frequency_table['Frekuensi'].sum()

# Class mark of every interval label, stored as a float column.
frequency_table['Titik Tengah'] = (
    frequency_table['Interval'].apply(titik_tengah).astype(float)
)

# Keep the frequency column as integers.
frequency_table['Frekuensi'] = frequency_table['Frekuensi'].astype(int)



In [5]:
# Weighted (grouped-data) mean: sum(f * m) / sum(f),
# where m is the class midpoint and f the class frequency.
jumlah_tertimbang = (frequency_table['Titik Tengah'] * frequency_table['Frekuensi']).sum()
rata_rata_tertimbang = jumlah_tertimbang / total_frekuensi

print(f"Rata-rata tertimbang (x̄): {rata_rata_tertimbang}")


Rata-rata tertimbang (x̄): 0.37064314977708157


In [6]:
# Mean absolute deviation: sum(f * |m - mean|) / sum(f).
selisih_mutlak = (frequency_table['Titik Tengah'] - rata_rata_tertimbang).abs()
jumlah_selisih = (selisih_mutlak * frequency_table['Frekuensi']).sum()
simpangan_rata_rata = jumlah_selisih / total_frekuensi

print(f"Simpangan rata-rata (SR): {simpangan_rata_rata}")

Simpangan rata-rata (SR): 0.05982411286760761


In [7]:
# Variance of the grouped data: sum(f * (m - mean)^2) / sum(f).
# The divisor is the total frequency itself (no n - 1 correction).
simpangan_kuadrat = (frequency_table['Titik Tengah'] - rata_rata_tertimbang) ** 2
jumlah_kuadrat = (frequency_table['Frekuensi'] * simpangan_kuadrat).sum()
varians = jumlah_kuadrat / total_frekuensi

print(f"Varians (σ²): {varians}")


Varians (σ²): 0.06890250387854739


In [8]:
# Standard deviation: the square root of the variance held in `varians`.

simpangan_baku = np.sqrt(varians)

print(f"Simpangan Baku (σ): {simpangan_baku}")

Simpangan Baku (σ): 0.262492864433583


In [9]:
def _tepi_interval(label):
    """Parse a class label such as '[0.0, 0.68)' into (lower, upper) floats."""
    bagian = str(label).strip().strip('[]()').split(',')
    if len(bagian) != 2:
        raise ValueError(f"Invalid class interval: {label!r}")
    return float(bagian[0]), float(bagian[1])


def hitung_persentil(kelas, frekuensi, persentil_ke):
    """Estimate a percentile from a grouped frequency distribution.

    The percentile position is ``persentil_ke * (n + 1) / 100`` with ``n`` the
    sum of ``frekuensi``. The class whose cumulative frequency first reaches
    that position is located, and the value is linearly interpolated inside it:

        lower_edge + ((position - cumulative_before) / class_freq) * class_width

    Parameters
    ----------
    kelas : sequence of str
        Class labels such as ``'[0.0, 0.68)'``, in ascending order.
    frekuensi : sequence of int
        Frequency of each class, aligned with ``kelas``.
    persentil_ke : int or float
        Requested percentile, between 0 and 100 inclusive.

    Returns
    -------
    float
        The interpolated percentile value.

    Raises
    ------
    ValueError
        If ``persentil_ke`` is outside [0, 100], if the total frequency is not
        positive, or if a class label cannot be parsed.
    """
    if not 0 <= persentil_ke <= 100:
        raise ValueError(f"persentil_ke must be within [0, 100], got {persentil_ke!r}")

    # Derive the total from the argument itself rather than from a notebook
    # global, so the result depends only on what the caller passed in.
    jumlah_data = sum(frekuensi)
    if jumlah_data <= 0:
        raise ValueError("frekuensi must contain at least one observation")

    posisi_persentil = persentil_ke * (jumlah_data + 1) / 100
    # The (n + 1) rule can point past the last observation (e.g. P100); cap
    # the position there so the function never falls through and returns None.
    # A plain comparison is used instead of the min() built-in on purpose.
    if posisi_persentil > jumlah_data:
        posisi_persentil = jumlah_data

    frekuensi_kumulatif = 0
    for i, f in enumerate(frekuensi):
        sigma_f = frekuensi_kumulatif  # cumulative frequency before this class
        frekuensi_kumulatif += f

        # Empty classes cannot contain the percentile; skipping them also
        # avoids dividing by zero when the position is 0.
        if f > 0 and frekuensi_kumulatif >= posisi_persentil:
            tepi_bawah, tepi_atas = _tepi_interval(kelas[i])
            panjang_interval = tepi_atas - tepi_bawah
            return tepi_bawah + ((posisi_persentil - sigma_f) / f) * panjang_interval

    # Not reachable for a positive total frequency; kept as an explicit guard.
    raise ValueError("percentile class not found")

# Class-interval labels as a plain Python list, in table order.
kelas = frequency_table['Interval'].tolist()

In [10]:

# Interquartile range from the grouped frequency table.

# The first and third quartiles are the 25th and 75th percentiles.
frekuensi_kelas = frequency_table['Frekuensi'].values.tolist()
Q1, Q3 = (hitung_persentil(kelas, frekuensi_kelas, p) for p in (25, 75))

# IQR = Q3 - Q1
IQR = Q3 - Q1

print(f"Kuartil ke-1 (Q1): {Q1}")
print(f"Kuartil ke-3 (Q3): {Q3}")
print(f"Jangkauan Kuartil (IQR): {IQR}")


Kuartil ke-1 (Q1): 0.1741655351191211
Kuartil ke-3 (Q3): 0.5224966053573633
Jangkauan Kuartil (IQR): 0.34833107023824217


In [11]:
# Percentile range from the grouped frequency table.

# Percentiles of interest: the 10th and the 90th.
frekuensi_kelas = frequency_table['Frekuensi'].values.tolist()
P10, P90 = (hitung_persentil(kelas, frekuensi_kelas, p) for p in (10, 90))

# Percentile range = P90 - P10
jangkauan_persentil = P90 - P10

print(f"Persentil ke-10 (P10): {P10}")
print(f"Persentil ke-90 (P90): {P90}")
print(f"Jangkauan Persentil (P90 - P10): {jangkauan_persentil}")


Persentil ke-10 (P10): 0.06966621404764844
Persentil ke-90 (P90): 0.6269959264288361
Jangkauan Persentil (P90 - P10): 0.5573297123811876
