In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('../gym-tracking.csv')
data = df['Weight (kg)'].dropna().round().astype(int)

# tentukan banyak kelas (K) dengan Rumus Sturges
n = len(data)
K = round(1 + 3.322 * np.log10(n))

# tentukan rentang (R)
max_value = int(data.max())
min_value = int(data.min())
R = max_value - min_value

# tentukan lebar kelas (C)
C = int(np.ceil(R / K))


In [2]:
# buat tabel distribusi frekuensi

# tentukan batas kelas (kelas pertama hingga kelas terakhir)
lower_bound = min_value
upper_bound = lower_bound + C

# membuat interval kelas dan batas kelas
intervals = []
for i in range(K):
    intervals.append(f"{lower_bound} - {upper_bound}")
    lower_bound = upper_bound + 1
    upper_bound = lower_bound + C - 1

frequencies, bins = np.histogram(data, bins=K, range=(min_value, max_value))
cumulative_frequencies = np.cumsum(frequencies)
midpoints = [(bin_start + bin_end) / 2 for bin_start, bin_end in zip(bins[:-1], bins[1:])]
central_frequencies = frequencies * midpoints

# buat dataframe untuk tabel distribusi frekuensi
df_distribution = pd.DataFrame({
    'Interval Kelas': intervals,
    'Batas Kelas Bawah': bins[:-1],
    'Batas Kelas Atas': bins[1:],
    'Frekuensi': frequencies,
    'Titik Tengah': midpoints,
    'Frekuensi Terpusat': central_frequencies,
    'Frekuensi Kumulatif': cumulative_frequencies
})

df_distribution

Unnamed: 0,Interval Kelas,Batas Kelas Bawah,Batas Kelas Atas,Frekuensi,Titik Tengah,Frekuensi Terpusat,Frekuensi Kumulatif
0,40 - 49,40.0,48.181818,82,44.090909,3615.454545,82
1,50 - 58,48.181818,56.363636,128,52.272727,6690.909091,210
2,59 - 67,56.363636,64.545455,181,60.454545,10942.272727,391
3,68 - 76,64.545455,72.727273,148,68.636364,10158.181818,539
4,77 - 85,72.727273,80.909091,117,76.818182,8987.727273,656
5,86 - 94,80.909091,89.090909,134,85.0,11390.0,790
6,95 - 103,89.090909,97.272727,42,93.181818,3913.636364,832
7,104 - 112,97.272727,105.454545,35,101.363636,3547.727273,867
8,113 - 121,105.454545,113.636364,39,109.545455,4272.272727,906
9,122 - 130,113.636364,121.818182,28,117.727273,3296.363636,934


In [3]:
# hitung total frekuensi dan frekuensi terpusat
n = df_distribution['Frekuensi'].sum()
fm = df_distribution['Frekuensi Terpusat'].sum()

print(n)
print(fm)

973
71725.0


In [4]:
# menentukan rata-rata hitung
x = fm / n
print(f"Rata-rata hitung: {x}")

Rata-rata hitung: 73.7153134635149


In [5]:
# menentukan median

median = np.median(data)
print(f"Median: {median}")

Median: 70.0


In [6]:
# menentukan modus

modus = data.mode()[0]
print(f"Modus: {modus}")

Modus: 64


In [7]:
# mencari kuartil

Q1 = data.quantile(0.25)
Q2 = data.quantile(0.50)  
Q3 = data.quantile(0.75)

print(f"Kuartil Q1: {Q1}")
print(f"Kuartil Q2 (Median): {Q2}")
print(f"Kuartil Q3: {Q3}")

Kuartil Q1: 58.0
Kuartil Q2 (Median): 70.0
Kuartil Q3: 86.0


In [8]:
# mencari desil

D1 = data.quantile(0.10)  
D2 = data.quantile(0.20)  
D3 = data.quantile(0.30) 

print(f"Desil D1 (10%): {D1}")
print(f"Desil D2 (20%): {D2}")
print(f"Desil D3 (30%): {D3}")

Desil D1 (10%): 50.0
Desil D2 (20%): 56.0
Desil D3 (30%): 60.0


In [9]:
# mencari persentil

P25 = data.quantile(0.25)  
P50 = data.quantile(0.50)  
P75 = data.quantile(0.75)  

print(f"Persentil P25: {P25}")
print(f"Persentil P50 (Median): {P50}")
print(f"Persentil P75: {P75}")

Persentil P25: 58.0
Persentil P50 (Median): 70.0
Persentil P75: 86.0
