In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('../gym-tracking.csv')
data = df['Weight (kg)'].dropna().round().astype(int).head(50)

# tentukan banyak kelas (K) dengan Rumus Sturges
n = len(data)
K = round(1 + 3.322 * np.log10(n))

# tentukan rentang (R)
max_value = int(data.max())
min_value = int(data.min())
R = max_value - min_value

# tentukan lebar kelas (C)
C = int(np.ceil(R / K))

print(f"n: {n}")
print(f"K: {K}")
print(f"Max: {max_value}")
print(f"Min: {min_value}")
print(f"Rentang: {R}")
print(f"Lebar kelas: {C}")


n: 50
K: 7
Max: 128
Min: 42
Rentang: 86
Lebar kelas: 13


In [2]:
# buat tabel distribusi frekuensi

# tentukan batas kelas (kelas pertama hingga kelas terakhir)
lower_bound = min_value
upper_bound = lower_bound + C

# membuat interval kelas dan batas kelas
intervals = []
for i in range(K):
    intervals.append(f"{lower_bound} - {upper_bound}")
    lower_bound = upper_bound + 1
    upper_bound = lower_bound + C - 1

frequencies, bins = np.histogram(data, bins=K, range=(min_value, max_value))
cumulative_frequencies = np.cumsum(frequencies)
midpoints = [(bin_start + bin_end) / 2 for bin_start, bin_end in zip(bins[:-1], bins[1:])]
central_frequencies = frequencies * midpoints

# buat dataframe untuk tabel distribusi frekuensi
df_distribution = pd.DataFrame({
    'Interval Kelas': intervals,
    'Batas Kelas Bawah': bins[:-1],
    'Batas Kelas Atas': bins[1:],
    'Frekuensi': frequencies,
    'Titik Tengah': midpoints,
    'Frekuensi Terpusat': central_frequencies,
    'Frekuensi Kumulatif': cumulative_frequencies
})

df_distribution

Unnamed: 0,Interval Kelas,Batas Kelas Bawah,Batas Kelas Atas,Frekuensi,Titik Tengah,Frekuensi Terpusat,Frekuensi Kumulatif
0,42 - 55,42.0,54.285714,11,48.142857,529.571429,11
1,56 - 68,54.285714,66.571429,12,60.428571,725.142857,23
2,69 - 81,66.571429,78.857143,9,72.714286,654.428571,32
3,82 - 94,78.857143,91.142857,6,85.0,510.0,38
4,95 - 107,91.142857,103.428571,3,97.285714,291.857143,41
5,108 - 120,103.428571,115.714286,3,109.571429,328.714286,44
6,121 - 133,115.714286,128.0,6,121.857143,731.142857,50


In [3]:
# hitung total frekuensi dan frekuensi terpusat
n = df_distribution['Frekuensi'].sum()
fm = df_distribution['Frekuensi Terpusat'].sum()

print(f"n: {n}")
print(f"Frekuensi terpusat: {fm}")

n: 50
Frekuensi terpusat: 3770.857142857143


In [4]:
# menentukan rata-rata hitung
x = fm / n
print(f"Rata-rata hitung: {x}")

Rata-rata hitung: 75.41714285714286


In [5]:
# menentukan median

median = np.median(data)
print(f"Median: {median}")

Median: 69.0


In [6]:
# menentukan modus

modus = data.mode()[0]
print(f"Modus: {modus}")

Modus: 64


In [7]:
# mencari kuartil

Q1 = data.quantile(0.25)
Q2 = data.quantile(0.50)  
Q3 = data.quantile(0.75)

print(f"Kuartil Q1: {Q1}")
print(f"Kuartil Q2 (Median): {Q2}")
print(f"Kuartil Q3: {Q3}")

Kuartil Q1: 58.25
Kuartil Q2 (Median): 69.0
Kuartil Q3: 88.75


In [8]:
# mencari desil

D1 = data.quantile(0.10)  
D2 = data.quantile(0.20)  
D3 = data.quantile(0.30) 

print(f"Desil D1 (10%): {D1}")
print(f"Desil D2 (20%): {D2}")
print(f"Desil D3 (30%): {D3}")

Desil D1 (10%): 48.7
Desil D2 (20%): 53.8
Desil D3 (30%): 59.7


In [9]:
# mencari persentil

P25 = data.quantile(0.25)  
P50 = data.quantile(0.50)  
P75 = data.quantile(0.75)  

print(f"Persentil P25: {P25}")
print(f"Persentil P50 (Median): {P50}")
print(f"Persentil P75: {P75}")

Persentil P25: 58.25
Persentil P50 (Median): 69.0
Persentil P75: 88.75
