In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tabulate import tabulate

In [44]:
df = pd.read_csv('vgsales.csv');

In [None]:
# mengelompokan data
data = df['NA_Sales']  # Ganti df dengan DataFrame Anda

min_value = data.min()
max_value = data.max()
range_data = max_value - min_value

# Hitung jumlah kelas dengan rumus Sturges
n = len(data)  # Jumlah data
num_classes = int(1 + 3.322 * np.log10(n))

#  Hitung panjang interval
class_width = range_data / num_classes

# Tentukan batas interval
bins = np.arange(min_value, max_value + class_width, class_width)

# Buat tabel distribusi frekuensi
labels = [f"{round(bins[i], 2)} - {round(bins[i+1], 2)}" for i in range(len(bins)-1)]
frequency_table = pd.cut(data, bins=bins, labels=labels, right=False).value_counts().sort_index().reset_index()

# Atur tabel menjadi lebih rapi
frequency_table.columns = ['Interval', 'Frekuensi']

         Interval  Frekuensi
0      0.0 - 2.77      16427
1     2.77 - 5.53        117
2      5.53 - 8.3         29
3     8.3 - 11.06         13
4   11.06 - 13.83          3
5    13.83 - 16.6          5
6    16.6 - 19.36          0
7   19.36 - 22.13          0
8   22.13 - 24.89          1
9   24.89 - 27.66          1
10  27.66 - 30.43          1
11  30.43 - 33.19          0
12  33.19 - 35.96          0
13  35.96 - 38.72          0
14  38.72 - 41.49          0


In [46]:
# Data tabel frekuensi
data = frequency_table.values.tolist()  # Konversi DataFrame ke list of lists
headers = ["Interval", "Frekuensi"]  # Header untuk tabel

# Membuat tabel menggunakan tabulate
table = tabulate(data, headers=headers, tablefmt="grid")

# Menampilkan tabel
print(table)

+---------------+-------------+
| Interval      |   Frekuensi |
| 0.0 - 2.77    |       16427 |
+---------------+-------------+
| 2.77 - 5.53   |         117 |
+---------------+-------------+
| 5.53 - 8.3    |          29 |
+---------------+-------------+
| 8.3 - 11.06   |          13 |
+---------------+-------------+
| 11.06 - 13.83 |           3 |
+---------------+-------------+
| 13.83 - 16.6  |           5 |
+---------------+-------------+
| 16.6 - 19.36  |           0 |
+---------------+-------------+
| 19.36 - 22.13 |           0 |
+---------------+-------------+
| 22.13 - 24.89 |           1 |
+---------------+-------------+
| 24.89 - 27.66 |           1 |
+---------------+-------------+
| 27.66 - 30.43 |           1 |
+---------------+-------------+
| 30.43 - 33.19 |           0 |
+---------------+-------------+
| 33.19 - 35.96 |           0 |
+---------------+-------------+
| 35.96 - 38.72 |           0 |
+---------------+-------------+
| 38.72 - 41.49 |           0 |
+-------

In [47]:
intervals = frequency_table['Interval'].to_list()
frequencies = frequency_table['Frekuensi'].to_list()

In [48]:
# mencari hitung rata-rata
midpoints = []

# hitung titik tengah
for interval in intervals:
    lower, upper = map(float, interval.split(' - '))
    midpoints.append((lower + upper) / 2)

# hitung f_i * m_i
fx = [f * m for f, m in zip(frequencies, midpoints)]

# hitung total f_i * m_i dan total f_i
total_fx = sum(fx)
total_f = sum(frequencies)

# hitung rata-rata
mean = total_fx / total_f

print(f"Titik tengah (m_i): {midpoints}")
print(f"f_i * m_i: {fx}")
print(f"Rata-rata hitung: {mean:.2f}")


Titik tengah (m_i): [1.385, 4.15, 6.915000000000001, 9.68, 12.445, 15.215, 17.98, 20.744999999999997, 23.509999999999998, 26.275, 29.045, 31.81, 34.575, 37.34, 40.105000000000004]
f_i * m_i: [22751.395, 485.55000000000007, 200.53500000000003, 125.84, 37.335, 76.075, 0.0, 0.0, 23.509999999999998, 26.275, 29.045, 0.0, 0.0, 0.0, 0.0]
Rata-rata hitung: 1.43


In [49]:
# mencari median

# hitung frekuensi
total_frequency = sum(frequencies)

# menentukan kelas median
N_2 = total_frequency / 2
cumulative_frequency = 0
median_class_index = -1

for i, f in enumerate(frequencies):
    cumulative_frequency += f
    if cumulative_frequency >= N_2:
        median_class_index = i
        break

if median_class_index == -1:
    raise ValueError("Tidak ditemukan kelas median. Periksa data input.")

# hitung komponen rumus median
lower_bound, upper_bound = map(float, intervals[median_class_index].split(' - '))
l_m = lower_bound - 0.5
f_m = frequencies[median_class_index]
cumulative_frequency_above = cumulative_frequency - f_m
c = upper_bound - lower_bound

median = l_m + ((N_2 - cumulative_frequency_above) / f_m) * c

print(f"Jumlah frekuensi (N): {total_frequency}")
print(f"Kelas median: {intervals[median_class_index]}")
print(f"Tepi bawah kelas median (L_m): {l_m}")
print(f"Frekuensi kelas median (f_m): {f_m}")
print(f"Frekuensi kumulatif sebelum kelas median (Σf): {cumulative_frequency_above}")
print(f"Panjang interval kelas (c): {c}")

print(f"Median: {median:.2f}")

Jumlah frekuensi (N): 16597
Kelas median: 0.0 - 2.77
Tepi bawah kelas median (L_m): -0.5
Frekuensi kelas median (f_m): 16427
Frekuensi kumulatif sebelum kelas median (Σf): 0
Panjang interval kelas (c): 2.77
Median: 0.90


In [50]:
# hitung modus
max_frequency = max(frequencies)
modus_class_index = frequencies.index(max_frequency)

# komponen rumus modus
lower_bound, upper_bound = map(float, intervals[modus_class_index].split(' - '))
l_mo = lower_bound  - 0.5
c = upper_bound - lower_bound
d1 = max_frequency - (frequencies[modus_class_index - 1] if modus_class_index > 0 else 0)
d2 = max_frequency - (frequencies[modus_class_index + 1] if modus_class_index < len(frequencies) -1 else 0)

# hitung modus
modus = l_mo + (d1 / (d1 + d2)) * c

print(f"Kelas modus: {intervals[modus_class_index]}")
print(f"Tepi bawah kelas modus (L_mo): {l_mo}")
print(f"d1: {d1}")
print(f"d2: {d2}")
print(f"Panjang interval kelas (c): {c}")
print(f"Modus: {modus:.2f}")

Kelas modus: 0.0 - 2.77
Tepi bawah kelas modus (L_mo): -0.5
d1: 16427
d2: 16310
Panjang interval kelas (c): 2.77
Modus: 0.89


In [51]:
# mengitung kuartil

total_frequencies = sum(frequencies)

def calcualte_position(i, total_frequencies):
    return (i * total_frequencies) / 4

def calculate_quartil(i, frequencies, intervals, total_frequencies):
    position = calcualte_position(i, total_frequencies)
    cumulative_freq = 0

    for idx, freq in enumerate(frequencies):
        cumulative_freq += freq

        if cumulative_freq >= position:
            lower_limit, upper_limit = intervals[idx]
            freq_before = cumulative_freq - freq
            class_width = upper_limit - lower_limit
            l_q = lower_limit
            f_q = freq

            q = l_q + ((position - freq_before) / f_q) * class_width
            return q

q1 = calculate_quartil(1, frequencies, intervals, total_frequencies)
q2 = calculate_quartil(2, frequencies, intervals, total_frequencies)
q3 = calculate_quartil(3, frequencies, intervals, total_frequencies)

print(f"Kuartil pertama (Q1): {q1:.2f}")
print(f"Kuartil kedua (Q2 - Median): {q2:.2f}")
print(f"Kuartil ketiga (Q3): {q3:.2f}")

ValueError: too many values to unpack (expected 2)