In [2]:
import numpy as np
import pandas as pd

# Build a reproducible dataset of 100 rows and 2 variables.
# The seed is fixed so both uniform draws are identical on every run;
# Variabel_A is drawn first, then Variabel_B.
np.random.seed(42)
data = dict(
    Variabel_A=np.random.uniform(low=10000, high=20000, size=100),
    Variabel_B=np.random.uniform(low=0.0001, high=0.001, size=100),
)

df = pd.DataFrame(data)
df

Unnamed: 0,Variabel_A,Variabel_B
0,13745.401188,0.000128
1,19507.143064,0.000673
2,17319.939418,0.000383
3,15986.584842,0.000558
4,11560.186404,0.000917
...,...,...
95,14937.955964,0.000414
96,15227.328294,0.000753
97,14275.410184,0.000907
98,10254.191267,0.000898


In [None]:
def calculate_statistics(df, dtype):
    """Cast ``df`` to ``dtype`` and summarise it.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified (``astype`` returns a new object).
    dtype : numpy dtype
        Type every column is converted to before the statistics are computed.

    Returns
    -------
    tuple
        ``(mean, variance, correlation)`` as returned by the frame's
        ``mean()``, ``var()`` and ``corr()`` methods, in that order.
    """
    converted = df.astype(dtype)
    return converted.mean(), converted.var(), converted.corr()

# Compute the same statistics at single and then double precision so the
# two sets of results can be compared.
(mean32, var32, corr32), (mean64, var64, corr64) = (
    calculate_statistics(df, precision) for precision in (np.float32, np.float64)
)

In [10]:
# Report the float32 results: mean (fixed-point), variance (scientific
# notation) and the correlation matrix, one item per output line.
print(
    "Hasil Statistik dengan float32:",
    f"{'Metric':<15}{'Variabel_A':>15}{'Variabel_B':>15}",
    f"{'Mean':<15}{mean32['Variabel_A']:>15.6f}{mean32['Variabel_B']:>15.6f}",
    f"{'Varians':<15}{var32['Variabel_A']:>15.6e}{var32['Variabel_B']:>15.6e}",
    "Korelasi:",
    corr32.to_string(float_format="{:.6f}".format),
    sep="\n",
)

Hasil Statistik dengan float32:
Metric              Variabel_A     Variabel_B
Mean              14701.808594       0.000548
Varians           8.849995e+06   6.959051e-08
Korelasi:
            Variabel_A  Variabel_B
Variabel_A    1.000000   -0.034033
Variabel_B   -0.034033    1.000000


In [22]:
# Report the float64 results: mean (fixed-point), variance (scientific
# notation) and the correlation matrix, one item per output line.
print(
    "Hasil Statistik dengan float64:",
    f"{'Metric':<15}{'Variabel_A':>15}{'Variabel_B':>15}",
    f"{'Mean':<15}{mean64['Variabel_A']:>15.6f}{mean64['Variabel_B']:>15.6f}",
    f"{'Varians':<15}{var64['Variabel_A']:>15.6e}{var64['Variabel_B']:>15.6e}",
    "Korelasi:",
    corr64.to_string(float_format="{:.6f}".format),
    sep="\n",
)

Hasil Statistik dengan float64:
Metric              Variabel_A     Variabel_B
Mean              14701.807434       0.000548
Varians           8.849995e+06   6.959051e-08
Korelasi:
            Variabel_A  Variabel_B
Variabel_A    1.000000   -0.034033
Variabel_B   -0.034033    1.000000


In [13]:
# Show float64 minus float32 for each statistic; the subtraction is
# element-wise on the Series / DataFrame results.
print("\nPerbedaan Hasil Mean:")
print(mean64 - mean32)  # closing parenthesis was missing here (SyntaxError)
print("\nPerbedaan Hasil Varians:")
print(var64 - var32)
print("\nPerbedaan Hasil Korelasi:")
print(corr64 - corr32)


Perbedaan Hasil Mean:
Variabel_A   -1.159968e-03
Variabel_B    2.385954e-11
dtype: float64

Perbedaan Hasil Varians:
Variabel_A   -3.337585e-02
Variabel_B    1.341926e-15
dtype: float64

Perbedaan Hasil Korelasi:
              Variabel_A    Variabel_B
Variabel_A  0.000000e+00  5.137279e-09
Variabel_B  5.137279e-09  0.000000e+00


## Stabilisasi Numerik

In [14]:
# 3. Kahan summation implementation
def kahan_sum(numbers):
    """Add up ``numbers`` using Kahan (compensated) summation.

    Parameters
    ----------
    numbers : iterable of numbers
        Values to add, consumed once in iteration order.

    Returns
    -------
    float
        The accumulated total; ``0.0`` when ``numbers`` is empty.
    """
    running_total = 0.0
    carried_error = 0.0  # correction carried over from the previous addition
    for value in numbers:
        # Apply the carried correction to the incoming value first.
        adjusted = value - carried_error
        candidate = running_total + adjusted
        # (candidate - running_total) is what was actually added; the gap
        # between that and `adjusted` is the part lost to rounding.
        carried_error = (candidate - running_total) - adjusted
        running_total = candidate
    return running_total

# Sum the same column twice: once with the Kahan loop and once with np.sum,
# so the two totals can be compared.
sum_kahan = kahan_sum(df['Variabel_B'])
sum_normal = np.sum(df['Variabel_B'])


In [16]:
# Print the np.sum total followed by the Kahan total, one per line.
print(
    "\nPerbedaan Hasil Penjumlahan Biasa vs Kahan Summation:",
    sum_normal,
    sum_kahan,
    sep="\n",
)


Perbedaan Hasil Penjumlahan Biasa vs Kahan Summation:
0.054804855083952074
0.05480485508395206


In [17]:
# 4. Z-score normalisation and analysis
# Centre each column on its mean and scale by its standard deviation, then
# recompute the statistics on the normalised frame in float64.
z_score_df = df.sub(df.mean()).div(df.std())
mean_z, var_z, corr_z = calculate_statistics(z_score_df, np.float64)

In [18]:
# Report the statistics of the z-score-normalised frame, one item per line.
print(
    "\nStatistik setelah Normalisasi Z-Score:",
    "Mean:",
    mean_z.to_string(float_format="{:.6f}".format),
    "Varians:",
    var_z.to_string(float_format="{:.6e}".format),
    "Korelasi:",
    corr_z.to_string(float_format="{:.6f}".format),
    sep="\n",
)


Statistik setelah Normalisasi Z-Score:
Mean:
Variabel_A   -0.000000
Variabel_B   -0.000000
Varians:
Variabel_A   1.000000e+00
Variabel_B   1.000000e+00
Korelasi:
            Variabel_A  Variabel_B
Variabel_A    1.000000   -0.034033
Variabel_B   -0.034033    1.000000
