In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import powerlaw, geom, zscore, rankdata
from sklearn.preprocessing import quantile_transform

In [None]:

# Function to generate random data
def generate_data(size=10000, seed=None):
    """Draw three independent random samples with very different shapes.

    Parameters
    ----------
    size : int, optional
        Number of values drawn for each variable.
    seed : int or None, optional
        When given, all three samples are drawn from one dedicated
        ``numpy.random.Generator`` seeded with this value, so repeated calls
        with the same seed return identical arrays. When ``None`` (default)
        the samples come from NumPy's global random state, exactly as before,
        so an earlier ``np.random.seed(...)`` call still controls the output.

    Returns
    -------
    tuple of numpy.ndarray
        ``(B, I, H)`` where
        ``B`` is normal with mean 5 and standard deviation 2,
        ``I`` follows ``scipy.stats.powerlaw`` with shape ``a=0.3``,
        ``H`` follows ``scipy.stats.geom`` with success probability 0.005.
    """
    if seed is None:
        # Legacy path: SciPy falls back to NumPy's global state when
        # random_state is None, matching the np.random.normal call below.
        random_state = None
        B = np.random.normal(5, 2, size)
    else:
        random_state = np.random.default_rng(seed)
        B = random_state.normal(5, 2, size)

    I = powerlaw.rvs(0.3, size=size, random_state=random_state)
    H = geom.rvs(0.005, size=size, random_state=random_state)
    return B, I, H

In [None]:
# Function to plot boxplots with custom colors
def plot_boxplot(data, labels, title, colors=None):
    """Draw one box per series on a new 10x5 inch figure.

    Parameters
    ----------
    data : sequence of array-like
        One entry per box; passed unchanged to ``sns.boxplot``.
    labels : sequence of str
        Tick labels, one per box, in the same order as ``data``.
    title : str
        Figure title.
    colors : sequence of color specs, optional
        Box colours. Defaults to gold, light green and thistle. The sequence
        is cycled so that exactly one colour is supplied per label, however
        many boxes are drawn.

    Returns
    -------
    None
        The plot is left on the current pyplot figure.
    """
    if colors is None or len(colors) == 0:
        colors = ['#FFD700', '#90EE90', '#D8BFD8']

    n_boxes = len(labels)
    # One colour per box; with no labels fall back to the raw colour list.
    palette = [colors[i % len(colors)] for i in range(n_boxes)] if n_boxes else list(colors)

    plt.figure(figsize=(10, 5))
    sns.boxplot(data=data, palette=palette, linewidth=2, width=0.6)
    plt.xticks(range(n_boxes), labels, fontsize=12, fontweight='bold')
    plt.title(title, fontsize=14, fontweight='bold', color='black')
    plt.grid(True, linestyle='-.', alpha=0.8)
    

In [None]:

# Function to plot histograms
def plot_histogram(original, transformed, title):
    """Overlay the histogram of a variable before and after a transformation.

    Both series are drawn on one new 10x5 inch figure, each as a 50-bin,
    half-transparent histogram with a KDE curve: the original in purple and
    the transformed values in green. Returns None; the plot is left on the
    current pyplot figure.

    Parameters
    ----------
    original : array-like
        Values before the transformation.
    transformed : array-like
        Values after the transformation.
    title : str
        Figure title.
    """
    shared_style = dict(bins=50, kde=True, alpha=0.5)
    overlays = (
        (original, 'purple', 'Original'),
        (transformed, 'green', 'Transformed'),
    )

    plt.figure(figsize=(10, 5))
    for values, shade, legend_name in overlays:
        sns.histplot(values, color=shade, label=legend_name, **shared_style)

    plt.title(title, fontsize=14, fontweight='bold', color='black')
    plt.legend()
    plt.grid(True, linestyle='-.', alpha=0.8)
    


In [None]:
# Function to normalize data
def normalize_data(B, I, H):
    """Apply six normalization schemes to each of three 1-D variables.

    Parameters
    ----------
    B, I, H : array-like
        One-dimensional numeric samples. They are coerced with ``np.asarray``
        and do not need to have the same length.

    Returns
    -------
    dict
        Maps each scheme name to a ``(B_new, I_new, H_new)`` tuple, in this
        order of keys:

        * ``"Max"``: divide by the maximum.
        * ``"Sum"``: divide by the sum.
        * ``"Z-Score"``: ``scipy.stats.zscore``.
        * ``"Percentile"``: rank divided by the number of values.
        * ``"Median Matching"``: rescale so the median equals the mean of
          the three input medians.
        * ``"Quantile"``: ``sklearn.preprocessing.quantile_transform`` with
          its default settings, applied to the values as a single column.
    """
    series = tuple(np.asarray(values) for values in (B, I, H))

    # Medians are taken one series at a time rather than on a stacked array,
    # so inputs of different lengths are supported.
    m1 = np.mean([np.median(x) for x in series])

    # Every transform takes a single array; the shared target median is
    # captured by closure instead of being passed as an extra argument.
    normalizations = {
        "Max": lambda x: x / x.max(),
        "Sum": lambda x: x / x.sum(),
        "Z-Score": zscore,
        "Percentile": lambda x: rankdata(x) / len(x),
        "Median Matching": lambda x: x * (m1 / np.median(x)),
        "Quantile": lambda x: quantile_transform(x.reshape(-1, 1), axis=0, copy=True).flatten(),
    }

    return {
        name: tuple(func(x) for x in series)
        for name, func in normalizations.items()
    }


In [None]:

# Generate original data.
# Seed NumPy's global random state first so that Restart & Run All
# reproduces the same samples (and therefore the same figures) every time.
np.random.seed(42)
B, I, H = generate_data()


In [None]:

# Box plots of the raw (un-normalized) samples, one box per variable
plot_boxplot(
    [B, I, H],
    ['B (Gaussian)', 'I (Power Law)', 'H (Geometric)'],
    'Original Variable Distribution',
)


In [None]:

# Run every normalization scheme on the three variables;
# the result maps scheme name -> (B_new, I_new, H_new)
transformed_data = normalize_data(B=B, I=I, H=H)


In [None]:

# Plot histograms and boxplots for each transformed data.
# Every figure is shown and closed as soon as it is drawn: the loop produces
# 6 normalizations x 4 plots = 24 figures, and leaving them all open until the
# end of the cell exceeds Matplotlib's default open-figure warning threshold (20).
for name, (B_new, I_new, H_new) in transformed_data.items():
    for label, original, transformed in (('B', B, B_new), ('I', I, I_new), ('H', H, H_new)):
        plot_histogram(original, transformed, f'{label} - {name} Normalization')
        plt.show()
        plt.close()  # no-op if the backend already closed the figure in show()

    plot_boxplot([B_new, I_new, H_new], ['B', 'I', 'H'], f'Box Plot - {name} Normalization')
    plt.show()
    plt.close()
