Skip to content

Examples

Raphael Constantinis edited this page Jul 23, 2025 · 1 revision

Examples

This page provides practical code demonstrations and use cases for the entropic_measurement library, including sample datasets and real-world applications in scientific computing and AI tasks.

Table of Contents

  1. Basic Usage
  2. Scientific Applications
  3. AI and Machine Learning
  4. Sample Datasets
  5. Performance Benchmarks
  6. Advanced Examples
  7. Getting Started
  8. Contributing Examples

Basic Usage

Simple Entropy Calculation

import numpy as np
from entropic_measurement import entropy, mutual_information

# Basic entropy calculation for a probability distribution.
# The three probabilities below sum to 1.
probs = np.array([0.5, 0.3, 0.2])
H = entropy(probs)
print(f"Entropy: {H:.3f} bits")
# Output: Entropy: 1.485 bits
# Hand check in base 2 (as the "bits" label implies):
#   -(0.5*log2(0.5) + 0.3*log2(0.3) + 0.2*log2(0.2)) ≈ 0.500 + 0.521 + 0.464 ≈ 1.485

Mutual Information Between Variables

# Generate sample data (the seed is fixed so the draws are repeatable)
np.random.seed(42)
X = np.random.randint(0, 4, 1000)  # integers drawn from {0, 1, 2, 3}
Y = X + np.random.randint(0, 2, 1000)  # Y is correlated with X: X plus a 0/1 offset

# Calculate mutual information
mi = mutual_information(X, Y)
print(f"Mutual Information: {mi:.3f} bits")
# Output: Mutual Information: 0.847 bits
# NOTE(review): for X uniform on {0..3} plus an independent fair 0/1 offset,
# the analytical value is I(X;Y) = H(Y) - H(Y|X) = 2.25 - 1 = 1.25 bits, so
# the figure above looks inconsistent with a base-2 estimate -- re-run and
# confirm against the installed library version.

Scientific Applications

Analyzing Gene Expression Data

import pandas as pd
from entropic_measurement import conditional_entropy, information_gain

# Synthetic stand-in for a real expression matrix
# (rows are samples, columns are genes)
n_samples, n_genes = 100, 50
gene_data = np.random.exponential(2, (n_samples, n_genes))
conditions = np.random.choice(['control', 'treatment'], n_samples)

# Score every gene: bin its continuous expression values into three levels,
# then measure the information gain against the condition labels
expression_levels = ['low', 'med', 'high']
info_gains = [
    information_gain(pd.cut(expression, bins=3, labels=expression_levels), conditions)
    for expression in gene_data.T
]

# argsort is ascending, so the five highest-scoring genes sit at the tail
top_genes = np.argsort(info_gains)[-5:]
print(f"Top 5 informative genes: {top_genes}")
print(f"Information gains: {[info_gains[i] for i in top_genes]}")

Climate Data Analysis

from entropic_measurement import transfer_entropy

# Simulate temperature and precipitation time series:
# temperature is a yearly sine wave around 20 with Gaussian noise (std 2),
# precipitation is drawn from an exponential distribution with scale 1.
days = 365
temp = 20 + 10 * np.sin(2 * np.pi * np.arange(days) / 365) + np.random.normal(0, 2, days)
precip = np.random.exponential(1, days)
# Add some dependence: on warmer-than-average days the precipitation AMOUNT
# is scaled by 0.7 (this changes magnitudes, not the chance of rain).
precip[temp > np.mean(temp)] *= 0.7

# Calculate transfer entropy (causality measure) in both directions.
# NOTE(review): the dependence injected above is same-day, while lag=1 is
# requested here -- confirm the lag matches the effect being demonstrated.
te_temp_to_precip = transfer_entropy(temp, precip, lag=1)
te_precip_to_temp = transfer_entropy(precip, temp, lag=1)

print(f"Transfer entropy (temp → precip): {te_temp_to_precip:.3f}")
print(f"Transfer entropy (precip → temp): {te_precip_to_temp:.3f}")

AI and Machine Learning

Feature Selection for Classification

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from entropic_measurement import feature_importance_entropy

# Generate synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,
                           n_redundant=5, random_state=42)

# Split BEFORE scoring features, so the held-out rows never influence which
# features get selected. Scoring on the full dataset would leak test-set
# information into the selection and inflate the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Calculate entropy-based feature importance on the training split only
feature_scores = feature_importance_entropy(X_train, y_train)

# Select top features (argsort is ascending, so take the tail)
n_top_features = 10
top_features = np.argsort(feature_scores)[-n_top_features:]

# Compare performance

# Full feature set
rf_full = RandomForestClassifier(random_state=42)
rf_full.fit(X_train, y_train)
score_full = rf_full.score(X_test, y_test)

# Selected features only
rf_selected = RandomForestClassifier(random_state=42)
rf_selected.fit(X_train[:, top_features], y_train)
score_selected = rf_selected.score(X_test[:, top_features], y_test)

print(f"Accuracy with all features: {score_full:.3f}")
print(f"Accuracy with top {n_top_features} features: {score_selected:.3f}")

Neural Network Activation Analysis

import tensorflow as tf
from entropic_measurement import layer_entropy_analysis

# Simple neural network for demonstration.
# NOTE: the model is never trained in this example, so the entropies printed
# below describe the initial weights, not learned representations.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Load MNIST data (subset for example): the first 1000 images, flattened to
# 784 values each and scaled by 1/255. The labels are loaded but not used.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train[:1000].reshape(-1, 784) / 255.0

# Analyze entropy at each layer
activations = []  # one entropy value per layer, in layer order
for i, layer in enumerate(model.layers):
    # Sub-model mapping the network input to this layer's output. Only the
    # output is analysed, so the layer's input is not computed separately.
    temp_model = tf.keras.Model(inputs=model.input, outputs=layer.output)
    layer_output = temp_model(x_train)

    # Calculate entropy of activations
    entropy_val = layer_entropy_analysis(layer_output.numpy())
    activations.append(entropy_val)
    print(f"Layer {i+1} entropy: {entropy_val:.3f}")

Sample Datasets

Generating Test Data

def generate_correlated_data(n_samples=1000, correlation=0.7):
    """Generate a pair of dependent binary sequences for testing.

    Args:
        n_samples: Number of elements in each sequence.
        correlation: Probability that ``y[i]`` equals ``x[i]``; otherwise
            ``y[i]`` is the flipped bit ``1 - x[i]``. This is an agreement
            probability rather than a Pearson coefficient: 1.0 yields
            identical sequences, 0.0 exact complements, and 0.5 makes ``y``
            independent of ``x``.

    Returns:
        Tuple ``(x, y)`` of integer arrays of length ``n_samples`` whose
        values are 0 or 1.
    """
    x = np.random.binomial(1, 0.5, n_samples)
    # One uniform draw per sample decides whether y copies or flips x.
    # Drawing them in a single call replaces the per-element Python loop
    # while consuming the global random stream in the same order.
    agree = np.random.random(n_samples) < correlation
    y = np.where(agree, x, 1 - x)
    return x.astype(int), y.astype(int)

# Test with different correlation levels.
# Each level is the probability that y[i] equals x[i]. For a fair binary x the
# theoretical mutual information is 1 - H_b(p) bits, i.e. roughly
# 0.000, 0.119, 0.531 and 0.714 for the four levels below; finite-sample
# estimates will differ somewhat (assumes mutual_information reports bits).
correlations = [0.5, 0.7, 0.9, 0.95]
for corr in correlations:
    x, y = generate_correlated_data(correlation=corr)
    mi = mutual_information(x, y)
    print(f"Correlation: {corr}, Mutual Information: {mi:.3f}")

Time Series Data

def generate_chaotic_series(length=1000, a=4.0, x0=0.4):
    """Generate a time series by iterating the logistic map.

    Each value follows ``x[i] = a * x[i-1] * (1 - x[i-1])``.

    Args:
        length: Number of points to generate. Zero yields an empty array.
        a: Growth parameter of the map.
        x0: Initial condition, expected in the open interval (0, 1). The
            default deliberately avoids 0.5: with ``a=4.0`` that start maps
            to 1.0 and then to 0.0, after which every value stays at zero.

    Returns:
        1-D float array holding ``length`` successive values of the map.
    """
    x = np.zeros(length)
    if length == 0:
        # Nothing to seed or iterate; avoids indexing into an empty array.
        return x

    x[0] = x0
    for i in range(1, length):
        x[i] = a * x[i-1] * (1 - x[i-1])

    return x

from entropic_measurement import entropy_rate

# Two series to contrast: logistic-map output and uniform random noise
chaotic = generate_chaotic_series()
random_series = np.random.random(1000)

# Estimate both entropy rates with the same order setting
chaotic_rate, random_rate = (
    entropy_rate(series, order=5) for series in (chaotic, random_series)
)

print(f"Chaotic series entropy rate: {chaotic_rate:.3f}")
print(f"Random series entropy rate: {random_rate:.3f}")

Performance Benchmarks

Computational Efficiency

import time
from entropic_measurement import entropy_parallel

# Benchmark different data sizes.
# Each size is timed once, so treat the numbers as indicative only.
sizes = [1000, 5000, 10000, 50000]
times_serial = []
times_parallel = []

for size in sizes:
    data = np.random.randint(0, 10, size)

    # Serial computation.
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() is wall-clock time and can be too coarse.
    start = time.perf_counter()
    h_serial = entropy(data)
    times_serial.append(time.perf_counter() - start)

    # Parallel computation
    start = time.perf_counter()
    h_parallel = entropy_parallel(data, n_jobs=4)
    times_parallel.append(time.perf_counter() - start)

    print(f"Size: {size}, Serial: {times_serial[-1]:.4f}s, Parallel: {times_parallel[-1]:.4f}s")

# Plot results
import matplotlib.pyplot as plt

# Single axes: one line per timing series over the benchmarked sizes
fig, ax = plt.subplots(figsize=(10, 6))
for timings, line_style, series_name in (
    (times_serial, 'o-', 'Serial'),
    (times_parallel, 's-', 'Parallel'),
):
    ax.plot(sizes, timings, line_style, label=series_name)

ax.set_xlabel('Data Size')
ax.set_ylabel('Computation Time (seconds)')
ax.legend()
ax.set_title('Entropy Calculation Performance')
ax.grid(True)
plt.show()

Memory Usage Optimization

from memory_profiler import profile
from entropic_measurement import entropy_streaming

@profile
def memory_efficient_entropy(data_stream):
    """Calculate entropy for large datasets using streaming approach.

    Thin wrapper that forwards ``data_stream`` to ``entropy_streaming`` with
    a fixed ``chunk_size`` of 1000 and returns its result unchanged. The
    ``@profile`` decorator is the one imported from ``memory_profiler``.
    """
    return entropy_streaming(data_stream, chunk_size=1000)

# Process large dataset in chunks
# NOTE(review): large_data is a fully materialised in-memory array, so the
# array itself stays resident however entropy_streaming chunks it -- confirm
# whether a generator or file-backed source is the intended real-world input.
large_data = np.random.randint(0, 100, 100000)
result = memory_efficient_entropy(large_data)
print(f"Streaming entropy result: {result:.3f}")

Advanced Examples

Information Bottleneck Analysis

from entropic_measurement import information_bottleneck

# Example: Compress features while preserving target information.
# X holds 1000 draws from a 2-D Gaussian with covariance 0.5 between the
# components; y is 1 where the two components sum to a positive value.
X = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], 1000)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

# Beta values to sweep. Defined once so that the call and the report loop
# below cannot drift out of sync.
betas = [0.1, 1.0, 10.0]

# Apply information bottleneck
compressed_X, compression_curve = information_bottleneck(X, y, beta_range=betas)

print("Compression analysis:")
for beta, (mi_xt, mi_ty) in zip(betas, compression_curve):
    print(f"β={beta}: I(X;T)={mi_xt:.3f}, I(T;Y)={mi_ty:.3f}")

Multi-Scale Entropy

from entropic_measurement import multiscale_entropy

# Analyze complexity at different time scales
physiological_signal = np.random.normal(0, 1, 2000)  # 2000 Gaussian noise samples
# Add some structure: each sample gains 0.3 times the previous sample.
# The update is in place, so iteration i reads the value already modified by
# iteration i-1; the loop therefore has to run in order.
for i in range(1, len(physiological_signal)):
    physiological_signal[i] += 0.3 * physiological_signal[i-1]

# Calculate multi-scale entropy for scales 1 through 20
scales = range(1, 21)
mse_values = multiscale_entropy(physiological_signal, scales=scales)

# Report only the first five scales to keep the output short
print("Multi-scale entropy analysis:")
for scale, mse in zip(scales[:5], mse_values[:5]):
    print(f"Scale {scale}: MSE = {mse:.3f}")

Getting Started

To run these examples, install the required dependencies:

pip install entropic_measurement numpy pandas scikit-learn matplotlib tensorflow memory_profiler

For more information, see the API Reference and Installation Guide.

Contributing Examples

We welcome contributions of additional examples! Please see our Contributing Guidelines for how to submit new examples and use cases.

Clone this wiki locally