# Statistical Operations

1. Basic Statistical Measures:
   - Central tendency (mean, median, mode)
   - Dispersion (variance, standard deviation, range, IQR)
   - Order statistics (min, max, quantiles)

2. Distributions:
   - Normal, Uniform, Bernoulli, Categorical
   - Poisson, Exponential, Beta, Gamma
   - Sampling and probability computations

3. Correlation and Covariance:
   - Correlation matrices
   - Covariance matrices
   - Rank correlation

4. Hypothesis Testing:
   - One-sample t-test
   - Two-sample independent t-test

5. Advanced Statistics:
   - Statistical moments
   - Kernel density estimation
   - Distribution fitting

6. Time Series Statistics:
   - Moving averages
   - Exponential smoothing
   - Autocorrelation

7. Enhanced Distribution Metrics:
   - Detailed skewness calculation
   - Kurtosis (both regular and excess)
   - Shapiro-Wilk test for normality
   - Comprehensive distribution statistics

8. Advanced Hypothesis Testing:
   - Expanded t-test (one-sample, independent, paired)
   - F-test for variance equality
   - One-way ANOVA
   - Chi-square test
   - Detailed statistics and degrees of freedom

9. Effect Size Measures:
   - Cohen's d
   - Correlation coefficient
   - Interpretation guidelines

In [None]:
import torch
import torch.distributions as dist
from torch.nn import functional as F

In [None]:
# Example inputs: a 1000 x 5 matrix of standard-normal draws and a vector of
# 1000 integer labels sampled from {0, 1, 2, 3, 4} (upper bound is exclusive).
data = torch.randn((1000, 5))
categorical_data = torch.randint(low=0, high=5, size=(1000,))

# ===== Basic Statistical Measures =====
class BasicStatistics:
    """Descriptive statistics reduced along the first dimension of a tensor."""

    @staticmethod
    def compute_basic_stats(x):
        """Compute central-tendency, dispersion and order statistics of ``x``.

        Every statistic reduces over ``dim=0``, so a 2-D input yields one
        value per column.

        Args:
            x: Floating-point tensor (``torch.quantile`` and ``torch.mean``
                do not accept integer input).

        Returns:
            dict with the keys ``'mean'``, ``'median'``, ``'mode'``,
            ``'variance'``, ``'std'``, ``'range'``, ``'iqr'``, ``'min'``,
            ``'max'`` and ``'quantiles'``.  ``'quantiles'`` stacks the 25th,
            50th and 75th percentiles along a new leading dimension of size 3.
            ``'variance'`` and ``'std'`` use the unbiased (n - 1) estimator.

        Note:
            Per the PyTorch documentation ``torch.median`` returns the lower
            of the two middle values for an even number of elements, so
            ``'median'`` can differ from the interpolated 0.5 quantile.
        """
        # torch.quantile requires the `q` tensor to share the input's dtype and
        # device; building it from `x` keeps float64 and GPU inputs working.
        probs = torch.tensor([0.25, 0.5, 0.75], dtype=x.dtype, device=x.device)
        quantiles = torch.quantile(x, probs, dim=0)

        # Computed once and reused for both the extrema and the range.
        minimum = torch.min(x, dim=0).values
        maximum = torch.max(x, dim=0).values

        stats = {
            # Central Tendency
            'mean': torch.mean(x, dim=0),
            'median': torch.median(x, dim=0).values,
            'mode': torch.mode(x, dim=0).values,

            # Dispersion
            'variance': torch.var(x, dim=0, unbiased=True),
            'std': torch.std(x, dim=0, unbiased=True),
            'range': maximum - minimum,
            'iqr': quantiles[2] - quantiles[0],

            # Order Statistics
            'min': minimum,
            'max': maximum,
            'quantiles': quantiles,
        }
        return stats



In [None]:
# ===== Distributions =====
class DistributionOperations:
    """Holds one pre-configured instance of several ``torch.distributions`` families.

    Each distribution is stored as an attribute whose name is the value
    accepted by ``sample_and_stats`` (``'normal'``, ``'uniform'``, ...).
    """

    def __init__(self):
        # Normal Distribution (mean 0, standard deviation 1)
        self.normal = dist.Normal(0, 1)

        # Uniform Distribution on [0, 1)
        self.uniform = dist.Uniform(0, 1)

        # Bernoulli Distribution with success probability 0.5
        self.bernoulli = dist.Bernoulli(0.5)

        # Categorical Distribution over 5 equally weighted classes
        self.categorical = dist.Categorical(torch.ones(5))

        # Poisson Distribution with rate 5
        self.poisson = dist.Poisson(5.0)

        # Exponential Distribution with rate 1
        self.exponential = dist.Exponential(1.0)

        # Beta Distribution with both concentrations equal to 2
        self.beta = dist.Beta(2.0, 2.0)

        # Gamma Distribution with concentration 2 and rate 2
        self.gamma = dist.Gamma(2.0, 2.0)

    def sample_and_stats(self, dist_type, sample_size):
        """Generate samples and compute statistics for a given distribution.

        Args:
            dist_type: Name of the attribute holding the distribution,
                e.g. ``'normal'`` or ``'categorical'``.
            sample_size: Number of samples to draw (at least 1, because the
                first sample is used for ``'log_prob'``).

        Returns:
            Tuple ``(samples, stats)``.  ``samples`` is returned exactly as
            drawn.  ``stats`` has the keys ``'mean'`` and ``'std'`` (of the
            samples), ``'entropy'`` (of the distribution, or ``None`` when the
            distribution does not implement it) and ``'log_prob'`` (log
            density/mass of the first sample).

        Raises:
            AttributeError: If ``dist_type`` is not an attribute of this object.
        """
        distribution = getattr(self, dist_type)
        samples = distribution.sample((sample_size,))

        # Categorical draws are int64 and torch.mean / torch.std reject integer
        # tensors, so the summary statistics are computed on a float copy.
        if torch.is_floating_point(samples):
            numeric_samples = samples
        else:
            numeric_samples = samples.to(torch.get_default_dtype())

        # Not every family has an entropy implementation (Poisson appears to be
        # one such case); report None instead of aborting the whole call.
        try:
            entropy = distribution.entropy()
        except NotImplementedError:
            entropy = None

        stats = {
            'mean': torch.mean(numeric_samples),
            'std': torch.std(numeric_samples),
            'entropy': entropy,
            'log_prob': distribution.log_prob(samples[0])
        }
        return samples, stats



In [None]:
# ===== Correlation and Covariance =====
class CorrelationStats:
    """Correlation and covariance matrices for 2-D ``(observations, variables)`` data."""

    @staticmethod
    def compute_correlation_matrix(x):
        """Return the Pearson correlation matrix of the columns of ``x``.

        Args:
            x: 2-D floating-point tensor with one observation per row.

        Returns:
            Square tensor with one row/column per input column.  A column with
            zero standard deviation produces NaN entries, because its
            correlation is undefined.
        """
        # Pearson correlation: standardise each column, then average the
        # cross-products with the same (n - 1) divisor used by torch.std.
        mean = torch.mean(x, dim=0)
        centered_data = x - mean
        std = torch.std(x, dim=0)
        normalized_data = centered_data / std
        correlation = torch.mm(normalized_data.T, normalized_data) / (x.size(0) - 1)
        return correlation

    @staticmethod
    def compute_covariance_matrix(x):
        """Return the unbiased (n - 1) sample covariance matrix of the columns of ``x``.

        Args:
            x: 2-D floating-point tensor with one observation per row.
        """
        mean = torch.mean(x, dim=0)
        centered_data = x - mean
        covariance = torch.mm(centered_data.T, centered_data) / (x.size(0) - 1)
        return covariance

    @staticmethod
    def _average_ranks(column):
        """Return 1-based float32 ranks of a 1-D tensor; tied values share their mean rank."""
        _, inverse, counts = torch.unique(
            column, sorted=True, return_inverse=True, return_counts=True
        )
        # For each distinct value, the ordinal ranks it occupies run from
        # `first` to `last`; every tied element receives their midpoint.
        last = torch.cumsum(counts, dim=0)
        first = last - counts + 1
        return ((first + last).float() / 2)[inverse]

    @staticmethod
    def compute_rank_correlation(x):
        """Return the Spearman rank correlation matrix of the columns of ``x``.

        Each column is replaced by its ranks and the Pearson correlation of
        the ranks is returned.  Tied values receive the average of the ranks
        they span, so the result does not depend on how the sort orders
        equal elements.

        Args:
            x: 2-D tensor with one observation per row.

        Returns:
            Square float32 tensor with one row/column per input column.
        """
        ranked_data = torch.empty(x.shape, dtype=torch.float32, device=x.device)
        for col in range(x.size(1)):
            ranked_data[:, col] = CorrelationStats._average_ranks(x[:, col])
        return CorrelationStats.compute_correlation_matrix(ranked_data)



In [None]:
# ===== Hypothesis Testing =====
class HypothesisTests:
    """Student t statistics.  Only the test statistic is returned, not a p-value."""

    @staticmethod
    def ttest_1samp(x, mu=0.0):
        """One-sample t-test statistic.

        All elements of ``x`` are treated as a single sample.

        Args:
            x: Floating-point tensor of observations.
            mu: Hypothesised population mean.

        Returns:
            0-dim tensor ``(mean(x) - mu) / (std(x) / sqrt(n))`` where ``std``
            is the unbiased sample standard deviation and ``n`` is the number
            of elements in ``x``.
        """
        # mean/std reduce over every element, so the sample size must be the
        # element count as well (x.size(0) is only correct for 1-D input).
        n = x.numel()
        mean = torch.mean(x)
        std = torch.std(x, unbiased=True)
        t_stat = (mean - mu) / (std / n ** 0.5)
        return t_stat

    @staticmethod
    def ttest_ind(x, y):
        """Independent two-sample t-test statistic assuming equal variances.

        All elements of each tensor are treated as one sample.

        Args:
            x: Floating-point tensor holding the first sample.
            y: Floating-point tensor holding the second sample.

        Returns:
            0-dim tensor with the pooled-variance t statistic for
            ``mean(x) - mean(y)``.
        """
        # Element counts, to match the flattened means and variances below.
        n1, n2 = x.numel(), y.numel()
        mean1, mean2 = torch.mean(x), torch.mean(y)
        var1, var2 = torch.var(x, unbiased=True), torch.var(y, unbiased=True)

        # Pooled variance: the two sample variances weighted by their degrees
        # of freedom.
        pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
        t_stat = (mean1 - mean2) / torch.sqrt(pooled_var * (1 / n1 + 1 / n2))
        return t_stat



In [None]:
# ===== Advanced Statistical Operations =====
class AdvancedStatistics:
    """Higher-order moments and a Gaussian kernel density estimate."""

    @staticmethod
    def compute_moments(x):
        """Compute statistical moments along ``dim=0``.

        Args:
            x: Floating-point tensor; a 2-D input yields one value per column.

        Returns:
            dict with ``'mean'``, ``'variance'``, ``'skewness'`` and
            ``'kurtosis'``.  The variance is the population (divide-by-n)
            variance, and the kurtosis is the plain fourth standardized moment
            (not excess kurtosis).  A zero-variance column yields NaN skewness
            and kurtosis.
        """
        mean = torch.mean(x, dim=0)
        centered_data = x - mean

        # Standardized moments: central moment k divided by variance ** (k / 2).
        variance = torch.mean(centered_data ** 2, dim=0)
        skewness = torch.mean(centered_data ** 3, dim=0) / variance ** 1.5
        kurtosis = torch.mean(centered_data ** 4, dim=0) / variance ** 2

        return {
            'mean': mean,
            'variance': variance,
            'skewness': skewness,
            'kurtosis': kurtosis
        }

    @staticmethod
    def kernel_density_estimation(x, bandwidth=None):
        """Simple Gaussian KDE evaluated on 100 evenly spaced grid points.

        All elements of ``x`` are treated as one univariate sample.

        Args:
            x: Tensor of observations.
            bandwidth: Kernel standard deviation.  When ``None`` it is set to
                ``std(x) * n ** (-1/5)`` with ``n`` the number of elements.

        Returns:
            Tuple ``(x_grid, density)``: the grid from ``min(x)`` to
            ``max(x)`` and the estimated density at each grid point.  Both
            live on the device of ``x``.
        """
        samples = x.reshape(-1)

        if bandwidth is None:
            # Scott's rule.  The sample size is the element count so that it
            # matches the flattened std/min/max used here.
            bandwidth = samples.std() * (samples.numel() ** (-1 / 5))

        grid_dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
        x_grid = torch.linspace(
            samples.min().item(),
            samples.max().item(),
            100,
            dtype=grid_dtype,
            device=x.device,
        )

        # Broadcast to a (grid, samples) matrix of scaled distances and average
        # the Gaussian kernel over the samples for every grid point at once.
        scaled = (x_grid.unsqueeze(1) - samples.unsqueeze(0)) / bandwidth
        kde = torch.exp(-0.5 * scaled ** 2).mean(dim=1)

        return x_grid, kde / (bandwidth * (2 * torch.pi) ** 0.5)



In [None]:
# ===== Time Series Statistics =====
class TimeSeriesStats:
    """Smoothing and autocorrelation helpers for 1-D series."""

    @staticmethod
    def moving_average(x, window_size):
        """Compute a centred moving average with zero padding at both ends.

        Args:
            x: 1-D floating-point tensor.
            window_size: Number of samples averaged per output value.

        Returns:
            1-D tensor with the same length, dtype and device as ``x``.
            Because the series is padded with zeros, the first and last
            ``window_size // 2`` values are pulled towards zero.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")

        # Build the kernel in the input's dtype/device; conv1d rejects mixed ones.
        weights = torch.ones(window_size, dtype=x.dtype, device=x.device) / window_size

        # Pad window_size - 1 zeros in total so the output length equals
        # len(x) for even windows as well as odd ones.
        left = window_size // 2
        right = (window_size - 1) // 2
        padded = F.pad(x.reshape(1, 1, -1), (left, right))

        ma = F.conv1d(padded, weights.view(1, 1, -1))
        return ma.reshape(-1)

    @staticmethod
    def exponential_moving_average(x, alpha):
        """Compute the exponential moving average of a series.

        Uses ``ema[0] = x[0]`` and
        ``ema[i] = alpha * x[i] + (1 - alpha) * ema[i - 1]``.

        Args:
            x: Tensor indexed along its first dimension.
            alpha: Weight given to the newest observation.

        Returns:
            Tensor shaped like ``x``; an empty input gives an empty result.
        """
        ema = torch.zeros_like(x)
        if len(x) == 0:
            return ema

        ema[0] = x[0]
        for i in range(1, len(x)):
            ema[i] = alpha * x[i] + (1 - alpha) * ema[i - 1]
        return ema

    @staticmethod
    def autocorrelation(x, max_lag):
        """Compute the sample autocorrelation for lags ``0 .. max_lag``.

        Uses the estimator
        ``sum(c[:n - lag] * c[lag:]) / sum(c ** 2)`` with ``c = x - mean(x)``,
        so the value at lag 0 is exactly 1.

        Args:
            x: 1-D floating-point tensor.
            max_lag: Largest lag to evaluate.

        Returns:
            float32 tensor of length ``max_lag + 1``.  Lags with no
            overlapping samples (``lag >= len(x)``) are NaN, as is every entry
            when the series has zero variance.
        """
        n = x.size(0)
        centered_data = x - torch.mean(x)
        total = torch.sum(centered_data ** 2)

        autocorr = torch.full((max_lag + 1,), float('nan'))
        # Slice with an explicit end index: `[:-lag]` is empty when lag == 0.
        for lag in range(min(max_lag, n - 1) + 1):
            overlap = centered_data[:n - lag] * centered_data[lag:]
            autocorr[lag] = torch.sum(overlap) / total

        return autocorr

In [None]:
# ===== Example Usage =====
def demonstrate_statistics():
    """Print a short tour of the statistics helpers on freshly sampled data."""
    # 1000 observations of 5 standard-normal variables
    sample_matrix = torch.randn(1000, 5)

    # Column-wise descriptive statistics
    print("Basic Statistics:", BasicStatistics.compute_basic_stats(sample_matrix))

    # Pearson correlation between the columns
    print("Correlation Matrix:", CorrelationStats.compute_correlation_matrix(sample_matrix))

    # Summary of 1000 draws from the pre-configured normal distribution;
    # the raw draws themselves are not needed here.
    _, normal_stats = DistributionOperations().sample_and_stats('normal', 1000)
    print("Normal Distribution Stats:", normal_stats)

    # Smooth a 100-step random walk with a 5-point moving average
    random_walk = torch.randn(100).cumsum(0)
    print("Moving Average:", TimeSeriesStats.moving_average(random_walk, 5))
    
import torch
import torch.distributions as dist
from torch.nn import functional as F

# Example inputs: a 1000 x 5 matrix of standard-normal draws and a vector of
# 1000 integer labels sampled from {0, 1, 2, 3, 4} (upper bound is exclusive).
data = torch.randn((1000, 5))
categorical_data = torch.randint(low=0, high=5, size=(1000,))

In [None]:
# ===== Basic Statistical Measures =====
class BasicStatistics:
    """Descriptive statistics reduced along the first dimension of a tensor."""

    @staticmethod
    def compute_basic_stats(x):
        """Compute central-tendency, dispersion and order statistics of ``x``.

        Every statistic reduces over ``dim=0``, so a 2-D input yields one
        value per column.

        Args:
            x: Floating-point tensor (``torch.quantile`` and ``torch.mean``
                do not accept integer input).

        Returns:
            dict with the keys ``'mean'``, ``'median'``, ``'mode'``,
            ``'variance'``, ``'std'``, ``'range'``, ``'iqr'``, ``'min'``,
            ``'max'`` and ``'quantiles'``.  ``'quantiles'`` stacks the 25th,
            50th and 75th percentiles along a new leading dimension of size 3.
            ``'variance'`` and ``'std'`` use the unbiased (n - 1) estimator.

        Note:
            Per the PyTorch documentation ``torch.median`` returns the lower
            of the two middle values for an even number of elements, so
            ``'median'`` can differ from the interpolated 0.5 quantile.
        """
        # torch.quantile requires the `q` tensor to share the input's dtype and
        # device; building it from `x` keeps float64 and GPU inputs working.
        probs = torch.tensor([0.25, 0.5, 0.75], dtype=x.dtype, device=x.device)
        quantiles = torch.quantile(x, probs, dim=0)

        # Computed once and reused for both the extrema and the range.
        minimum = torch.min(x, dim=0).values
        maximum = torch.max(x, dim=0).values

        stats = {
            # Central Tendency
            'mean': torch.mean(x, dim=0),
            'median': torch.median(x, dim=0).values,
            'mode': torch.mode(x, dim=0).values,

            # Dispersion
            'variance': torch.var(x, dim=0, unbiased=True),
            'std': torch.std(x, dim=0, unbiased=True),
            'range': maximum - minimum,
            'iqr': quantiles[2] - quantiles[0],

            # Order Statistics
            'min': minimum,
            'max': maximum,
            'quantiles': quantiles,
        }
        return stats

In [None]:
# ===== Distributions =====
class DistributionOperations:
    """Holds one pre-configured instance of several ``torch.distributions`` families.

    Each distribution is stored as an attribute whose name is the value
    accepted by ``sample_and_stats`` (``'normal'``, ``'uniform'``, ...).
    """

    def __init__(self):
        # Normal Distribution (mean 0, standard deviation 1)
        self.normal = dist.Normal(0, 1)

        # Uniform Distribution on [0, 1)
        self.uniform = dist.Uniform(0, 1)

        # Bernoulli Distribution with success probability 0.5
        self.bernoulli = dist.Bernoulli(0.5)

        # Categorical Distribution over 5 equally weighted classes
        self.categorical = dist.Categorical(torch.ones(5))

        # Poisson Distribution with rate 5
        self.poisson = dist.Poisson(5.0)

        # Exponential Distribution with rate 1
        self.exponential = dist.Exponential(1.0)

        # Beta Distribution with both concentrations equal to 2
        self.beta = dist.Beta(2.0, 2.0)

        # Gamma Distribution with concentration 2 and rate 2
        self.gamma = dist.Gamma(2.0, 2.0)

    def sample_and_stats(self, dist_type, sample_size):
        """Generate samples and compute statistics for a given distribution.

        Args:
            dist_type: Name of the attribute holding the distribution,
                e.g. ``'normal'`` or ``'categorical'``.
            sample_size: Number of samples to draw (at least 1, because the
                first sample is used for ``'log_prob'``).

        Returns:
            Tuple ``(samples, stats)``.  ``samples`` is returned exactly as
            drawn.  ``stats`` has the keys ``'mean'`` and ``'std'`` (of the
            samples), ``'entropy'`` (of the distribution, or ``None`` when the
            distribution does not implement it) and ``'log_prob'`` (log
            density/mass of the first sample).

        Raises:
            AttributeError: If ``dist_type`` is not an attribute of this object.
        """
        distribution = getattr(self, dist_type)
        samples = distribution.sample((sample_size,))

        # Categorical draws are int64 and torch.mean / torch.std reject integer
        # tensors, so the summary statistics are computed on a float copy.
        if torch.is_floating_point(samples):
            numeric_samples = samples
        else:
            numeric_samples = samples.to(torch.get_default_dtype())

        # Not every family has an entropy implementation (Poisson appears to be
        # one such case); report None instead of aborting the whole call.
        try:
            entropy = distribution.entropy()
        except NotImplementedError:
            entropy = None

        stats = {
            'mean': torch.mean(numeric_samples),
            'std': torch.std(numeric_samples),
            'entropy': entropy,
            'log_prob': distribution.log_prob(samples[0])
        }
        return samples, stats

In [None]:
# ===== Correlation and Covariance =====
class CorrelationStats:
    """Correlation and covariance matrices for 2-D ``(observations, variables)`` data."""

    @staticmethod
    def compute_correlation_matrix(x):
        """Return the Pearson correlation matrix of the columns of ``x``.

        Args:
            x: 2-D floating-point tensor with one observation per row.

        Returns:
            Square tensor with one row/column per input column.  A column with
            zero standard deviation produces NaN entries, because its
            correlation is undefined.
        """
        # Pearson correlation: standardise each column, then average the
        # cross-products with the same (n - 1) divisor used by torch.std.
        mean = torch.mean(x, dim=0)
        centered_data = x - mean
        std = torch.std(x, dim=0)
        normalized_data = centered_data / std
        correlation = torch.mm(normalized_data.T, normalized_data) / (x.size(0) - 1)
        return correlation

    @staticmethod
    def compute_covariance_matrix(x):
        """Return the unbiased (n - 1) sample covariance matrix of the columns of ``x``.

        Args:
            x: 2-D floating-point tensor with one observation per row.
        """
        mean = torch.mean(x, dim=0)
        centered_data = x - mean
        covariance = torch.mm(centered_data.T, centered_data) / (x.size(0) - 1)
        return covariance

    @staticmethod
    def _average_ranks(column):
        """Return 1-based float32 ranks of a 1-D tensor; tied values share their mean rank."""
        _, inverse, counts = torch.unique(
            column, sorted=True, return_inverse=True, return_counts=True
        )
        # For each distinct value, the ordinal ranks it occupies run from
        # `first` to `last`; every tied element receives their midpoint.
        last = torch.cumsum(counts, dim=0)
        first = last - counts + 1
        return ((first + last).float() / 2)[inverse]

    @staticmethod
    def compute_rank_correlation(x):
        """Return the Spearman rank correlation matrix of the columns of ``x``.

        Each column is replaced by its ranks and the Pearson correlation of
        the ranks is returned.  Tied values receive the average of the ranks
        they span, so the result does not depend on how the sort orders
        equal elements.

        Args:
            x: 2-D tensor with one observation per row.

        Returns:
            Square float32 tensor with one row/column per input column.
        """
        ranked_data = torch.empty(x.shape, dtype=torch.float32, device=x.device)
        for col in range(x.size(1)):
            ranked_data[:, col] = CorrelationStats._average_ranks(x[:, col])
        return CorrelationStats.compute_correlation_matrix(ranked_data)



In [None]:
# ===== Hypothesis Testing =====
class HypothesisTests:
    """Student t statistics.  Only the test statistic is returned, not a p-value."""

    @staticmethod
    def ttest_1samp(x, mu=0.0):
        """One-sample t-test statistic.

        All elements of ``x`` are treated as a single sample.

        Args:
            x: Floating-point tensor of observations.
            mu: Hypothesised population mean.

        Returns:
            0-dim tensor ``(mean(x) - mu) / (std(x) / sqrt(n))`` where ``std``
            is the unbiased sample standard deviation and ``n`` is the number
            of elements in ``x``.
        """
        # mean/std reduce over every element, so the sample size must be the
        # element count as well (x.size(0) is only correct for 1-D input).
        n = x.numel()
        mean = torch.mean(x)
        std = torch.std(x, unbiased=True)
        t_stat = (mean - mu) / (std / n ** 0.5)
        return t_stat

    @staticmethod
    def ttest_ind(x, y):
        """Independent two-sample t-test statistic assuming equal variances.

        All elements of each tensor are treated as one sample.

        Args:
            x: Floating-point tensor holding the first sample.
            y: Floating-point tensor holding the second sample.

        Returns:
            0-dim tensor with the pooled-variance t statistic for
            ``mean(x) - mean(y)``.
        """
        # Element counts, to match the flattened means and variances below.
        n1, n2 = x.numel(), y.numel()
        mean1, mean2 = torch.mean(x), torch.mean(y)
        var1, var2 = torch.var(x, unbiased=True), torch.var(y, unbiased=True)

        # Pooled variance: the two sample variances weighted by their degrees
        # of freedom.
        pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
        t_stat = (mean1 - mean2) / torch.sqrt(pooled_var * (1 / n1 + 1 / n2))
        return t_stat



In [None]:
# ===== Advanced Statistical Operations =====
class AdvancedStatistics:
    """Higher-order moments and a Gaussian kernel density estimate."""

    @staticmethod
    def compute_moments(x):
        """Compute statistical moments along ``dim=0``.

        Args:
            x: Floating-point tensor; a 2-D input yields one value per column.

        Returns:
            dict with ``'mean'``, ``'variance'``, ``'skewness'`` and
            ``'kurtosis'``.  The variance is the population (divide-by-n)
            variance, and the kurtosis is the plain fourth standardized moment
            (not excess kurtosis).  A zero-variance column yields NaN skewness
            and kurtosis.
        """
        mean = torch.mean(x, dim=0)
        centered_data = x - mean

        # Standardized moments: central moment k divided by variance ** (k / 2).
        variance = torch.mean(centered_data ** 2, dim=0)
        skewness = torch.mean(centered_data ** 3, dim=0) / variance ** 1.5
        kurtosis = torch.mean(centered_data ** 4, dim=0) / variance ** 2

        return {
            'mean': mean,
            'variance': variance,
            'skewness': skewness,
            'kurtosis': kurtosis
        }

    @staticmethod
    def kernel_density_estimation(x, bandwidth=None):
        """Simple Gaussian KDE evaluated on 100 evenly spaced grid points.

        All elements of ``x`` are treated as one univariate sample.

        Args:
            x: Tensor of observations.
            bandwidth: Kernel standard deviation.  When ``None`` it is set to
                ``std(x) * n ** (-1/5)`` with ``n`` the number of elements.

        Returns:
            Tuple ``(x_grid, density)``: the grid from ``min(x)`` to
            ``max(x)`` and the estimated density at each grid point.  Both
            live on the device of ``x``.
        """
        samples = x.reshape(-1)

        if bandwidth is None:
            # Scott's rule.  The sample size is the element count so that it
            # matches the flattened std/min/max used here.
            bandwidth = samples.std() * (samples.numel() ** (-1 / 5))

        grid_dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
        x_grid = torch.linspace(
            samples.min().item(),
            samples.max().item(),
            100,
            dtype=grid_dtype,
            device=x.device,
        )

        # Broadcast to a (grid, samples) matrix of scaled distances and average
        # the Gaussian kernel over the samples for every grid point at once.
        scaled = (x_grid.unsqueeze(1) - samples.unsqueeze(0)) / bandwidth
        kde = torch.exp(-0.5 * scaled ** 2).mean(dim=1)

        return x_grid, kde / (bandwidth * (2 * torch.pi) ** 0.5)



In [None]:
# ===== Time Series Statistics =====
class TimeSeriesStats:
    """Smoothing and autocorrelation helpers for 1-D series."""

    @staticmethod
    def moving_average(x, window_size):
        """Compute a centred moving average with zero padding at both ends.

        Args:
            x: 1-D floating-point tensor.
            window_size: Number of samples averaged per output value.

        Returns:
            1-D tensor with the same length, dtype and device as ``x``.
            Because the series is padded with zeros, the first and last
            ``window_size // 2`` values are pulled towards zero.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")

        # Build the kernel in the input's dtype/device; conv1d rejects mixed ones.
        weights = torch.ones(window_size, dtype=x.dtype, device=x.device) / window_size

        # Pad window_size - 1 zeros in total so the output length equals
        # len(x) for even windows as well as odd ones.
        left = window_size // 2
        right = (window_size - 1) // 2
        padded = F.pad(x.reshape(1, 1, -1), (left, right))

        ma = F.conv1d(padded, weights.view(1, 1, -1))
        return ma.reshape(-1)

    @staticmethod
    def exponential_moving_average(x, alpha):
        """Compute the exponential moving average of a series.

        Uses ``ema[0] = x[0]`` and
        ``ema[i] = alpha * x[i] + (1 - alpha) * ema[i - 1]``.

        Args:
            x: Tensor indexed along its first dimension.
            alpha: Weight given to the newest observation.

        Returns:
            Tensor shaped like ``x``; an empty input gives an empty result.
        """
        ema = torch.zeros_like(x)
        if len(x) == 0:
            return ema

        ema[0] = x[0]
        for i in range(1, len(x)):
            ema[i] = alpha * x[i] + (1 - alpha) * ema[i - 1]
        return ema

    @staticmethod
    def autocorrelation(x, max_lag):
        """Compute the sample autocorrelation for lags ``0 .. max_lag``.

        Uses the estimator
        ``sum(c[:n - lag] * c[lag:]) / sum(c ** 2)`` with ``c = x - mean(x)``,
        so the value at lag 0 is exactly 1.

        Args:
            x: 1-D floating-point tensor.
            max_lag: Largest lag to evaluate.

        Returns:
            float32 tensor of length ``max_lag + 1``.  Lags with no
            overlapping samples (``lag >= len(x)``) are NaN, as is every entry
            when the series has zero variance.
        """
        n = x.size(0)
        centered_data = x - torch.mean(x)
        total = torch.sum(centered_data ** 2)

        autocorr = torch.full((max_lag + 1,), float('nan'))
        # Slice with an explicit end index: `[:-lag]` is empty when lag == 0.
        for lag in range(min(max_lag, n - 1) + 1):
            overlap = centered_data[:n - lag] * centered_data[lag:]
            autocorr[lag] = torch.sum(overlap) / total

        return autocorr


In [None]:

# ===== Example Usage =====
def demonstrate_statistics():
    """Print a short tour of the statistics helpers on freshly sampled data."""
    # 1000 observations of 5 standard-normal variables
    sample_matrix = torch.randn(1000, 5)

    # Column-wise descriptive statistics
    print("Basic Statistics:", BasicStatistics.compute_basic_stats(sample_matrix))

    # Pearson correlation between the columns
    print("Correlation Matrix:", CorrelationStats.compute_correlation_matrix(sample_matrix))

    # Summary of 1000 draws from the pre-configured normal distribution;
    # the raw draws themselves are not needed here.
    _, normal_stats = DistributionOperations().sample_and_stats('normal', 1000)
    print("Normal Distribution Stats:", normal_stats)

    # Smooth a 100-step random walk with a 5-point moving average
    random_walk = torch.randn(100).cumsum(0)
    print("Moving Average:", TimeSeriesStats.moving_average(random_walk, 5))