# Statistical user-defined functions

* Creating a statistical user-defined function (UDF) in Python involves defining a function that performs a specific statistical operation on data. The examples below show simple UDFs for common statistical measures: mean, median, variance, standard deviation, correlation, percentile, mode, range, covariance, and kurtosis.

  * EXAMPLES

In [1]:
# Mean
def mean(data):
    """Return the arithmetic mean of a collection of numbers.

    Args:
        data: A sized collection of numbers (``len`` and ``sum`` are applied
            to it).

    Returns:
        ``sum(data) / len(data)``, or ``float('nan')`` when ``data`` is empty.
    """
    count = len(data)
    if count > 0:
        return sum(data) / count
    # An empty input has no mean; NaN is returned instead of dividing by zero.
    return float('nan')


data = [1, 2, 3, 4, 5]
print("Mean:", mean(data))

Mean: 3.0


In [2]:
# Median 
def custom_median(data):
    """Return the median of a collection of numbers.

    Args:
        data: An iterable of numbers; it is sorted into a new list, so the
            caller's object is not modified.

    Returns:
        The middle value when the count is odd, the average of the two middle
        values when the count is even, or ``float('nan')`` for empty input.
    """
    ordered = sorted(data)
    count = len(ordered)
    if not count:
        return float('nan')
    half, is_odd = divmod(count, 2)
    if is_odd:
        return ordered[half]
    # Even count: average the two values that straddle the centre.
    return (ordered[half - 1] + ordered[half]) / 2


data = [5, 3, 1, 4, 2]
print("Median:", custom_median(data))

Median: 3


In [5]:
# Variance
def variance(data):
    """Return the sample variance (``n - 1`` denominator) of a collection.

    Args:
        data: A sized collection of numbers.

    Returns:
        The sum of squared deviations from the mean divided by
        ``len(data) - 1``, or ``float('nan')`` when ``data`` has fewer than
        two values (the sample variance is undefined there).
    """
    n = len(data)
    # With n == 1 the denominator (n - 1) is zero, and with n == 0 it is
    # negative; neither case has a meaningful sample variance.
    if n < 2:
        return float('nan')
    center = sum(data) / n
    return sum((x - center) ** 2 for x in data) / (n - 1)


data = [1, 2, 3, 4, 5]
print("Variance:", variance(data))

Variance: 2.5


In [6]:
# Standard deviation
def std_dev(data):
    """Return the standard deviation of a collection of numbers.

    Args:
        data: Passed unchanged to ``variance``.

    Returns:
        The square root of ``variance(data)``.
    """
    return pow(variance(data), 0.5)


data = [1, 2, 3, 4, 5]
print("Standard Deviation:", std_dev(data))

Standard Deviation: 1.5811388300841898


In [7]:
# Correlation
def correlation(x, y):
    """Return the Pearson correlation coefficient between two collections.

    Args:
        x: A sized collection of numbers.
        y: A sized collection of numbers with the same length as ``x``.

    Returns:
        The Pearson correlation coefficient, or ``float('nan')`` when the
        inputs are empty, differ in length, or when either input has no
        spread (all values equal, including the single-pair case), because
        the coefficient is undefined there.
    """
    n = len(x)
    if n != len(y) or n == 0:
        return float('nan')
    mean_x = sum(x) / n
    mean_y = sum(y) / n
    sum_xy = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    sum_x2 = sum((xi - mean_x) ** 2 for xi in x)
    sum_y2 = sum((yi - mean_y) ** 2 for yi in y)
    denominator = sum_x2 ** 0.5 * sum_y2 ** 0.5
    # A constant series has zero spread, which would otherwise raise
    # ZeroDivisionError; report NaN like the other invalid-input cases.
    if denominator == 0:
        return float('nan')
    return sum_xy / denominator


x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
print("Correlation Coefficient:", correlation(x, y))

Correlation Coefficient: 0.9999999999999998


In [8]:
# Percentile
def percentile(data, percentile):
    """Return the given percentile of a collection using linear interpolation.

    The position of the percentile is ``(n - 1) * percentile / 100`` in the
    sorted data; when it falls between two elements the result is linearly
    interpolated between them.

    Args:
        data: An iterable of numbers; it is sorted into a new list.
        percentile: The percentile to compute, between 0 and 100 inclusive.

    Returns:
        The interpolated percentile value, or ``float('nan')`` when ``data``
        is empty.

    Raises:
        ValueError: If ``percentile`` is not within ``[0, 100]``.
    """
    sorted_data = sorted(data)
    # Indexing an empty list would raise IndexError; report NaN instead.
    if not sorted_data:
        return float('nan')
    # Values outside 0..100 would index past the data or extrapolate silently.
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")
    index = (len(sorted_data) - 1) * percentile / 100.0
    lower = int(index)
    upper = lower + 1
    # At the 100th percentile there is no upper neighbour to interpolate with.
    if upper >= len(sorted_data):
        return sorted_data[lower]
    return sorted_data[lower] + (sorted_data[upper] - sorted_data[lower]) * (index - lower)


data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print("90th Percentile:", percentile(data, 90))

90th Percentile: 9.1


In [9]:
# Mode
from collections import Counter

def mode(data):
    """Return the most frequent value(s) in a collection.

    Args:
        data: A collection of hashable values.

    Returns:
        A list of every value that occurs the maximum number of times, in
        order of first appearance, or ``None`` when ``data`` is falsy (for
        example an empty list).
    """
    if not data:
        return None
    tally = Counter(data)
    peak = max(tally.values())
    # Several values can tie for the highest count, so all of them are kept.
    return [value for value in tally if tally[value] == peak]


# Example usage
data = [1, 2, 2, 3, 4, 4, 4, 5]
print("Mode:", mode(data))

Mode: [4]


In [10]:
# Range
# NOTE: this definition shadows the built-in ``range`` for any code that runs
# after it in the same namespace.
def range(data):
    """Return the spread (maximum minus minimum) of a collection of numbers.

    Args:
        data: A collection of numbers.

    Returns:
        ``max(data) - min(data)``, or ``float('nan')`` when ``data`` is falsy
        (for example an empty list).
    """
    if data:
        return max(data) - min(data)
    return float('nan')


data = [3, 1, 4, 1, 5, 9, 2, 6, 5]
print("Range:", range(data))

Range: 8


In [13]:
# Covariance
def covariance(x, y):
    """Return the sample covariance (``n - 1`` denominator) of two collections.

    Args:
        x: A sized collection of numbers.
        y: A sized collection of numbers with the same length as ``x``.

    Returns:
        The sum of the products of paired deviations from the means, divided
        by ``n - 1``; ``float('nan')`` when the lengths differ or when there
        are fewer than two pairs (the sample covariance is undefined there).
    """
    n = len(x)
    # n == 1 would divide by (n - 1) == 0, so it is rejected together with
    # empty and mismatched input.
    if n != len(y) or n < 2:
        return float('nan')
    mean_x = sum(x) / n
    mean_y = sum(y) / n
    cross_sum = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    return cross_sum / (n - 1)


x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
print("Covariance:", covariance(x, y))

Covariance: 5.0


In [17]:
# Kurtosis
def kurtosis(data):
    """Return the excess kurtosis of a collection of numbers.

    The estimate is the fourth central moment averaged over ``n``, divided by
    the fourth power of the sample (``n - 1`` denominator) standard
    deviation, minus 3.

    Args:
        data: A sized collection of numbers.

    Returns:
        The excess kurtosis, or ``float('nan')`` when ``data`` has fewer than
        four values or has no spread (all values equal).
    """
    n = len(data)
    if n < 4:
        return float('nan')
    center = sum(data) / n
    sample_std = (sum((x - center) ** 2 for x in data) / (n - 1)) ** 0.5
    spread = sample_std ** 4
    # Constant data has zero standard deviation, which would otherwise raise
    # ZeroDivisionError; kurtosis is undefined there, so report NaN.
    if spread == 0:
        return float('nan')
    fourth_moment = sum((x - center) ** 4 for x in data) / n
    return fourth_moment / spread - 3


data = [3, 1, 4, 1, 5, 9, 2, 6, 5]
print("Kurtosis:", kurtosis(data))

Kurtosis: -0.9954275262917243
