In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

### Mean

In [3]:
# Observations shared by the cells that follow
data = [5, 10, 9, 11, 9, 7]

# Step 1: Sum all the numbers in the list
total_sum = sum(data)

# Step 2: Divide that sum by the number of elements to get the arithmetic mean
n_values = len(data)
mean = total_sum / n_values

print("Mean:", mean)

Mean: 8.5


### Range

In [4]:
# Step 1: Find the maximum value in the list
max_value = max(data)
print("Max:", max_value)

# Step 2: Find the minimum value in the list
min_value = min(data)
print("Min:", min_value)

# Step 3: Calculate the range (largest value minus smallest value).
# The result is stored as `data_range` rather than `range` so that the
# built-in range() is not shadowed for the rest of the kernel session.
data_range = max_value - min_value
print("Range:", data_range)

Max: 11
Min: 5
Range: 6


### Mean Absolute Deviation and the % total deviation

In [5]:

# Step 1: Reuse `mean`, which was computed from `data` in the Mean section

# Step 2: Calculate the absolute deviation of every data point from the mean
absolute_deviations = [abs(x - mean) for x in data]

# Step 3: Calculate the mean of the absolute deviations (the MAD).
# The sum is kept because it is also the denominator for the percentages below.
total_deviation = sum(absolute_deviations)
mad = total_deviation / len(absolute_deviations)

# Format the MAD to three significant figures
mad_formatted = f"{mad:.3g}"

# Per-point absolute deviations formatted to three significant figures.
# NOTE: these are individual absolute deviations, not MADs (the MAD is the
# single average above); the variable name and printed label are kept as-is.
mad_list = [f"{abs_dev:.3g}" for abs_dev in absolute_deviations]

# Steps 4-5: Express each absolute deviation as a percentage of the total deviation.
# When every value equals the mean the total is 0, so report 0% for each
# point instead of dividing by zero.
if total_deviation == 0:
    percent_deviations = [0.0 for _ in absolute_deviations]
else:
    percent_deviations = [(abs_dev / total_deviation) * 100 for abs_dev in absolute_deviations]

# Format the percentage deviations to three significant figures and add the "%" sign
percent_deviations_formatted = [f"{percent:.3g}%" for percent in percent_deviations]

print("Mean Absolute Deviation:", mad_formatted)
print("MAD for each data point:", mad_list)
print("Percent of each data point's deviation relative to the total deviation:", percent_deviations_formatted)

Mean Absolute Deviation: 1.67
MAD for each data point: ['3.5', '1.5', '0.5', '2.5', '0.5', '1.5']
Percent of each data point's deviation relative to the total deviation: ['35%', '15%', '5%', '25%', '5%', '15%']


### Variance-Population 

In [6]:

# Step 1: Square each observation's distance from the mean
squared_deviations = [(value - mean) ** 2 for value in data]

# Step 2: Population variance divides the summed squares by the full count n
n_population = len(data)
population_variance = sum(squared_deviations) / n_population

print("Population Variance:", f"{population_variance:.3g}")

Population Variance: 3.92


### Variance-Sample

In [7]:


# Step 1: Square each observation's distance from the mean
squared_deviations = [(value - mean) ** 2 for value in data]

# Step 2: Sample variance divides by n - 1 rather than n
degrees_of_freedom = len(data) - 1
sample_variance = sum(squared_deviations) / degrees_of_freedom

print("Sample Variance:", f"{sample_variance:.3g}")

Sample Variance: 4.7


### Standard Deviation - Population and Sample

In [8]:


# Step 1: Square each observation's distance from the mean and total the squares
squared_deviations = [(value - mean) ** 2 for value in data]
sum_of_squares = sum(squared_deviations)
n_obs = len(data)

# Steps 2-3: Population variance divides by n, sample variance by n - 1
population_variance = sum_of_squares / n_obs
sample_variance = sum_of_squares / (n_obs - 1)

# Steps 4-5: Each standard deviation is the square root of its variance
population_std_dev = math.sqrt(population_variance)
sample_std_dev = math.sqrt(sample_variance)

print("Population Standard Deviation:", f"{population_std_dev:.3g}")
print("Sample Standard Deviation:", f"{sample_std_dev:.3g}")

Population Standard Deviation: 1.98
Sample Standard Deviation: 2.17


### Empirical Rule

In [9]:


# Step 1: Mean of the observations
n_obs = len(data)
mean = sum(data) / n_obs

# Steps 2-4: Sample standard deviation (sum of squared deviations over n - 1, then square root)
squared_deviations = [(value - mean) ** 2 for value in data]
sample_variance = sum(squared_deviations) / (n_obs - 1)
sample_std_dev = math.sqrt(sample_variance)

# Step 5: Intervals of 1, 2 and 3 standard deviations around the mean,
# as used by the Empirical Rule
one_std_dev, two_std_dev, three_std_dev = [
    (mean - k * sample_std_dev, mean + k * sample_std_dev) for k in (1, 2, 3)
]

print("Mean:", f"{mean:.3g}")
print("Sample Standard Deviation:", f"{sample_std_dev:.3g}")
print("68% of data falls within:", one_std_dev)
print("95% of data falls within:", two_std_dev)
print("99.7% of data falls within:", three_std_dev)

Mean: 8.5
Sample Standard Deviation: 2.17
68% of data falls within: (6.33205166113212, 10.66794833886788)
95% of data falls within: (4.16410332226424, 12.83589667773576)
99.7% of data falls within: (1.9961549833963605, 15.00384501660364)


### Chebyshev's Theorem 

In [10]:

# Step 1: Calculate the mean
n_obs = len(data)
mean = sum(data) / n_obs

# Step 2: Calculate the squared deviations from the mean
squared_deviations = [(value - mean) ** 2 for value in data]

# Step 3: Calculate the sample variance (divide by n - 1)
sample_variance = sum(squared_deviations) / (n_obs - 1)

# Step 4: The sample standard deviation is the square root of the sample variance
sample_std_dev = math.sqrt(sample_variance)

# Function to calculate Chebyshev intervals
def chebyshev_interval(k, mean_value=None, std_dev=None):
    """Return the interval lying within k standard deviations of the mean.

    Args:
        k: Number of standard deviations on each side of the mean.
        mean_value: Centre of the interval. When omitted, the notebook-level
            `mean` is used, matching the original behaviour.
        std_dev: Standard deviation to scale by. When omitted, the
            notebook-level `sample_std_dev` is used.

    Returns:
        A `(lower_bound, upper_bound)` tuple.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so the function can also be called without relying on kernel state.
    center = mean if mean_value is None else mean_value
    spread = sample_std_dev if std_dev is None else std_dev
    lower_bound = center - k * spread
    upper_bound = center + k * spread
    return (lower_bound, upper_bound)

# Example: evaluate k = 1, 2, and 3 standard deviations
k_values = [1, 2, 3]

# For each k, store the interval around the mean and the minimum
# proportion of data within it, 1 - 1/k^2
chebyshev_intervals = {}
chebyshev_proportions = {}
for k in k_values:
    chebyshev_intervals[k] = chebyshev_interval(k)
    chebyshev_proportions[k] = 1 - 1 / (k ** 2)

print("Mean:", f"{mean:.3g}")
print("Sample Standard Deviation:", f"{sample_std_dev:.3g}")
for k in k_values:
    print(f"At least {chebyshev_proportions[k]*100:.1f}% of data falls within {k} standard deviations: {chebyshev_intervals[k]}")

Mean: 8.5
Sample Standard Deviation: 2.17
At least 0.0% of data falls within 1 standard deviations: (6.33205166113212, 10.66794833886788)
At least 75.0% of data falls within 2 standard deviations: (4.16410332226424, 12.83589667773576)
At least 88.9% of data falls within 3 standard deviations: (1.9961549833963605, 15.00384501660364)


### Coefficient of Variation

In [12]:
import math

# Observations for this section (same values as the earlier cells)
data = [5, 10, 9, 11, 9, 7]

# Step 1: Mean of the observations
n_obs = len(data)
mean = sum(data) / n_obs

# Steps 2-3: Sample variance = sum of squared deviations divided by n - 1
squared_deviations = [(value - mean) ** 2 for value in data]
sample_variance = sum(squared_deviations) / (n_obs - 1)

# Step 4: Sample standard deviation is the square root of the sample variance
sample_std_dev = math.sqrt(sample_variance)

# Step 5: Coefficient of Variation (CV) = standard deviation as a percentage of the mean
cv = (sample_std_dev / mean) * 100

print("Mean:", f"{mean:.3g}")
print("Sample Standard Deviation:", f"{sample_std_dev:.3g}")
print("Coefficient of Variation (CV):", f"{cv:.3g}%")

Mean: 8.5
Sample Standard Deviation: 2.17
Coefficient of Variation (CV): 25.5%
