# Standardizing Data

To standardize a data set, subtract the mean from each data point and divide the result by the standard deviation. The standardized data will then have a mean of zero and a standard deviation of one.

$$ z = \frac{x - \bar{x}}{\sigma} $$

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Parameters of the normal distribution the sample is drawn from
mu = 10
sigma = 2
sample_size = 100

# Generate the sample
sample = np.random.normal(mu, sigma, sample_size)

# Standardize the sample using its own mean and standard deviation
# (np.std defaults to ddof=0), so the result has mean 0 and std 1
# up to floating-point rounding.
standardized_sample = (sample - np.mean(sample)) / np.std(sample)

# Plotting: original and standardized histograms side by side
plt.figure(figsize=(12, 5))

# Original sample
plt.subplot(1, 2, 1)
plt.hist(sample, bins=20, alpha=0.7, color='skyblue')
# Build the title from the generating parameters so the label always
# matches mu and sigma (it reports the distribution parameters, not the
# sample statistics).
plt.title(f'Original Sample (Mean={mu}, Std Dev={sigma})')
plt.xlabel('Value')
plt.ylabel('Frequency')

# Standardized sample
plt.subplot(1, 2, 2)
plt.hist(standardized_sample, bins=20, alpha=0.7, color='salmon')
plt.title('Standardized Sample (Mean=0, Std Dev=1)')
plt.xlabel('Standardized Value')
plt.ylabel('Frequency')

plt.tight_layout()
plt.show()

# Verification of standardized sample mean and standard deviation:
print(f"Mean of standardized sample: {np.mean(standardized_sample):.4f}")
print(f"Standard deviation of standardized sample: {np.std(standardized_sample):.4f}")

In [None]:
# Box plots of the original and the standardized sample, side by side.
# Each entry describes one panel: (data, panel title, y-axis label).
box_panels = [
    (sample, 'Original Sample', 'Value'),
    (standardized_sample, 'Standardized Sample', 'Standardized Value'),
]

plt.figure(figsize=(10, 5))

for panel_index, (panel_data, panel_title, panel_ylabel) in enumerate(box_panels, start=1):
    # One row, two columns; panel_index selects the left (1) or right (2) slot
    plt.subplot(1, 2, panel_index)
    plt.boxplot(panel_data)
    plt.title(panel_title)
    plt.ylabel(panel_ylabel)

plt.tight_layout()
plt.show()

In [None]:
# Draw `sample_size` values uniformly from [0, 1); the trailing expression
# makes the notebook display the resulting array's shape.
x = np.random.uniform(low=0, high=1, size=sample_size)
x.shape

In [None]:
# Strip plot comparing the sample before and after standardization.
# A small random horizontal offset spreads the points so they do not all
# fall on a single vertical line.
random_jitter = np.random.uniform(0, .1, sample_size)

# Both series share the same jittered x positions
x1 = random_jitter
y1 = sample

x2 = x1
y2 = standardized_sample

plt.figure(figsize=(2, 6))
plt.plot(x1, y1, '.', x2, y2, '.')
plt.xlim([-.5, .5])

# Add horizontal line at y=mu (the mean parameter the sample was drawn with)
plt.axhline(y=mu, color='blue', linestyle='--', label='original mean')

# Text box contents: generating sigma vs. the standardized spread
textstr = '\n'.join((
    f'Original Std: {sigma:.1f}',
    'Standardized Std: 1.0'
))

# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

# Place the text box in axes coordinates: x=1.05 puts it just outside the
# right edge of the axes, y=0.5 anchors its top at mid-height.
plt.text(1.05, 0.5, textstr, transform=plt.gca().transAxes, fontsize=10,
         verticalalignment='top', bbox=props)

# Add horizontal line at y=0 (the mean of the standardized sample)
plt.axhline(y=0, color='red', linestyle='--', label='standardized mean')
plt.title("Data before and after standardization")
# Legend sits outside the axes on the right, just above the text box
plt.legend(bbox_to_anchor=(1.05, .6), loc='lower left', borderaxespad=0.)
plt.show()