# Getting Started with Real Simple Stats

This notebook provides a comprehensive introduction to the Real Simple Stats library.

## Installation

```bash
pip install real-simple-stats
```

In [None]:
# Import the library
import real_simple_stats as rss
from real_simple_stats import descriptive_statistics as desc
from real_simple_stats import probability_utils as prob
from real_simple_stats import normal_distributions as norm
from real_simple_stats import hypothesis_testing as ht

import numpy as np
import matplotlib.pyplot as plt

## 1. Descriptive Statistics

Let's start with basic descriptive statistics using a sample dataset.

In [None]:
# Sample data: test scores for a class of 15 students.
# This list is the running example reused by later cells.
test_scores = [
    85, 92, 78, 96, 88,
    91, 84, 89, 93, 87,
    90, 86, 94, 82, 95,
]

# Emit the heading, the raw data, and the sample size on separate lines.
print(
    "Test Scores Dataset:",
    test_scores,
    f"Number of students: {len(test_scores)}",
    sep="\n",
)

In [None]:
# Calculate basic statistics: centre (mean, median) and spread
# (sample standard deviation, sample variance) of the test scores.
mean_score = desc.mean(test_scores)
median_score = desc.median(test_scores)
std_dev = desc.sample_std_dev(test_scores)
variance = desc.sample_variance(test_scores)

# Report every statistic with the same two-decimal formatting.
print("Basic Descriptive Statistics:")
for label, value in (
    ("Mean", mean_score),
    ("Median", median_score),
    ("Standard Deviation", std_dev),
    ("Variance", variance),
):
    print(f"{label}: {value:.2f}")

In [None]:
# Five-number summary: a mapping of statistic name -> value
summary = desc.five_number_summary(test_scores)

print("Five-Number Summary:")
for stat, value in summary.items():
    # Capitalize the statistic name so it reads as a label.
    label = stat.capitalize()
    print(f"{label}: {value}")

# Interquartile Range, reported after a blank separator line
iqr = desc.interquartile_range(test_scores)
print()
print(f"Interquartile Range (IQR): {iqr}")

## 2. Probability Calculations

Let's explore basic probability concepts.

In [None]:
# Complement rule: derive the probability of "no rain" from the probability of rain
p_rain = 0.3  # Probability of rain
p_no_rain = prob.probability_not(p_rain)

print(
    f"Probability of rain: {p_rain}",
    f"Probability of no rain: {p_no_rain}",
    sep="\n",
)

# Joint probability of two events treated as independent:
# rain today and rain tomorrow
p_rain_today = 0.3
p_rain_tomorrow = 0.4
p_rain_both_days = prob.joint_probability(p_rain_today, p_rain_tomorrow)

print()  # blank line between the two examples
print(f"Probability of rain both days: {p_rain_both_days}")

In [None]:
# Combinatorics: counting selections of k students out of n
n_students = 20
k_selected = 5

combinations = prob.combinations(n_students, k_selected)
permutations = prob.permutations(n_students, k_selected)

# The ',' format spec inserts thousands separators into the counts.
print(
    f"Ways to choose {k_selected} students from {n_students}:",
    f"Combinations: {combinations:,}",
    f"Permutations: {permutations:,}",
    sep="\n",
)

## 3. Normal Distribution

Let's work with the normal distribution — the foundation of many statistical methods.

In [None]:
# Tabulate the PDF and CDF of the standard normal distribution
# (mean 0, standard deviation 1) at a few whole-number z-scores.
z_scores = [-2, -1, 0, 1, 2]

# Table heading, column titles, and a horizontal rule
print("Standard Normal Distribution:")
print("Z-score\tPDF\t\tCDF")
print("-" * 35)

for z in z_scores:
    pdf_val = norm.normal_pdf(z, mean=0, std_dev=1)
    cdf_val = norm.normal_cdf(z, mean=0, std_dev=1)
    # Right-align the z-score; show densities/probabilities to 4 decimals.
    row = f"{z:>4}\t{pdf_val:.4f}\t\t{cdf_val:.4f}"
    print(row)

In [None]:
# Custom normal distribution modelling test scores
mean_test = 85
std_test = 10

# What's the probability a student scores above 95?
# Upper-tail probability: P(X > 95) = 1 - CDF(95)
cdf_at_95 = norm.normal_cdf(95, mean_test, std_test)
prob_above_95 = 1 - cdf_at_95
print(f"Probability of scoring above 95: {prob_above_95:.4f} ({prob_above_95*100:.2f}%)")

# What score represents the 90th percentile?
# Inverse CDF evaluated at 0.90 for the same mean and standard deviation
percentile_90 = norm.inverse_normal_cdf(0.90, mean_test, std_test)
print(f"90th percentile score: {percentile_90:.2f}")

## 4. Hypothesis Testing

Let's perform a simple hypothesis test.

In [None]:
# One-sample t-test on the test scores
# H₀: μ = 90 (null hypothesis: mean score is 90)
# H₁: μ ≠ 90 (alternative hypothesis: mean score is not 90)

null_mean = 90
alpha = 0.05

sample_mean = desc.mean(test_scores)
t_statistic, p_value = ht.one_sample_t_test(test_scores, null_mean)

# Report the inputs and the test results
print("One-Sample t-Test Results:")
print(f"Sample mean: {sample_mean:.2f}")
print(f"Null hypothesis mean: {null_mean}")
print(f"t-statistic: {t_statistic:.4f}")
print(f"p-value: {p_value:.6f}")
print(f"Significance level (α): {alpha}")

# Decision rule: reject H₀ only when the p-value is strictly below α
reject_null = p_value < alpha

if reject_null:
    print("\nConclusion: Reject H₀ (p < α)")
    print(f"The sample provides sufficient evidence that the mean is not {null_mean}.")
else:
    print("\nConclusion: Fail to reject H₀ (p ≥ α)")
    print(f"The sample does not provide sufficient evidence that the mean differs from {null_mean}.")

## 5. Confidence Intervals

Calculate confidence intervals for the mean.

In [None]:
# 95% confidence interval for the mean
confidence_level = 0.95
ci_lower, ci_upper = ht.confidence_interval_mean(test_scores, confidence_level)

# Format the level once as a percentage. The 'g' presentation type drops the
# trailing ".0" (so 0.95 prints as "95%", not "95.0%") and hides binary
# floating-point noise for other levels (e.g. 0.07 * 100 == 7.000000000000001).
confidence_pct = f"{confidence_level * 100:g}%"

print(f"{confidence_pct} Confidence Interval for the Mean:")
print(f"Lower bound: {ci_lower:.2f}")
print(f"Upper bound: {ci_upper:.2f}")
print(f"\nInterpretation: We are {confidence_pct} confident that the true population mean")
print(f"lies between {ci_lower:.2f} and {ci_upper:.2f}.")

## 6. Using the Glossary

The library includes a comprehensive statistical glossary.

In [None]:
from real_simple_stats.glossary import lookup

# Statistical terms to look up in the glossary
terms = ['p-value', 'confidence interval', 'standard deviation']

for term in terms:
    try:
        definition = lookup(term)
        # Blank line, upper-cased heading, the definition, then a divider rule
        print(f"\n{term.upper()}:", definition, "-" * 50, sep="\n")
    except KeyError:
        # Report the miss and continue with the remaining terms.
        print(f"Term '{term}' not found in glossary.")

## 7. Visualization Example

Create some basic visualizations of our data.

In [None]:
# Two panels side by side: histogram (left) and box plot (right).
# The explicit fig/axes interface makes it clear which panel each call targets.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(10, 6))

# Compute the reference statistics once; each one is used both for the
# position of its marker line and for the text of its legend entry.
score_mean = desc.mean(test_scores)
score_median = desc.median(test_scores)

# Histogram of test scores with dashed mean/median markers
ax_hist.hist(test_scores, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
ax_hist.axvline(score_mean, color='red', linestyle='--', label=f'Mean: {score_mean:.1f}')
ax_hist.axvline(score_median, color='green', linestyle='--', label=f'Median: {score_median:.1f}')
ax_hist.set_xlabel('Test Scores')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Distribution of Test Scores')
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)

# Box plot. Vertical is Matplotlib's default orientation, so the `vert`
# argument (deprecated in recent Matplotlib releases in favour of
# `orientation`) is omitted to avoid a deprecation warning.
ax_box.boxplot(test_scores)
ax_box.set_ylabel('Test Scores')
ax_box.set_title('Box Plot of Test Scores')
ax_box.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## Next Steps

This notebook covered the basics of Real Simple Stats. To learn more:

1. **Explore other modules**: Check out `binomial_distributions`, `linear_regression_utils`, `chi_square_utils`
2. **Read the documentation**: Visit [real-simple-stats.readthedocs.io](https://real-simple-stats.readthedocs.io/)
3. **Try the CLI**: Use `rss-calc --help` for command-line calculations
4. **Check out more examples**: Look in the `examples/` directory

Happy analyzing! 📊