# Getting Started with Statistics for Data Scientists

This notebook verifies that your Python environment is set up correctly and introduces the core libraries used throughout: NumPy, pandas, Matplotlib, seaborn, and SciPy.

## 1. Import Core Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings

# Notebook-wide configuration: warning filter, plot defaults, and pandas
# display options. Everything set here stays in effect for the cells below.
warnings.filterwarnings(action='ignore')  # NOTE: hides *all* warnings for the session

# Plot appearance: seaborn's grid theme plus a default figure size (inches).
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

# pandas display: never truncate columns, show floats with 3 decimal places.
_display_options = {
    'display.max_columns': None,
    'display.precision': 3,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

# Reaching this point means every import at the top of the cell succeeded.
print("✅ All libraries imported successfully!")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")

## 2. Quick Test - Generate Sample Data

In [None]:
# Draw a reproducible sample of 1000 values from a normal distribution with
# mean 100 and standard deviation 15. Seeding NumPy's global generator here
# also fixes the stream consumed by any later np.random.* calls in this session.
np.random.seed(42)
data = np.random.normal(loc=100, scale=15, size=1000)

# `data` is a sample, so report the sample standard deviation: ddof=1 divides
# by (n - 1). np.std defaults to ddof=0 (population formula), whereas pandas'
# describe()/std() default to ddof=1, so this keeps the two in agreement.
sample_std = np.std(data, ddof=1)

# Basic statistics
print("Sample Statistics:")
print(f"Mean: {np.mean(data):.2f}")
print(f"Median: {np.median(data):.2f}")
print(f"Std Dev: {sample_std:.2f}")
print(f"Min: {np.min(data):.2f}")
print(f"Max: {np.max(data):.2f}")

## 3. Visualization Test

In [None]:
# Show the sample two ways side by side: a histogram on the left and a box
# plot on the right. Explicit Axes objects are used instead of pyplot's
# implicit "current axes" so each call states which panel it draws on.
fig, (hist_ax, box_ax) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Left panel: distribution of values in 30 bins.
hist_ax.hist(data, bins=30, edgecolor='black', alpha=0.7)
hist_ax.set_xlabel('Value')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Histogram of Sample Data')

# Right panel: seaborn box plot of the same values along the y-axis.
sns.boxplot(y=data, ax=box_ax)
box_ax.set_ylabel('Value')
box_ax.set_title('Box Plot of Sample Data')

# Adjust spacing so labels and titles of the two panels do not overlap.
fig.tight_layout()
plt.show()

print("✅ Visualization libraries working correctly!")

## 4. Create Sample DataFrame

In [None]:
# Build a small 100-row demo table with mixed column types:
#   A, B - floats drawn from the standard normal distribution
#   C    - one of three category labels chosen at random
#   D    - random integers from 1 to 99 (randint's upper bound is exclusive)
df = pd.DataFrame({
    'A': np.random.randn(100),
    'B': np.random.randn(100),
    'C': np.random.choice(['Category1', 'Category2', 'Category3'], 100),
    'D': np.random.randint(1, 100, 100)
})

print("DataFrame Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\nFirst few rows:")
# display() is supplied by the notebook (IPython) environment; it renders the
# frame as a rich table rather than plain text.
display(df.head())
print("\nDescriptive Statistics:")
display(df.describe())

## 5. Statistical Test Example

In [None]:
# Independent two-sample t-test demo: draw two normal samples of 100 values
# whose distributions differ only in their mean (100 vs 105, both sd 15),
# then test whether the sample means differ.
sample1 = np.random.normal(loc=100, scale=15, size=100)
sample2 = np.random.normal(loc=105, scale=15, size=100)

# ttest_ind returns a result carrying the test statistic and its p-value.
ttest_result = stats.ttest_ind(sample1, sample2)
t_stat, p_value = ttest_result.statistic, ttest_result.pvalue

# Decide at the 5% significance level.
if p_value < 0.05:
    decision = 'Reject H0'
else:
    decision = 'Fail to reject H0'

print("Independent T-Test:")
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"\nConclusion (α=0.05): {decision}")

## ✅ Environment Check Complete!

If all cells above ran successfully, your environment is ready for statistics practice.

### Next Steps:
1. Create a new notebook for each chapter
2. Follow along with book examples
3. Practice with exercises
4. Experiment with your own datasets