In [None]:
# NumPy Tutorial: Numerical Computing in Python

NumPy (Numerical Python) is the fundamental package for scientific computing in Python. It provides:
- A powerful N-dimensional array object
- Sophisticated broadcasting functions
- Tools for integrating C/C++ and Fortran code
- Useful linear algebra, Fourier transform, and random number capabilities

Let's explore NumPy step by step!

Hello World


In [1]:
# 1. Importing NumPy
import numpy as np
import matplotlib.pyplot as plt

# Report the NumPy version the kernel imported, then print the greeting.
print(f"NumPy version: {np.__version__}")
print("Hello NumPy World!")

NumPy version: 2.2.6
Hello NumPy World!


## 2. Creating NumPy Arrays

NumPy arrays are more efficient than Python lists for numerical operations. Here are various ways to create them:

In [3]:
# Creating arrays from lists.
# Uses the `np` alias bound by the notebook's import cell; NumPy is not
# re-imported here under a second alias, so every call in the notebook goes
# through one name.
arr1 = np.array([1, 2, 3, 4, 5])
print("1D array from list:", arr1)
print("Type:", type(arr1))
print("Data type:", arr1.dtype)  # dtype is inferred from the list elements

# 2D array: each inner list becomes one row
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print("\n2D array:")
print(arr2)
print("Shape:", arr2.shape)      # (rows, columns)
print("Dimensions:", arr2.ndim)

# Array with specific data type: dtype= overrides the inferred element type
arr3 = np.array([1, 2, 3], dtype=np.float32)
print("\nFloat32 array:", arr3)
print("Data type:", arr3.dtype)

1D array from list: [1 2 3 4 5]
Type: <class 'numpy.ndarray'>
Data type: int64

2D array:
[[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Dimensions: 2

Float32 array: [1. 2. 3.]
Data type: float32


In [None]:
# Built-in array creation functions.
# Each 2D result is printed as "label, newline, array" in a single print call.
zeros = np.zeros(shape=(3, 4))
print("Zeros array (3x4):", zeros, sep="\n")

ones = np.ones(shape=(2, 3))
print("\nOnes array (2x3):", ones, sep="\n")

# Constant-filled array: every element takes the fill value
full = np.full(shape=(2, 3), fill_value=7)
print("\nArray filled with 7:", full, sep="\n")

# Identity matrix: ones on the main diagonal, zeros elsewhere
identity = np.eye(N=3)
print("\nIdentity matrix (3x3):", identity, sep="\n")

# Evenly stepped values; the stop value (10) itself is excluded
range_arr = np.arange(0, 10, 2)  # start, stop, step
print("\nRange array (0 to 10, step 2):", range_arr)

# Evenly spaced samples; both endpoints are included
linspace_arr = np.linspace(start=0, stop=1, num=5)
print("Linearly spaced (0 to 1, 5 points):", linspace_arr)

## 3. Array Operations

NumPy arrays support vectorized operations, which are much faster than Python loops:

In [None]:
# Arithmetic operations on two equal-length arrays
arr_a = np.array([1, 2, 3, 4])
arr_b = np.array([5, 6, 7, 8])

print("Array A:", arr_a)
print("Array B:", arr_b)

# Element-wise operations: each operator acts position by position
elementwise = (
    ("\nAddition:", arr_a + arr_b),
    ("Subtraction:", arr_a - arr_b),
    ("Multiplication:", arr_a * arr_b),
    ("Division:", arr_a / arr_b),
    ("Power:", arr_a ** 2),
)
for label, result in elementwise:
    print(label, result)

# Operations with scalars: the scalar is applied to every element
print("\nScalar operations:")
for label, result in (("A + 10:", arr_a + 10), ("A * 2:", arr_a * 2)):
    print(label, result)

# Mathematical functions: NumPy functions applied to the whole array at once
print("\nMath functions:")
math_functions = (
    ("Square root:", np.sqrt),
    ("Exponential:", np.exp),
    ("Sine:", np.sin),
    ("Natural log:", np.log),
)
for label, func in math_functions:
    print(label, func(arr_a))

## 4. Array Indexing and Slicing

Access and modify array elements using indexing and slicing:

In [None]:
# 1D array indexing
arr = np.array([10, 20, 30, 40, 50])
one_d_examples = (
    ("Original array:", arr),
    ("First element (index 0):", arr[0]),
    ("Last element (index -1):", arr[-1]),   # negative indices count from the end
    ("Elements 1 to 3:", arr[1:4]),          # slice stop (4) is exclusive
    ("Every second element:", arr[::2]),
)
for label, value in one_d_examples:
    print(label, value)

# 2D array indexing: indices are given as [row, column]
arr_2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("\n2D array:", arr_2d, sep="\n")
print("Element at row 1, column 2:", arr_2d[1, 2])
print("First row:", arr_2d[0])
print("Second column:", arr_2d[:, 1])
print("Subarray (first 2 rows, first 2 columns):", arr_2d[:2, :2], sep="\n")

# Boolean indexing: a comparison yields a True/False mask that selects elements
arr = np.array([1, 2, 3, 4, 5, 6])
mask = arr > 3
print("\nBoolean indexing:")
print("Original array:", arr)
print("Mask (elements > 3):", mask)
print("Elements > 3:", arr[mask])
print("Elements > 3 (direct):", arr[arr > 3])

## 5. Array Statistics and Aggregation

NumPy provides many functions for statistical analysis:

In [None]:
# Create a sample array
data = np.array([1, 5, 3, 8, 2, 7, 4, 6, 9, 1])
print("Data:", data)

# Basic statistics, computed with the array's own reduction methods
print("\nBasic Statistics:")
print("Sum:", data.sum())
print("Mean:", data.mean())
print("Median:", np.median(data))
print("Standard deviation:", data.std())
print("Variance:", data.var())
print("Minimum:", data.min())
print("Maximum:", data.max())

# Position (not value) of the smallest / largest element
print("\nIndices:")
print("Index of minimum:", data.argmin())
print("Index of maximum:", data.argmax())

# 2D array statistics: axis=0 reduces down the rows (one result per column),
# axis=1 reduces across the columns (one result per row)
data_2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("\n2D array:", data_2d, sep="\n")
print("Sum along axis 0 (columns):", data_2d.sum(axis=0))
print("Sum along axis 1 (rows):", data_2d.sum(axis=1))
print("Mean along axis 0:", data_2d.mean(axis=0))

# Cumulative operations on the first five elements
first_five = data[:5]
print("\nCumulative operations:")
print("Original data:", first_five)  # First 5 elements
print("Cumulative sum:", first_five.cumsum())
print("Cumulative product:", first_five.cumprod())

## 6. Array Reshaping and Manipulation

Change the shape and structure of arrays:

In [None]:
# Reshaping arrays: start from the twelve integers 0..11
arr = np.arange(12)
print("Original array:", arr)
print("Shape:", arr.shape)

# Reshape to 2D (3 rows x 4 columns)
reshaped = arr.reshape((3, 4))
print("\nReshaped to 3x4:", reshaped, sep="\n")

# Reshape to 3D (2 blocks of 2 rows x 3 columns)
reshaped_3d = arr.reshape((2, 2, 3))
print("\nReshaped to 2x2x3:", reshaped_3d, sep="\n")

# Flatten array back to one dimension
flattened = reshaped.flatten()
print("\nFlattened:", flattened)

# Transpose: rows and columns swap, giving a 4x3 result
print("\nTranspose of 3x4 array:", reshaped.transpose(), sep="\n")

# Stack arrays
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
print("\nArray 1:", arr1)
print("Array 2:", arr2)

# Vertical stack: the inputs become rows of a 2D array
vstack = np.vstack((arr1, arr2))
print("Vertical stack:", vstack, sep="\n")

# Horizontal stack: the inputs are joined end to end
hstack = np.hstack((arr1, arr2))
print("Horizontal stack:", hstack)

# Split arrays into equally sized pieces
print("\nSplitting arrays:")
arr = np.array([1, 2, 3, 4, 5, 6])
split_arr = np.split(arr, 3)  # Split into 3 parts
print("Original:", arr)
print("Split into 3 parts:", split_arr)

## 7. Random Numbers

NumPy provides powerful random number generation capabilities:

In [None]:
# Set seed for reproducibility.
# The draws below are made in a fixed order; reordering them would change
# which values each call receives from the seeded stream.
np.random.seed(42)

# Random integers (upper bound 10 is exclusive)
print("Random integers (0-9):", np.random.randint(0, 10, size=5))

# Random floats
print("Random floats (0-1):", np.random.random(5))
print("Random floats (uniform distribution):", np.random.uniform(low=0, high=10, size=5))

# Normal distribution: mean=0, std=1, 1000 samples
normal_data = np.random.normal(loc=0, scale=1, size=1000)
print("Normal distribution sample (size 1000):")
print(f"Mean: {normal_data.mean():.3f}")
print(f"Std: {normal_data.std():.3f}")

# Random choice from a fixed set of labels
choices = np.array(['apple', 'banana', 'orange', 'grape'])
random_choice = np.random.choice(choices, size=3)
print("Random choices:", random_choice)

# Shuffle array (np.random.shuffle reorders `arr` in place)
arr = np.arange(10)
print("Original array:", arr)
np.random.shuffle(arr)
print("Shuffled array:", arr)

# Random sample from different distributions
print("\nDifferent distributions:")
exponential = np.random.exponential(scale=2, size=5)
print("Exponential:", exponential)

binomial = np.random.binomial(n=10, p=0.5, size=5)
print("Binomial:", binomial)

poisson = np.random.poisson(lam=3, size=5)
print("Poisson:", poisson)

## 8. Practical Example: Data Analysis

Let's apply NumPy to analyze some simulated sales data:

In [None]:
# Simulate sales data for 4 quarters, 3 products
np.random.seed(42)

# Axis labels for the table (rows = quarters, columns = products)
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
products = ['Product A', 'Product B', 'Product C']

# Generate random integer sales figures, one per (quarter, product) pair
sales_data = np.random.randint(50, 200, size=(4, 3))
print("Sales Data (Quarters x Products):")
print("Quarters:", quarters)
print("Products:", products)
print(sales_data)

# Analysis
print("\n=== SALES ANALYSIS ===")

# Total sales per quarter: sum across the product columns of each row
quarterly_totals = sales_data.sum(axis=1)
print(f"\nQuarterly totals: {quarterly_totals}")
for quarter, total in zip(quarters, quarterly_totals):
    print(f"{quarter}: ${total:,}")

# Total sales per product: sum down the quarter rows of each column
product_totals = sales_data.sum(axis=0)
print(f"\nProduct totals: {product_totals}")
for product, total in zip(products, product_totals):
    print(f"{product}: ${total:,}")

# Overall statistics
best_quarter = quarterly_totals.argmax()
best_product = product_totals.argmax()
print("\nOverall Statistics:")
print(f"Total sales: ${sales_data.sum():,}")
print(f"Average quarterly sales: ${quarterly_totals.mean():,.2f}")
print(f"Best quarter: {quarters[best_quarter]} (${quarterly_totals[best_quarter]:,})")
print(f"Best product: {products[best_product]} (${product_totals[best_product]:,})")

# Growth analysis (Q4 vs Q1): percentage change per product
q1_sales, q4_sales = sales_data[0], sales_data[3]
growth_rates = (q4_sales - q1_sales) / q1_sales * 100
print("\nGrowth from Q1 to Q4:")
for product, growth in zip(products, growth_rates):
    print(f"{product}: {growth:+.1f}%")

# Find quarters where any product exceeded $150.
# np.where on the boolean mask returns the row and column indices of the hits.
high_sales_mask = sales_data > 150
high_sales_quarters, high_sales_products = np.where(high_sales_mask)
print("\nHigh sales (>$150) occurrences:")
for q, p in zip(high_sales_quarters, high_sales_products):
    print(f"{quarters[q]} - {products[p]}: ${sales_data[q, p]}")

## 9. Visualization with NumPy and Matplotlib

Let's create some visualizations to better understand our data:

In [None]:
# Create visualizations: a 2x2 dashboard of the simulated sales data.
# Reads quarters, products, sales_data, quarterly_totals and product_totals
# from the sales-analysis cell, so that cell must have been run first.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 10))

# 1. Bar chart of quarterly totals
ax1.bar(quarters, quarterly_totals, color=['skyblue', 'lightgreen', 'lightcoral', 'lightyellow'])
ax1.set_title('Total Sales by Quarter')
ax1.set_ylabel('Sales ($)')
for i, v in enumerate(quarterly_totals):
    # Place the value label slightly above the top of each bar
    ax1.text(i, v + 5, f'${v}', ha='center')

# 2. Bar chart of product totals
ax2.bar(products, product_totals, color=['lightblue', 'lightgreen', 'lightpink'])
ax2.set_title('Total Sales by Product')
ax2.set_ylabel('Sales ($)')
ax2.tick_params(axis='x', rotation=45)
for i, v in enumerate(product_totals):
    ax2.text(i, v + 10, f'${v}', ha='center')

# 3. Line plot showing trends: one line per product (column of sales_data)
for i, product in enumerate(products):
    ax3.plot(quarters, sales_data[:, i], marker='o', label=product, linewidth=2)
ax3.set_title('Sales Trends by Quarter')
ax3.set_ylabel('Sales ($)')
ax3.legend()
ax3.grid(True, alpha=0.3)

# 4. Heatmap of sales data (rows = quarters, columns = products)
im = ax4.imshow(sales_data, cmap='YlOrRd', aspect='auto')
ax4.set_title('Sales Heatmap')
ax4.set_xticks(range(len(products)))
ax4.set_xticklabels(products)
ax4.set_yticks(range(len(quarters)))
ax4.set_yticklabels(quarters)

# Add values to heatmap; imshow puts column j on the x axis and row i on the y axis
for i in range(len(quarters)):
    for j in range(len(products)):
        ax4.text(j, i, f'${sales_data[i, j]}',
                 ha="center", va="center", color="black", fontweight='bold')

plt.tight_layout()
plt.show()

# Mathematical functions visualization
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.tan(x)

# tan(x) jumps from large positive to large negative values at its asymptotes.
# Plotted as-is, the samples either side of each jump get joined by a spurious
# near-vertical line. Replacing the large-magnitude samples with NaN makes
# Matplotlib break the line there. The cut-off lies well outside the y-limits
# set below, so the visible part of the curve is unchanged.
TAN_CLIP = 10
y3[np.abs(y3) > TAN_CLIP] = np.nan

# Explicit figure/axes interface, matching the dashboard figure above
trig_fig, trig_ax = plt.subplots(figsize=(10, 6))
trig_ax.plot(x, y1, label='sin(x)', linewidth=2)
trig_ax.plot(x, y2, label='cos(x)', linewidth=2)
trig_ax.plot(x, y3, label='tan(x)', linewidth=2)
trig_ax.set_title('Trigonometric Functions')
trig_ax.set_xlabel('x')
trig_ax.set_ylabel('y')
trig_ax.legend()
trig_ax.grid(True, alpha=0.3)
trig_ax.set_ylim(-3, 3)
plt.show()

## 10. Key NumPy Takeaways

### Why NumPy is Essential:

1. **Performance**: NumPy operations are implemented in C and are much faster than pure Python
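2. **Memory Efficiency**: NumPy arrays store elements of a single data type in one compact buffer, so they typically use less memory than Python lists holding the same numbers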
3. **Vectorization**: Perform operations on entire arrays without writing loops
4. **Broadcasting**: Perform operations on arrays of different shapes
5. **Ecosystem**: Foundation for pandas, scikit-learn, matplotlib, and other scientific Python libraries

### Common Use Cases:
- **Data Analysis**: Processing large datasets efficiently
- **Scientific Computing**: Mathematical operations and simulations
- **Machine Learning**: Feature engineering and data preprocessing
- **Image Processing**: Manipulating pixel arrays
- **Financial Analysis**: Time series analysis and statistical calculations

### Best Practices:
- Prefer vectorized operations over explicit Python loops for numerical work on arrays
- Use appropriate data types to save memory
- Take advantage of broadcasting for operations on different-shaped arrays
- Use boolean indexing for filtering data
- Leverage NumPy's statistical functions for data analysis

### Next Steps:
- Explore **pandas** for more advanced data manipulation
- Learn **matplotlib** and **seaborn** for data visualization
- Study **scikit-learn** for machine learning with NumPy arrays
- Practice with real datasets to solidify your understanding