# NumPy Fundamentals - Part 4a

## Week 2, Day 1 (Wednesday) - April 16th, 2025

### Overview
This is a continuation of Part 4, focusing on practical applications of NumPy for data analysis.

In [None]:
# Import NumPy
import numpy as np

## 2. Practical Applications

Let's explore some practical applications of NumPy for data analysis.

### 2.1 Image Processing

In image processing, images are represented as multi-dimensional arrays. Let's create a simple example of image manipulation:

In [None]:
# Build a small 5x5 grayscale test image (pixel values in the 0-255 range):
# one bright centre pixel, a mid-grey ring around it, and a dark border.
image = np.array([
    [50, 50, 50, 50, 50],
    [50, 100, 100, 100, 50],
    [50, 100, 200, 100, 50],
    [50, 100, 100, 100, 50],
    [50, 50, 50, 50, 50]
])

print("Original image:")
print(image)

# Increase brightness (add 50 to each pixel)
brighter = image + 50
# Clamp to the valid range so brightened pixels can never exceed 255
brighter = np.clip(brighter, 0, 255)

# The blank line before each heading is written as the "\n" escape sequence;
# a raw line break inside a quoted string is a SyntaxError.
print("\nBrighter image:")
print(brighter)

# Invert the image (255 - pixel value): dark pixels become bright and vice versa
inverted = 255 - image
print("\nInverted image:")
print(inverted)

# Simple blur: 3x3 mean filter applied to interior pixels only
def apply_mean_filter(image):
    """Blur a 2-D image by replacing each interior pixel with its 3x3 mean.

    The output is allocated with ``np.zeros_like(image)``, so it has the
    shape and dtype of ``image`` and every mean is cast to that dtype when
    it is stored. The outermost rows and columns are never written and
    therefore stay 0.
    """
    n_rows, n_cols = image.shape
    result = np.zeros_like(image)

    # (top, left) is the upper-left corner of each 3x3 window; the pixel
    # being filtered is the window centre at (top + 1, left + 1).
    for top in range(n_rows - 2):
        for left in range(n_cols - 2):
            window = image[top:top + 3, left:left + 3]
            result[top + 1, left + 1] = window.mean()

    return result

# Blur the small test image with the 3x3 mean filter defined above
blurred = apply_mean_filter(image)
# The leading blank line is written as the "\n" escape sequence; a raw line
# break inside a quoted string is a SyntaxError.
print("\nBlurred image:")
print(blurred.astype(int))  # Convert to int for cleaner display

### Vectorized Image Processing

We can make our image processing functions more efficient using vectorization. Let's implement a vectorized version of our blur filter using NumPy's built-in functions:

In [None]:
# Synthetic 20x20 test image: black background, a bright 10x10 square in the
# middle, and a darker 4x4 square nested inside it
large_image = np.zeros((20, 20))
outer_square = slice(5, 15)
inner_square = slice(8, 12)
large_image[outer_square, outer_square] = 200
large_image[inner_square, inner_square] = 50

# Corrupt the image with Gaussian noise (mean 0, standard deviation 20);
# the fixed seed makes the noise pattern reproducible
np.random.seed(42)
noise = np.random.normal(loc=0, scale=20, size=large_image.shape)
# Clamp the noisy pixels back into the valid 0-255 range
noisy_image = (large_image + noise).clip(0, 255)

# Mean (box) filter computed with whole-array operations on a zero-padded image
def mean_filter_vectorized(image, kernel_size=3):
    """Apply a ``kernel_size`` x ``kernel_size`` mean filter to a 2-D image.

    The image is zero-padded by ``kernel_size // 2`` pixels on every side, so
    border pixels are averaged together with zeros and the output has the
    same shape as the input.

    Parameters
    ----------
    image : np.ndarray
        2-D array of pixel values.
    kernel_size : int, optional
        Side length of the square averaging window (default 3).

    Returns
    -------
    np.ndarray
        Filtered image with the shape and dtype of ``image``. For integer
        images the means are truncated when cast back to the input dtype.

    Raises
    ------
    ValueError
        If ``kernel_size`` is smaller than 1, or ``image`` is not 2-D.
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be at least 1")

    rows, cols = image.shape
    pad = kernel_size // 2
    padded_image = np.pad(image, pad, mode='constant')

    # Output pixel (i, j) averages padded_image[i:i+k, j:j+k]. Instead of
    # visiting each pixel, add up the k*k shifted views of the padded image:
    # every addition updates all pixels at once, so the Python-level loop
    # runs k*k times rather than rows*cols times.
    window_sum = np.zeros((rows, cols), dtype=float)
    for di in range(kernel_size):
        for dj in range(kernel_size):
            window_sum += padded_image[di:di + rows, dj:dj + cols]

    # Cast back so the result keeps the input dtype
    return (window_sum / kernel_size**2).astype(image.dtype)

# Smooth the noisy image with the mean filter (default 3x3 kernel)
filtered_image = mean_filter_vectorized(noisy_image)

# Compare the same central 5x5 window (rows 8-12, columns 8-12) of the clean,
# noisy and filtered images; values are shown as ints for readability
section = np.s_[8:13, 8:13]
for heading, picture in (
    ("Original image section:", large_image),
    ("\nNoisy image section:", noisy_image),
    ("\nFiltered image section:", filtered_image),
):
    print(heading)
    print(picture[section].astype(int))

### More Advanced Image Operations

NumPy can be used for more advanced image processing techniques:

In [None]:
# Edge detection using a Sobel filter
def sobel_edge_detection(image):
    """Return the Sobel gradient magnitude of a 2-D image, scaled to 0-255.

    The image is zero-padded by one pixel, the horizontal and vertical Sobel
    kernels are applied at every pixel, and the two responses are combined
    as ``sqrt(gx**2 + gy**2)``. The result is rescaled so that its largest
    value is 255.

    Parameters
    ----------
    image : np.ndarray
        2-D array of pixel values (any real numeric dtype).

    Returns
    -------
    np.ndarray
        Float array with the shape of ``image``. An image that produces no
        gradient response at all (for example all zeros) yields all zeros.

    Raises
    ------
    ValueError
        If ``image`` is not 2-D.
    """
    # Sobel operators for x and y directions
    sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=float)
    sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=float)

    rows, cols = image.shape

    # Work in float: gradients are signed, so accumulating them in an
    # unsigned pixel dtype such as uint8 would wrap around.
    padded_image = np.pad(np.asarray(image, dtype=float), 1, mode='constant')

    # Output pixel (i, j) is sum(kernel * padded_image[i:i+3, j:j+3]).
    # Accumulate one kernel coefficient at a time over a shifted view of the
    # padded image, so each step updates every pixel at once.
    edges_x = np.zeros((rows, cols), dtype=float)
    edges_y = np.zeros((rows, cols), dtype=float)
    for di in range(3):
        for dj in range(3):
            shifted = padded_image[di:di + rows, dj:dj + cols]
            edges_x += sobel_x[di, dj] * shifted
            edges_y += sobel_y[di, dj] * shifted

    # Calculate magnitude of the gradient
    edges_magnitude = np.sqrt(edges_x**2 + edges_y**2)

    # Normalize to 0-255 range; skip the division when there is nothing to
    # scale, otherwise 0 / 0 would fill the result with NaN.
    peak = edges_magnitude.max() if edges_magnitude.size else 0.0
    if peak == 0:
        return edges_magnitude

    return edges_magnitude / peak * 255

# Run Sobel edge detection on the clean (noise-free) test image
edges = sobel_edge_detection(large_image)

# Show only the window selected by `section`, as ints for readability
edge_window = edges[section]
print("Edge detection result (section):", edge_window.astype(int), sep="\n")

### 2.2 Financial Data Analysis

NumPy is widely used in financial data analysis. Let's create a simple example of stock price analysis:

In [None]:
# Simulate one year (252 trading days) of daily prices for 5 stocks
np.random.seed(42)  # For reproducibility
days = 252  # Trading days in a year
stocks = 5   # Number of stocks

# Price of each stock on the first day
start_prices = np.array([100, 200, 50, 75, 300])

# Daily returns as fractions, drawn from a normal distribution with
# mean 0.0005 (0.05%) and standard deviation 0.01 (1%); one row per day
daily_returns = np.random.normal(loc=0.0005, scale=0.01, size=(days, stocks))

# Build the price series by compounding:
#   price[day] = price[day - 1] * (1 + return[day])
# Row 0 holds the starting prices, so daily_returns[0] is never applied.
price_series = np.zeros((days, stocks))
price_series[0] = start_prices
growth_factors = 1 + daily_returns

for day in range(1, days):
    previous_prices = price_series[day - 1]
    price_series[day] = previous_prices * growth_factors[day]

# Print first 5 days of prices
print("First 5 days of stock prices:")
print(price_series[:5])

# Summary statistics: percentage change from the first to the last day
final_prices = price_series[-1]
price_change = final_prices - start_prices
overall_returns = price_change / start_prices * 100

print("\nSummary Statistics:")
print(f"Starting prices: {start_prices}")
print(f"Final prices: {final_prices}")
print(f"Overall returns (%): {overall_returns}")

# Recover the daily returns (in %) from consecutive prices
day_over_day_change = price_series[1:] - price_series[:-1]
calculated_returns = day_over_day_change / price_series[:-1] * 100

# Volatility: standard deviation of each stock's daily returns
# (NumPy's default population standard deviation, ddof=0)
volatility = calculated_returns.std(axis=0)
print(f"\nVolatility (std of daily returns %): {volatility}")

# Indices of the stocks with the highest and lowest overall return
best_stock = overall_returns.argmax()
worst_stock = overall_returns.argmin()

# Stocks are reported 1-based, hence the +1 on the 0-based indices
print(f"\nBest performing stock: Stock {best_stock+1} with {overall_returns[best_stock]:.2f}% return")
print(f"Worst performing stock: Stock {worst_stock+1} with {overall_returns[worst_stock]:.2f}% return")

# Correlation between stocks: each column of calculated_returns is one stock,
# so the columns (not the rows) are treated as the variables
correlation_matrix = np.corrcoef(calculated_returns, rowvar=False)
print("\nCorrelation matrix between stocks:")
print(correlation_matrix)