In [None]:
import numpy as np

# 1. Data Loading & Metadata Inspection
# Parse the comma-separated MNIST test file. The first line is the header
# row, so it is skipped; genfromtxt's default dtype (float) is used for
# every remaining field.
numpy_array = np.genfromtxt(
    'mnist_test.csv',
    delimiter=',',
    skip_header=1,
)

# Report the core ndarray attributes in one go: the Python type, the shape,
# the number of axes and the element dtype (one line of output each).
print(
    f"Data Type: {type(numpy_array)}",
    f"Array Shape (Rows, Cols): {numpy_array.shape}",
    f"Dimensions: {numpy_array.ndim}",
    f"Data Type of elements: {numpy_array.dtype}",
    sep="\n",
)

# 2. Feature-Target Splitting
# Column 0 holds the target (the digit 0-9 for each row); every column after
# it is a pixel feature. Both names are basic slices of numpy_array, so they
# are views onto the loaded data rather than copies.
y, x = numpy_array[:, 0], numpy_array[:, 1:]

print('Labels (y):', y)
print('Pixel Data (x):', x)

# 3. Image Reconstruction (Reshaping)
# Every row of x is one MNIST image flattened to 784 values (28 * 28).
# Index 4 is the 5th image; reshaping it restores the 28x28 pixel grid.
row_5 = x[4]
resize_pix = row_5.reshape((28, 28))

# Keep the leading 10 rows together as a small batch (still one flat row per image).
first_10_images = x[0:10]

print(
    f"Reshaped image dimensions: {resize_pix.shape}",
    f"Shape of first 10 images: {first_10_images.shape}",
    sep="\n",
)

# 4. Boolean Masking (Filtering)
# The element-wise comparison yields one True/False per label; indexing x
# with that boolean array keeps only the rows whose label equals 7.
mask_row = y == 7
only_seven = x[mask_row]

# The row count of the filtered array is the number of matching images.
print(
    f"Number of '7's found: {only_seven.shape[0]}",
    f"Shape of filtered '7' dataset: {only_seven.shape}",
    sep="\n",
)

# 5. Array Stacking (Horizontal vs. Vertical)
# Generate two synthetic 3x3 integer matrices for the demonstration.
# A locally seeded Generator is used so the printed matrices are identical
# on every Restart & Run All, without touching NumPy's global random state.
rng = np.random.default_rng(42)  # fixed seed, chosen only for reproducibility
stack_1 = rng.integers(1, 10, size=(3, 3))   # values 1..9 (upper bound is exclusive)
stack_2 = rng.integers(10, 20, size=(3, 3))  # values 10..19, so the two sources are easy to tell apart

# hstack: joins the arrays column-wise (3 rows x 6 columns)
hstack = np.hstack((stack_1, stack_2))

# vstack: joins the arrays row-wise (6 rows x 3 columns)
vstack = np.vstack((stack_1, stack_2))

print("Horizontal Stack (6 columns):\n", hstack)
print("\nVertical Stack (6 rows):\n", vstack)

In [None]:
import numpy as np

#  1. Basic Normalization & Statistics
# Cast to float32 and divide by 255 so pixel values move from [0, 255] to
# [0, 1], the range that helps neural-network convergence.
x_norm = x.astype(np.float32) / 255.0

# Note: both statistics below are taken on the unscaled x, not on x_norm.
# axis=0 averages over the images, leaving one mean per pixel position;
# std() with no axis collapses the whole array to a single global value.
x_mean = x.mean(axis=0)
x_sd = x.std()

print(
    f"Shape of mean array: {x_mean.shape}",
    f"Global Standard Deviation: {x_sd}",
    sep="\n",
)

#  2. Vector Operations
# Take the normalized images at the fixed indices 44 and 88 and compute the
# dot product of their flattened pixel vectors.
# Note: a larger dot product can indicate more overlap (similarity) between the two.
random_1, random_2 = x_norm[44], x_norm[88]
result = np.dot(random_1, random_2)
print(f'Dot Product of two random images: {result:.4f}')

#  3. Data Loading & Standardization
# The MNIST test CSV has already been parsed into numpy_array by the first
# cell (header skipped). Re-running np.genfromtxt on the same file would only
# repeat the slow text parsing, so reuse the parsed data. A copy is taken so
# np_array stays an independent array, exactly as a fresh load would be.
np_array = numpy_array.copy()

# Split data into labels (first column) and image features (remaining columns)
y_value = np_array[:, 0]
x_value = np_array[:, 1:]

# Global statistics: one mean and one standard deviation over all pixels of all images
x_value_mean = x_value.mean()
x_value_std = x_value.std()

# Standardize the data (Z-score normalization)
# Formula: z = (x - μ) / σ, with the scalar μ and σ broadcast over the whole array
z = (x_value - x_value_mean) / x_value_std
print("Standardized Data (Z-score):")
print(z)

#  4. Image Masking (Binarization)
# Threshold every pixel at 120: the comparison gives a boolean array, and
# np.where turns it into integers, 1 (white) where the pixel is above 120
# and 0 (black) everywhere else.
masking = x_value > 120
binary_image = np.where(masking, 1, 0)
print("Binary Mask Result:", binary_image, sep="\n")

#  5. Random Array Manipulation
# Generate a 5x10 matrix of random integers between 1 and 9 (upper bound 10
# is exclusive). A locally seeded Generator keeps the printed matrix and the
# indices below identical on every Restart & Run All, without altering
# NumPy's global random state.
rng = np.random.default_rng(42)  # fixed seed, chosen only for reproducibility
rand_1 = rng.integers(1, 10, size=(5, 10))
print("Random Matrix:")
print(rand_1)

# Column index of the maximum value in each row (axis=1 scans along a row).
# When a row contains its maximum more than once, argmax reports the first occurrence.
max_idx = rand_1.argmax(axis=1)
print(f"Indices of row-wise maximums: {max_idx}")

In [None]:
import numpy as np

# Matrix multiplication demo on two small random integer matrices.
# A locally seeded Generator makes the operands (and therefore the product)
# identical on every Restart & Run All, without altering NumPy's global
# random state.
rng = np.random.default_rng(42)  # fixed seed, chosen only for reproducibility

# Create matrix A (5 rows, 3 columns), values 1..9
matrix_a = rng.integers(1, 10, size=(5, 3))
# Create matrix B (3 rows, 2 columns), values 1..9
matrix_b = rng.integers(1, 10, size=(3, 2))

# Matrix Multiplication using dot product
# The inner dimensions (3) must match; the result takes the outer ones (5, 2)
matrix = np.dot(matrix_a, matrix_b)

# Alternative, more modern syntax using the '@' operator (same product for 2-D inputs)
mat = matrix_a @ matrix_b

print(f"Shape of A: {matrix_a.shape}")
print(f"Shape of B: {matrix_b.shape}")
print(f"Resulting Shape: {matrix.shape}")


In [None]:
import pandas as pd
import numpy as np

# 1. Feature Extraction
# Calculate the total 'ink' for each image by summing pixel values across the row (axis 1)
# Since x_norm is scaled [0, 1], this represents the total active area of the digit.
pix = x_norm.sum(axis=1)
print(f"Ink vector shape: {pix.shape}")

# 2. Data Integration
# Create a Pandas DataFrame to link the labels (y) with our new 'Ink_Amount' feature.
# y was parsed by genfromtxt as float64, so it is cast to int here; otherwise
# the group keys and the final message would read "1.0" instead of "1".
# y itself is left untouched.
df = pd.DataFrame({
    'Digit': y.astype(int),
    'Ink_Amount': pix
})

# 3. Statistical Aggregation
# Use the 'Split-Apply-Combine' strategy:
# 1. Split the data by Digit
# 2. Apply the mean function to Ink_Amount
# 3. Combine results into a new Series indexed by Digit
avg_ink_per_digit = df.groupby('Digit')['Ink_Amount'].mean()

print("\n--- Average Ink Amount Per Digit ---")
print(avg_ink_per_digit)

# 4. Insights & Results
# Identify which digit requires the most 'ink' on average:
# idxmax gives the index label (the digit), max gives the mean value itself.
most_ink_digit = avg_ink_per_digit.idxmax()
max_ink_value = avg_ink_per_digit.max()

print(f"\nAnalysis Result: Digit '{most_ink_digit}' uses the most ink (Average: {max_ink_value:.2f} units).")

In [None]:
import pandas as pd
import numpy as np

# 1. Data Loading
# Load the cleaned Titanic dataset.
# NOTE: this rebinds `df`, replacing the MNIST ink DataFrame built in the previous cell.
df = pd.read_csv('Titanic-Dataset-Cleaned.csv')

# 2. Feature Selection
# Extract 'Age' and 'Fare' columns to see if older passengers bought more expensive tickets.
# Rows with a missing Age or Fare are dropped first: np.corrcoef propagates NaN,
# so a single missing value would turn the coefficient into NaN. When the file
# has no missing values in these columns this step changes nothing.
age_fare = df[['Age', 'Fare']].dropna()

# Convert the Pandas DataFrame to a NumPy array for mathematical operations
np_arr = age_fare.to_numpy()

# 3. Correlation Calculation
# Compute the Pearson product-moment correlation coefficients
# rowvar=False indicates that each column represents a variable
# (rows are observations), giving a 2x2 matrix for the two columns.
correlation_matrix = np.corrcoef(np_arr, rowvar=False)

# Extract the specific correlation between Age (index 0) and Fare (index 1);
# the diagonal entries are each variable's correlation with itself.
correlation_value = correlation_matrix[0, 1]

print("Correlation Matrix:\n", correlation_matrix)
print(f"\nPearson Correlation (Age vs. Fare): {correlation_value:.4f}")