# numpy math operations

In [2]:
a = [1, 2, 3]
b = [4, 5, 6]
c = a + b
print(c)   # does a concatenation and not row addition 

[1, 2, 3, 4, 5, 6]


In [13]:
import numpy as np
a = np.array ([1, 2, 3])
b = np.array ([4, 5, 6])

print(a + b)
print(a - b)
print(a * b)
print(a / b)
print(b // a) # integer division
print(a > b)
print(b > a)
print(a == b)

print(np.mod (a, b))
print(np.power(a, b))

[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]
[4 2 2]
[False False False]
[ True  True  True]
[False False False]
[1 2 3]
[  1  32 729]


In [None]:
# Vectorized  Calculations with Logical Operators

'''

In NumPy, vectorized logical AND and OR operations are performed element-wise on boolean arrays (or arrays that can be implicitly converted to boolean, where 0 is False and non-zero is True). They are crucial for creating powerful boolean masks and filtering data.

Unlike Python's built-in and and or keywords, which perform short-circuiting logical operations on scalar boolean values (or objects that can be evaluated for truthiness), NumPy's vectorized operations are designed for arrays and apply the logic to each corresponding element of the input arrays.

1. Logical AND
NumPy provides two main ways to perform element-wise logical AND:

np.logical_and(array1, array2): This is the explicit function for logical AND.
& (bitwise AND operator): When used with NumPy arrays, especially boolean arrays, & is overloaded to perform element-wise logical AND. This is generally the preferred and more concise syntax for boolean arrays.
Example:

Python

import numpy as np

# Example 1: Operating on boolean arrays
a = np.array([True, False, True, False])
b = np.array([True, True, False, False])

# Using np.logical_and()
result_and_func = np.logical_and(a, b)
print("np.logical_and(a, b):", result_and_func)
# Output: [ True False False False]

# Using the & operator (most common for boolean arrays)
result_and_op = a & b
print("a & b:", result_and_op)
# Output: [ True False False False]

# Example 2: Combining comparison operations (common use case)
data = np.array([10, 25, 5, 40, 15, 30])

# Find elements greater than 10 AND less than 30
condition1 = data > 10
condition2 = data < 30

# Logical AND of the boolean masks
filtered_data = data[condition1 & condition2]
print("\nOriginal data:", data)
print("Condition 1 (data > 10):", condition1)
print("Condition 2 (data < 30):", condition2)
print("Combined condition (data > 10 AND data < 30):", condition1 & condition2)
print("Filtered data:", filtered_data)
# Output: [25 15]

# Example 3: Logical AND with non-boolean arrays (0 is False, non-zero is True)
x = np.array([0, 1, 0, 5])
y = np.array([1, 0, 7, 0])

result_non_bool_and = np.logical_and(x, y)
print("\nnp.logical_and([0,1,0,5], [1,0,7,0]):", result_non_bool_and)
# Output: [False False True False]
# (0 and 1) -> False
# (1 and 0) -> False
# (0 and 7) -> True (because 7 is True)
# (5 and 0) -> False
2. Logical OR
Similarly, for element-wise logical OR:

np.logical_or(array1, array2): The explicit function.
| (bitwise OR operator): Overloaded for element-wise logical OR on NumPy arrays. This is also the preferred and more concise syntax.
Example:

Python

import numpy as np

# Example 1: Operating on boolean arrays
a = np.array([True, False, True, False])
b = np.array([True, True, False, False])

# Using np.logical_or()
result_or_func = np.logical_or(a, b)
print("np.logical_or(a, b):", result_or_func)
# Output: [ True True True False]

# Using the | operator (most common for boolean arrays)
result_or_op = a | b
print("a | b:", result_or_op)
# Output: [ True True True False]

# Example 2: Combining conditions
salaries = np.array([50000, 75000, 30000, 100000, 45000])

# Find salaries less than 40000 OR greater than 90000
condition1 = salaries < 40000
condition2 = salaries > 90000

# Logical OR of the boolean masks
filtered_salaries = salaries[condition1 | condition2]
print("\nOriginal salaries:", salaries)
print("Condition 1 (salaries < 40000):", condition1)
print("Condition 2 (salaries > 90000):", condition2)
print("Combined condition (salaries < 40000 OR salaries > 90000):", condition1 | condition2)
print("Filtered salaries:", filtered_salaries)
# Output: [30000 100000]

# Example 3: Logical OR with non-boolean arrays
x = np.array([0, 1, 0, 5])
y = np.array([1, 0, 7, 0])

result_non_bool_or = np.logical_or(x, y)
print("\nnp.logical_or([0,1,0,5], [1,0,7,0]):", result_non_bool_or)
# Output: [ True True True True]
# (0 or 1) -> True
# (1 or 0) -> True
# (0 or 7) -> True
# (5 or 0) -> True
Important Considerations:
Python's and / or vs. NumPy's & / |:

Python's and and or are short-circuiting logical operators. They evaluate expressions from left to right and stop as soon as the result is determined. They also return the value of the last evaluated operand, not necessarily a boolean True/False. They are not designed for element-wise operations on arrays.
NumPy's & and | are bitwise operators that are overloaded for boolean arrays to perform element-wise logical operations. They do not short-circuit. They evaluate the condition for every single element in the arrays.
Crucially: You cannot use and or or directly with NumPy arrays in a boolean context (e.g., if array_a and array_b: or array_a or array_b). NumPy will raise a ValueError because the truth value of an array with more than one element is ambiguous. You must use np.all() or np.any() for array-wide truthiness checks, or &/|/np.logical_and/np.logical_or for element-wise operations.
Precedence: The & and | operators have higher precedence than comparison operators (>, <, ==, etc.). This means you often need to use parentheses to ensure the comparison operations are performed first.

data > 10 & data < 30 (Incorrect: 10 & data would be evaluated first)
(data > 10) & (data < 30) (Correct: comparisons are done first, then logical AND)
These vectorized logical operations are fundamental for creating complex boolean masks for data filtering, conditional assignment, and various other data manipulation tasks in NumPy, all with C-speed efficiency.'



'''

In [None]:
# VECTORISED BIT WISE OPERATIONS WITH NUMPY

'''

NumPy, in addition to its vectorized arithmetic and logical operations, also provides vectorized bitwise operations. These operations work on the individual bits of the binary representation of integer (and sometimes boolean) array elements. They are performed element-wise across arrays, just like arithmetic operations.

Bitwise operations are generally used for:

Low-level data manipulation.
Working with flags or masks at the bit level.
Optimized calculations in specific algorithms (though less common in general data science).
The bitwise operators in Python (&, |, ^, ~, <<, >>) are overloaded in NumPy to apply element-wise when used with NumPy arrays.

Let's look at the main bitwise operations:

1. Bitwise AND (&)
Performs a bitwise AND operation on corresponding bits of two arrays. A bit is set to 1 if both corresponding bits are 1.

Python

import numpy as np

a = np.array([5, 6, 12, 13], dtype=np.int8)  # Binary: [0101, 0110, 1100, 1101]
b = np.array([3, 7, 10, 15], dtype=np.int8)  # Binary: [0011, 0111, 1010, 1111]

#   5 (0101) & 3 (0011) = 1 (0001)
#   6 (0110) & 7 (0111) = 6 (0110)
#  12 (1100) & 10 (1010) = 8 (1000)
#  13 (1101) & 15 (1111) = 13 (1101)
result_and = a & b
print("a & b:", result_and)
# Output: [ 1  6  8 13]
2. Bitwise OR (|)
Performs a bitwise OR operation. A bit is set to 1 if at least one of the corresponding bits is 1.

Python

import numpy as np

a = np.array([5, 6, 12, 13], dtype=np.int8)  # Binary: [0101, 0110, 1100, 1101]
b = np.array([3, 7, 10, 15], dtype=np.int8)  # Binary: [0011, 0111, 1010, 1111]

#   5 (0101) | 3 (0011) = 7 (0111)
#   6 (0110) | 7 (0111) = 7 (0111)
#  12 (1100) | 10 (1010) = 14 (1110)
#  13 (1101) | 15 (1111) = 15 (1111)
result_or = a | b
print("a | b:", result_or)
# Output: [ 7  7 14 15]
3. Bitwise XOR (^)
Performs a bitwise XOR (exclusive OR) operation. A bit is set to 1 if the corresponding bits are different (one is 0 and the other is 1).

Python

import numpy as np

a = np.array([5, 6, 12, 13], dtype=np.int8)  # Binary: [0101, 0110, 1100, 1101]
b = np.array([3, 7, 10, 15], dtype=np.int8)  # Binary: [0011, 0111, 1010, 1111]

#   5 (0101) ^ 3 (0011) = 6 (0110)
#   6 (0110) ^ 7 (0111) = 1 (0001)
#  12 (1100) ^ 10 (1010) = 6 (0110)
#  13 (1101) ^ 15 (1111) = 2 (0010)
result_xor = a ^ b
print("a ^ b:", result_xor)
# Output: [6 1 6 2]
4. Bitwise NOT (~)
Performs a bitwise NOT (inversion) operation. It flips all the bits (0 becomes 1, and 1 becomes 0). For signed integers, this typically means the two's complement, which can lead to unexpected negative numbers if you're not careful about dtype and bit representation.

Python

import numpy as np

# For positive integers, ~x is equivalent to -(x + 1) in two's complement.
# Let's use a smaller dtype for clarity in binary representation if possible,
# but remember Python's default int handling vs. fixed-size NumPy dtypes.

x = np.array([5, -5], dtype=np.int8) # 8-bit signed integer
print("Original x:", x)

# 5 (int8) -> 00000101
# ~5       -> 11111010 (two's complement for -6)
# -5 (int8) -> 11111011 (two's complement for -5)
# ~(-5)    -> 00000100 (two's complement for 4)

result_not = ~x
print("~x:", result_not)
# Output: [-6  4]
Important for ~: The behavior of ~ depends heavily on the integer's data type (signed vs. unsigned, and number of bits). For example, ~0 in a 32-bit signed integer would be -1.

5. Left Shift (<<)
Shifts the bits of each element to the left by a specified number of positions. Equivalent to multiplying by powers of 2.

Python

import numpy as np

arr = np.array([1, 2, 3], dtype=np.int8)
shift_by = 2

# 1 (0001) << 2 = 4 (0100)
# 2 (0010) << 2 = 8 (1000)
# 3 (0011) << 2 = 12 (1100)
result_lshift = arr << shift_by
print("arr << 2:", result_lshift)
# Output: [ 4  8 12]
6. Right Shift (>>)
Shifts the bits of each element to the right by a specified number of positions. Equivalent to integer division by powers of 2.

Python

import numpy as np

arr = np.array([8, 12, 15], dtype=np.int8)
shift_by = 2

#  8 (1000) >> 2 = 2 (0010)
# 12 (1100) >> 2 = 3 (0011)
# 15 (1111) >> 2 = 3 (0011)
result_rshift = arr >> shift_by
print("arr >> 2:", result_rshift)
# Output: [2 3 3]
Key Considerations:
Integer Dtypes: Bitwise operations are primarily designed for and make sense with integer data types (int8, uint8, int16, etc.). Using them on floating-point arrays will likely result in a TypeError.
Broadcasting: Like other NumPy operations, bitwise operations support broadcasting, allowing you to perform operations between arrays of different (but compatible) shapes.
Efficiency: Because they are implemented as UFuncs in C, these operations are highly optimized and very fast for large arrays.


'''


In [None]:
# broadcasting : example : convert celsius to fahrenheit


In [None]:
a = np.array([1, 2, 3, 4])
print(np.sqrt(a))
print(np.exp(a))  # exponential 
print(np.log(a))
print(np.sin(a))

[1.         1.41421356 1.73205081 2.        ]
[ 2.71828183  7.3890561  20.08553692 54.59815003]
[0.         0.69314718 1.09861229 1.38629436]
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]


In [17]:
# use case of e
# In ML, we have activation functions
# ex: sigmoid activation function

x = np.array([-1, 0, 1])
sigmoid = 1/(1+ np.exp(-x))
print (sigmoid)



[0.26894142 0.5        0.73105858]


In [18]:
# Aggregation Functions

a = np.array ([[1, 2,3], 
               [4, 5, 6]])

print(np.sum(a))

21


In [None]:
print(np.sum(a, axis = 0)) 
# across all rows, dix the column

[5 7 9]


In [21]:
print(np.sum(a))
print(np.mean(a))
print(np.std(a))
print(np.min(a))
print(np.max(a))


21
3.5
1.707825127659933
1
6


# Matrix Operation

In [22]:
# Element wise multiplication

import numpy as np

# Define your NumPy arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Perform element-wise multiplication
result = a * b

print("Array 'a':", a)
print("Array 'b':", b)
print("Result of a * b (element-wise multiplication):", result)


# Use case:  To keep size same 

Array 'a': [1 2 3]
Array 'b': [4 5 6]
Result of a * b (element-wise multiplication): [ 4 10 18]


In [None]:
'''
Explanation:

As you can see from the output, the multiplication a * b does the following:

The first element of a (which is 1) is multiplied by the first element of b (which is 4), resulting in 4.
The second element of a (which is 2) is multiplied by the second element of b (which is 5), resulting in 10.
The third element of a (which is 3) is multiplied by the third element of b (which is 6), resulting in 18.
This is the standard behavior for the * operator when used with NumPy arrays: it performs element-wise operations.

Contrast with Matrix Multiplication (Dot Product)
It's important to distinguish this from matrix multiplication (also known as the dot product or inner product) which is a different operation in linear algebra.

For matrix multiplication, you would use:

The @ operator (preferred for matrices in Python 3.5+)
np.dot(a, b) function
Let's see that for your 1D arrays:
'''

# Dot Product

In [23]:
# Matrix multiplication (dot product) for 1D arrays
dot_product = a @ b
# Or: dot_product = np.dot(a, b)

print("\nResult of a @ b (dot product/inner product):", dot_product)
# Output: 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32


Result of a @ b (dot product/inner product): 32


In [None]:

'''


So, to summarize:

a * b (element-wise multiplication): Multiplies corresponding elements.
a @ b (matrix multiplication/dot product): Performs the linear algebra matrix product. For 1D arrays, this is the inner product (sum of element-wise products).


'''

# Broadcasting with numpy array

In [None]:
'''
NumPy broadcasting is a powerful and flexible mechanism for working with arrays of different shapes during arithmetic operations. It allows NumPy to perform operations on arrays that would normally require them to have the exact same shape, often without actually making copies of the data, which makes it very efficient.

The core idea is that, under certain rules, NumPy can "stretch" or "broadcast" the smaller array across the larger array so that they have compatible shapes.

The Broadcasting Rules
NumPy's broadcasting rules are applied dimension by dimension, starting from the trailing dimension (the rightmost one). Two dimensions are compatible when:

They are equal.
One of them is 1.
If neither of these conditions is met, a ValueError is raised (e.g., "operands could not be broadcast together with shapes...").

If an array has fewer dimensions than the other, it's padded with leading ones (1) for comparison.

Visualizing Broadcasting
Imagine the smaller array being "stretched" or "copied" conceptually to match the larger array's shape. NumPy does this efficiently without actually creating multiple copies in memory.

Examples of Broadcasting
Let's illustrate with common scenarios:

1. Scalar and Array:

A scalar (0-dimensional array) can be broadcast to any array.

Python

import numpy as np

arr = np.array([1, 2, 3])
scalar = 10

result = arr + scalar
print("Array + Scalar:")
print(f"  {arr.shape} + scalar -> {result.shape}")
print(f"  {arr} + {scalar} = {result}")
# Output: [11 12 13]
Explanation: The scalar 10 is conceptually stretched to [10, 10, 10] to match arr's shape (3,).
2. 1D Array and 2D Array:

This is where it gets more interesting.

Python

import numpy as np

matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]]) # Shape: (3, 3)

row_vector = np.array([10, 20, 30]) # Shape: (3,)

# How NumPy sees it for broadcasting:
# matrix:    (3, 3)
# row_vector: ( , 3)  <-- '1' is implicitly added to the left
#             (1, 3)

result = matrix + row_vector
print("\nMatrix + Row Vector:")
print(f"  {matrix.shape} + {row_vector.shape} -> {result.shape}")
print(f"  {matrix}\n+ {row_vector}\n=\n{result}")
# Output:
# [[11 22 33]
#  [14 25 36]
#  [17 28 39]]
Explanation: row_vector (3,) is padded to (1, 3).
Rightmost dimension: 3 (from matrix) and 3 (from row_vector) are equal. (Compatible)
Next dimension (left): 3 (from matrix) and 1 (from row_vector) are compatible because one is 1.
So, row_vector is conceptually copied along the first axis (rows) to match the matrix's 3 rows.
3. Column Vector and 2D Array:

To create a column vector, you typically need to explicitly add a new axis.

Python

import numpy as np

matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]]) # Shape: (3, 3)

col_vector = np.array([[10],
                       [20],
                       [30]]) # Shape: (3, 1) - explicit column vector

# How NumPy sees it for broadcasting:
# matrix:     (3, 3)
# col_vector: (3, 1)

result = matrix + col_vector
print("\nMatrix + Column Vector:")
print(f"  {matrix.shape} + {col_vector.shape} -> {result.shape}")
print(f"  {matrix}\n+ {col_vector}\n=\n{result}")
# Output:
# [[11 12 13]
#  [24 25 26]
#  [37 38 39]]
Explanation:
Rightmost dimension: 3 (from matrix) and 1 (from col_vector) are compatible because one is 1. col_vector is stretched across columns.
Next dimension (left): 3 (from matrix) and 3 (from col_vector) are equal. (Compatible)
4. Incompatible Shapes:

Python

import numpy as np

a = np.array([1, 2, 3]) # Shape: (3,)
b = np.array([10, 20])  # Shape: (2,)

try:
    result = a + b
    print(result)
except ValueError as e:
    print(f"\nIncompatible shapes example:\n{e}")
# Output: ValueError: operands could not be broadcast together with shapes (3,) (2,)
Explanation:
Rightmost dimension: 3 (from a) and 2 (from b) are neither equal nor is one of them 1. Broadcasting fails.
5. Broadcasting with np.newaxis / None:

You can explicitly add new dimensions of size 1 using np.newaxis (or its alias None) to facilitate broadcasting.

Python

import numpy as np

row_vec = np.array([1, 2, 3])  # Shape: (3,)
col_vec = np.array([10, 20])   # Shape: (2,)

# Create a column vector from row_vec
# row_vec[:, np.newaxis] has shape (3, 1)
# col_vec[np.newaxis, :] has shape (1, 2)

#   (3, 1)
# + (1, 2)
# --------
# = (3, 2) (resulting shape)
result_cross_add = row_vec[:, np.newaxis] + col_vec[np.newaxis, :]
print("\nCross-addition with newaxis:")
print(f"  {row_vec[:, np.newaxis].shape} + {col_vec[np.newaxis, :].shape} -> {result_cross_add.shape}")
print(f"  {row_vec[:, np.newaxis]}\n+ {col_vec[np.newaxis, :]}\n=\n{result_cross_add}")
# Output:
# [[11 12]
#  [21 22]
#  [31 32]]
Explanation:
row_vec[:, np.newaxis] reshapes (3,) to (3, 1).
col_vec[np.newaxis, :] reshapes (2,) to (1, 2).
Now, apply broadcasting rules:
Rightmost: 1 vs 2. Compatible (1 is present). (1, 2) stretches to (3, 2).
Left: 3 vs 1. Compatible (1 is present). (3, 1) stretches to (3, 2).
Resulting shape (3, 2).
Benefits of Broadcasting:
Code Conciseness: Reduces the need for explicit loops, making code cleaner and easier to read.
Performance: Operations are implemented in optimized C code, avoiding the overhead of Python loops and often preventing the creation of large, temporary arrays that would consume significant memory.
Memory Efficiency: NumPy performs broadcasting by creating "views" or smart indexing schemes, rather than physically copying data into new arrays of the larger shape. This saves memory.
Understanding broadcasting is fundamental to writing efficient and idiomatic NumPy code.
'''

In [24]:
a  = ([1, 2, 3])
b = ([[1, 2], [3, 4]])
c =([4, 5])

# vstack and hstack

In [None]:
a = np.array([[1, 2 ]])
b = np.array([[3, 4 ]])

print(np.vstack((a, b)))
print ("\n")
print(np.hstack((a, b)))

[[1 2]
 [3 4]]


[[1 2 3 4]]


# np.char.add and np.char.multiply

'''

NumPy's np.char module provides a set of vectorized string operations specifically designed to work with arrays of strings (or string-like data types). This is super useful because standard Python string methods (+, *) don't work element-wise on arrays of strings by default.

np.char.add and np.char.multiply are two common functions in this module.

np.char.add(x1, x2): Element-wise String Concatenation
This function performs element-wise string concatenation (joining) of two arrays of strings. It works similarly to the + operator for Python strings, but applied across arrays.

Parameters:

x1, x2: Input arrays (can be strings, unicode, or object arrays).
Example:

Python

import numpy as np

# Create two arrays of strings
names = np.array(['Alice', 'Bob', 'Charlie'])
surnames = np.array(['Smith', 'Johnson', 'Brown'])

# 1. Concatenate elements from two arrays
full_names = np.char.add(names, surnames)
print("Concatenating two arrays (incorrect, missing space):\n", full_names)
# Output: ['AliceSmith' 'BobJohnson' 'CharlieBrown']

# To add a space in between, you can concatenate multiple times
# Or, often easier:
full_names_with_space = np.char.add(np.char.add(names, ' '), surnames)
print("\nConcatenating with a space:\n", full_names_with_space)
# Output: ['Alice Smith' 'Bob Johnson' 'Charlie Brown']

# 2. Concatenate an array with a scalar string
greeting_prefix = np.array(['Hello, ', 'Hi, ', 'Greetings, '])
messages = np.char.add(greeting_prefix, names)
print("\nConcatenating array with scalar string:\n", messages)
# Output: ['Hello, Alice' 'Hi, Bob' 'Greetings, Charlie']

# You can also use broadcasting rules
suffix = '!'
messages_suffix = np.char.add(names, suffix)
print("\nConcatenating array with a suffix:\n", messages_suffix)
# Output: ['Alice!' 'Bob!' 'Charlie!']

# 3. Concatenating 2D arrays
first_parts = np.array([['A', 'B'], ['C', 'D']])
second_parts = np.array([['x', 'y'], ['z', 'w']])
combined_2d = np.char.add(first_parts, second_parts)
print("\nConcatenating 2D arrays:\n", combined_2d)
# Output:
# [['Ax' 'By']
#  ['Cz' 'Dw']]
np.char.multiply(a, i): Element-wise String Repetition
This function performs element-wise string repetition (multiplication). It repeats each string in the input array a i number of times.

Parameters:

a: Input array of strings.
i: An integer or an array of integers representing the number of times to repeat each string.
Example:

Python

import numpy as np

# Create an array of strings
words = np.array(['apple', 'banana', 'cherry'])

# 1. Multiply by a scalar integer
repeated_words_scalar = np.char.multiply(words, 3)
print("Words repeated 3 times (scalar):\n", repeated_words_scalar)
# Output: ['appleappleapple' 'bananabananabanana' 'cherrycherrycherry']

# 2. Multiply by an array of integers (must be broadcastable)
repeat_counts = np.array([1, 2, 4])
repeated_words_array = np.char.multiply(words, repeat_counts)
print("\nWords repeated by array of counts:\n", repeated_words_array)
# Output: ['apple' 'bananabanana' 'cherrycherrycherrycherry']

# Example with a 2D array and a scalar
emoji = np.array([['😊', '😂'], ['👍', '😎']])
repeated_emoji = np.char.multiply(emoji, 2)
print("\nEmojis repeated twice:\n", repeated_emoji)
# Output:
# [['😊😊' '😂😂']
#  ['👍👍' '😎😎']]
Why use np.char functions?
Vectorization: They apply operations element-wise, meaning you don't need to write explicit Python loops. This makes your code more concise and often much faster, especially for large arrays of strings.
Broadcasting: They support NumPy's broadcasting rules, allowing operations between arrays of different (but compatible) shapes.
Convenience: They mirror many standard Python string methods (.lower(), .upper(), .strip(), .split(), etc.) but apply them across entire arrays, simplifying string array manipulation.
'''

# --- np.char.upper() ---# --- np.char.lower() ---# --- np.char.capitalize() ---# --- np.char.title() ---# --- np.char.swapcase() ---

In [None]:



import numpy as np

# Corrected array definition
a = np.array(["HELLo WorLD", "python programming"])

print("Original array 'a':", a)

# --- np.char.upper() ---
# Converts all cased characters in each element to uppercase.
# Mimics Python's str.upper()
upper_case_array = np.char.upper(a)
print("\nnp.char.upper(a):\n", upper_case_array)
# Output: ['HELLO WORLD' 'PYTHON PROGRAMMING']

# --- np.char.lower() ---
# Converts all cased characters in each element to lowercase.
# Mimics Python's str.lower()
lower_case_array = np.char.lower(a)
print("\nnp.char.lower(a):\n", lower_case_array)
# Output: ['hello world' 'python programming']

# --- np.char.capitalize() ---
# Converts the first character of each string to uppercase and the remaining to lowercase.
# Mimics Python's str.capitalize()
capitalized_array = np.char.capitalize(a)
print("\nnp.char.capitalize(a):\n", capitalized_array)
# Output: ['Hello world' 'Python programming']

# --- Other useful np.char functions (for completeness) ---

# np.char.title()
# Converts the first character of each word to uppercase and remaining to lowercase.
# Mimics Python's str.title()
title_case_array = np.char.title(a)
print("\nnp.char.title(a):\n", title_case_array)
# Output: ['Hello World' 'Python Programming']

# np.char.swapcase()
# Converts uppercase characters to lowercase and vice versa.
# Mimics Python's str.swapcase()
swap_case_array = np.char.swapcase(a)
print("\nnp.char.swapcase(a):\n", swap_case_array)
# Output: ['hellO wORLd' 'PYTHON PROGRAMMING']

array(['Hello world'], dtype='<U11')

In [None]:
'''

two more handy functions from NumPy's np.char module for searching within strings element-wise! These are direct vectorized equivalents of Python's str.find() and str.count() methods.

np.char.find(a, sub, start=0, end=None): Element-wise Find Substring
This function returns the lowest index in each string where the substring is found. If the substring is not found, it returns -1.

Parameters:

a (required): The input array of strings.
sub (required): The substring to search for.
start (optional): The starting position for the search (inclusive). Defaults to 0.
end (optional): The ending position for the search (exclusive). Defaults to the end of the string.
Example:

Python

import numpy as np

words = np.array(['apple', 'banana', 'apricot', 'grape'])

print("Original array 'words':", words)

# 1. Find the index of 'a' in each word
find_a = np.char.find(words, 'a')
print("\nnp.char.find(words, 'a'):", find_a)
# Output: [0 1 0 2]
# 'apple': 'a' is at index 0
# 'banana': 'a' is at index 1 (second 'a')
# 'apricot': 'a' is at index 0
# 'grape': 'a' is at index 2

# 2. Find the index of 'an'
find_an = np.char.find(words, 'an')
print("np.char.find(words, 'an'):", find_an)
# Output: [-1  1 -1 -1]
# 'apple': -1 (not found)
# 'banana': 'an' is at index 1
# 'apricot': -1
# 'grape': -1

# 3. Find the index of 'p' starting from index 1
find_p_from_1 = np.char.find(words, 'p', start=1)
print("np.char.find(words, 'p', start=1):", find_p_from_1)
# Output: [1 -1 1 -1]
# 'apple': 'p' at index 1
# 'banana': -1
# 'apricot': 'p' at index 1
# 'grape': -1

# 4. Find the index of 'e' within a slice (e.g., from index 2 to 4)
find_e_in_slice = np.char.find(words, 'e', start=2, end=4)
print("np.char.find(words, 'e', start=2, end=4):", find_e_in_slice)
# Output: [3 -1 -1 3]
# 'apple': 'e' at index 3
# 'banana': -1
# 'apricot': -1
# 'grape': 'e' at index 3
np.char.count(a, sub, start=0, end=None): Element-wise Count Substring Occurrences
This function returns the number of non-overlapping occurrences of the substring in each string element of the array.

Parameters:

a (required): The input array of strings.
sub (required): The substring to count.
start (optional): The starting position for the search (inclusive). Defaults to 0.
end (optional): The ending position for the search (exclusive). Defaults to the end of the string.
Example:

Python

import numpy as np

text_array = np.array(['hello world', 'python programming', 'apple banana apple'])

print("\nOriginal array 'text_array':", text_array)

# 1. Count occurrences of 'o'
count_o = np.char.count(text_array, 'o')
print("\nnp.char.count(text_array, 'o'):", count_o)
# Output: [2 2 0]
# 'hello world': 2 'o's
# 'python programming': 2 'o's
# 'apple banana apple': 0 'o's

# 2. Count occurrences of 'apple'
count_apple = np.char.count(text_array, 'apple')
print("np.char.count(text_array, 'apple'):", count_apple)
# Output: [0 0 2]

# 3. Count occurrences of 'p' within a slice
count_p_in_slice = np.char.count(text_array, 'p', start=0, end=10)
print("np.char.count(text_array, 'p', start=0, end=10):", count_p_in_slice)
# Output: [0 2 2]
# 'hello world': 0 'p's
# 'python programming': 'p' at index 0, 'p' at index 4 (before end=10) -> 2
# 'apple banana apple': 'p' at index 1, 'p' at index 2 (before end=10) -> 2
Both np.char.find() and np.char.count() are extremely useful for tasks like:

Searching for patterns in textual data.
Feature engineering from text (e.g., counting specific characters or words).
Data cleaning and validation.
They offer significant performance benefits over looping through Python strings when working with large NumPy arrays.


'''

# Padding with np.char.center

In [None]:
'''

'''

# Calculating Measures of Central Tendency with Numpy

In [None]:
# Calculating Measures of Central Tendency with Numpy

import numpy as np

# Create a sample 1D array (representing a dataset)
data_1d = np.array([10, 12, 15, 12, 18, 20, 22, 15, 13, 10])

# Create a sample 2D array (e.g., student scores in different subjects)
data_2d = np.array([[80, 90, 75],
                    [60, 85, 70],
                    [95, 70, 88],
                    [70, 75, 80]])

print("--- 1D Array Analysis ---")
print("Data (1D):", data_1d)

# 1. Mean (Average)
# np.mean(array, axis=None)
mean_1d = np.mean(data_1d)
print(f"Mean (1D): {mean_1d:.2f}")

# 2. Median (Middle value when sorted)
# np.median(array, axis=None)
median_1d = np.median(data_1d)
print(f"Median (1D): {median_1d:.2f}")

# 3. Standard Deviation (Measure of spread)
# np.std(array, axis=None, ddof=0)
# ddof = Delta Degrees of Freedom. Default 0 (population std). Use 1 for sample std.
std_1d_pop = np.std(data_1d)
std_1d_sample = np.std(data_1d, ddof=1)
print(f"Standard Deviation (1D, Population): {std_1d_pop:.2f}")
print(f"Standard Deviation (1D, Sample): {std_1d_sample:.2f}")

# 4. Variance (Square of Standard Deviation)
# np.var(array, axis=None, ddof=0)
var_1d_pop = np.var(data_1d)
var_1d_sample = np.var(data_1d, ddof=1)
print(f"Variance (1D, Population): {var_1d_pop:.2f}")
print(f"Variance (1D, Sample): {var_1d_sample:.2f}")

# 5. Maximum Value
# np.max(array, axis=None) or array.max()
max_1d = np.max(data_1d)
print(f"Maximum (1D): {max_1d}")

# 6. Minimum Value
# np.min(array, axis=None) or array.min()
min_1d = np.min(data_1d)
print(f"Minimum (1D): {min_1d}")

print("\n--- 2D Array Analysis (Across Axes) ---")
print("Data (2D):\n", data_2d)
# Shape: (4, 3) - 4 rows (e.g., students), 3 columns (e.g., subjects)

# Mean
# Mean of all elements
mean_2d_all = np.mean(data_2d)
print(f"\nMean of all elements (2D): {mean_2d_all:.2f}")

# Mean along axis 0 (columns, i.e., average score per subject)
mean_2d_axis0 = np.mean(data_2d, axis=0)
print(f"Mean along axis 0 (per column/subject): {mean_2d_axis0}")
# Output: [76.25 80.   78.25] (average score for subject 1, subject 2, subject 3)

# Mean along axis 1 (rows, i.e., average score per student)
mean_2d_axis1 = np.mean(data_2d, axis=1)
print(f"Mean along axis 1 (per row/student): {mean_2d_axis1}")
# Output: [81.67 71.67 84.33 75.  ] (average score for student 1, student 2, etc.)


# Median
median_2d_axis0 = np.median(data_2d, axis=0)
print(f"\nMedian along axis 0 (per column/subject): {median_2d_axis0}")

# Standard Deviation

std_2d_axis1 = np.std(data_2d, axis=1)
print(f"Standard Deviation along axis 1 (per student): {std_2d_axis1}")

# Max and Min
max_2d_axis0 = np.max(data_2d, axis=0)
print(f"\nMax along axis 0 (per column/subject): {max_2d_axis0}")

min_2d_axis1 = np.min(data_2d, axis=1)
print(f"Min along axis 1 (per row/student): {min_2d_axis1}")

# Percentiles

In [None]:
'''

In statistics, a percentile is a measure used to indicate the value below which a given percentage of observations in a group of observations fall. For example, if a score is at the 80th percentile, it means that 80% of the scores in the dataset are lower than that score.

Percentiles are very useful for understanding the distribution of data and the relative standing of individual data points within a dataset. Common percentiles include:

25th percentile (Q1 - First Quartile): The value below which 25% of the data falls.
50th percentile (Q2 - Second Quartile or Median): The value below which 50% of the data falls. This is equivalent to the median.
75th percentile (Q3 - Third Quartile): The value below which 75% of the data falls.
np.percentile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False)
NumPy provides the np.percentile() function to calculate the q-th percentile(s) of the data along a specified axis.

Parameters:

a (required): The input array or object that can be converted to an array.
q (required): The percentile(s) to compute. This can be a single float or a sequence (list or array) of floats. Values must be between 0 and 100 inclusive.
axis (optional): The axis or axes along which the percentiles are computed.
None (default): Computes the percentile(s) over the flattened array.
int: Computes the percentile(s) along the specified axis.
tuple of int: Computes the percentile(s) along multiple axes.
method (optional): This parameter specifies the interpolation method to use when the desired percentile lies between two data points. Different methods can yield slightly different results, especially with small datasets. The common methods include:
'linear' (default): Interpolates linearly between the two nearest data points.
'lower': Returns the lower of the two data points.
'higher': Returns the higher of the two data points.
'nearest': Returns the data point closest to the desired percentile.
'midpoint': Returns the average of the two nearest data points.
(Note: In older NumPy versions, this parameter was called interpolation. method is the current preferred name and offers more options.)
keepdims (optional): If True, the axes which are reduced are left in the result as dimensions with size one. This ensures the result can broadcast correctly against the original array. Defaults to False.
Examples:

Python

import numpy as np

# Sample 1D array
data_1d = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Sample 2D array
data_2d = np.array([[10, 20, 30],
                    [15, 25, 35],
                    [12, 22, 32]])

print("--- 1D Array Percentiles ---")
print("Data (1D):", data_1d)

# 1. Calculate the 50th percentile (median)
p50_1d = np.percentile(data_1d, 50)
print(f"50th percentile (median) of 1D data: {p50_1d}")
# Output: 5.5 (because median of 10 elements: (5+6)/2)

# 2. Calculate multiple percentiles (e.g., quartiles)
quartiles_1d = np.percentile(data_1d, [25, 50, 75])
print(f"25th, 50th, 75th percentiles of 1D data: {quartiles_1d}")
# Output: [3.25 5.5  7.75]

# 3. Using different interpolation methods
data_small = np.array([1, 2, 3, 4])
p25_linear = np.percentile(data_small, 25, method='linear')
p25_lower = np.percentile(data_small, 25, method='lower')
p25_higher = np.percentile(data_small, 25, method='higher')
p25_nearest = np.percentile(data_small, 25, method='nearest')
p25_midpoint = np.percentile(data_small, 25, method='midpoint')

print(f"\nData for interpolation example: {data_small}")
print(f"25th percentile (linear): {p25_linear}") # 1 + (2-1)*0.25 = 1.25
print(f"25th percentile (lower): {p25_lower}")   # 1
print(f"25th percentile (higher): {p25_higher}") # 2
print(f"25th percentile (nearest): {p25_nearest}") # 1 (closer to 1.25 than 2)
print(f"25th percentile (midpoint): {p25_midpoint}") # (1+2)/2 = 1.5


print("\n--- 2D Array Percentiles (with axis) ---")
print("Data (2D):\n", data_2d)

# 1. 70th percentile of all elements (flattened array)
p70_2d_all = np.percentile(data_2d, 70)
print(f"\n70th percentile of all elements: {p70_2d_all:.2f}")

# 2. 70th percentile along axis 0 (column-wise)
# For each column: [10, 15, 12], [20, 25, 22], [30, 35, 32]
p70_2d_axis0 = np.percentile(data_2d, 70, axis=0)
print(f"70th percentile along axis 0 (per column): {p70_2d_axis0}")

# 3. 70th percentile along axis 1 (row-wise)
# For each row: [10, 20, 30], [15, 25, 35], [12, 22, 32]
p70_2d_axis1 = np.percentile(data_2d, 70, axis=1)
print(f"70th percentile along axis 1 (per row): {p70_2d_axis1}")

# 4. Using keepdims
p70_2d_axis1_keepdims = np.percentile(data_2d, 70, axis=1, keepdims=True)
print(f"70th percentile along axis 1 (per row, keepdims=True):\n {p70_2d_axis1_keepdims}")
print(f"Shape with keepdims: {p70_2d_axis1_keepdims.shape}")
# Original row-wise result was (3,), now (3, 1)

Understanding percentiles and how to calculate them with np.percentile() is fundamental for statistical analysis in Python, allowing you to gauge data distribution and relative standing efficiently.


'''

# Quantiles

In [None]:
'''
You're asking about quantiles, which are very closely related to percentiles!

Quantile vs. Percentile: The Core Difference
The key difference between "quantile" and "percentile" often lies in the scale of the input q value:

Percentile: The q value is typically expressed as a percentage from 0 to 100. For example, the 25th percentile, 50th percentile, 75th percentile.
Quantile: The q value is typically expressed as a proportion from 0.0 to 1.0. For example, the 0.25 quantile, 0.50 quantile, 0.75 quantile.
So, the 0.25 quantile is the same value as the 25th percentile. The 0.50 quantile is the same as the 50th percentile (which is also the median).

Special Quantiles
Some quantiles have special names:

Median: The 0.5 quantile (or 50th percentile), which divides the data into two equal halves.
Quartiles: The 0.25, 0.50, and 0.75 quantiles (or 25th, 50th, and 75th percentiles), which divide the data into four equal parts. These are often referred to as Q1, Q2, and Q3.
Deciles: The 0.1, 0.2, ..., 0.9 quantiles, which divide the data into ten equal parts.
Quintiles: The 0.2, 0.4, 0.6, 0.8 quantiles, which divide the data into five equal parts.
np.quantile() in NumPy
NumPy provides a dedicated function, np.quantile(), which is functionally identical to np.percentile() but expects the q value(s) to be in the range [0.0, 1.0]. In fact, np.percentile internally calls np.quantile after scaling the q values.

Syntax:

np.quantile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False)

The parameters are the same as np.percentile, except for q:

a (required): The input array.
q (required): The quantile(s) to compute. This can be a single float or a sequence (list or array) of floats. Values must be between 0.0 and 1.0 inclusive.
axis (optional): Axis or axes along which the quantiles are computed.
method (optional): Interpolation method (e.g., 'linear', 'lower', 'higher', 'nearest', 'midpoint').
Example:

Python

import numpy as np

# Sample 1D array
data_1d = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Sample 2D array
data_2d = np.array([[10, 20, 30],
                    [15, 25, 35],
                    [12, 22, 32]])

print("--- 1D Array Quantiles ---")
print("Data (1D):", data_1d)

# 1. Calculate the 0.5 quantile (median)
q50_1d = np.quantile(data_1d, 0.5)
print(f"0.5 quantile (median) of 1D data: {q50_1d}")
# Output: 5.5

# 2. Calculate multiple quantiles (e.g., quartiles)
quartiles_1d = np.quantile(data_1d, [0.25, 0.5, 0.75])
print(f"0.25, 0.50, 0.75 quantiles of 1D data: {quartiles_1d}")
# Output: [3.25 5.5  7.75]

# 3. Using different interpolation methods
data_small = np.array([1, 2, 3, 4])
q25_linear = np.quantile(data_small, 0.25, method='linear')
q25_lower = np.quantile(data_small, 0.25, method='lower')
print(f"\nData for interpolation example: {data_small}")
print(f"0.25 quantile (linear): {q25_linear}") # 1 + (2-1)*0.25 = 1.25
print(f"0.25 quantile (lower): {q25_lower}")   # 1

print("\n--- 2D Array Quantiles (with axis) ---")
print("Data (2D):\n", data_2d)

# 1. 0.70 quantile of all elements (flattened array)
q70_2d_all = np.quantile(data_2d, 0.70)
print(f"\n0.70 quantile of all elements: {q70_2d_all:.2f}")

# 2. 0.70 quantile along axis 0 (column-wise)
q70_2d_axis0 = np.quantile(data_2d, 0.70, axis=0)
print(f"0.70 quantile along axis 0 (per column): {q70_2d_axis0}")

# 3. 0.70 quantile along axis 1 (row-wise)
q70_2d_axis1 = np.quantile(data_2d, 0.70, axis=1)
print(f"0.70 quantile along axis 1 (per row): {q70_2d_axis1}")
In summary, np.quantile() and np.percentile() serve the exact same purpose in NumPy, differing only in the conventional input range for q. You can choose which one to use based on whether you prefer to think in terms of percentages (0-100) or proportions (0.0-1.0).


'''

In [None]:
# missed


# View

In [None]:
'''

In NumPy, a view is a powerful concept that allows you to access and manipulate portions of an array without copying its underlying data.
 This is crucial for efficiency, especially when working with very large datasets, as it saves both memory and computation time.

Think of a view as a different "window" into the same block of memory. If you change data through this window, the original data changes too, and vice versa.

How Views are Created
Views are commonly created through several NumPy operations:

1. Slicing (Basic Indexing)
This is the most common way to create a view. When you slice a NumPy array, the result is almost always a view.
'''

In [30]:
import numpy as np

original_array = np.array([10, 20, 30, 40, 50])
print(f"Original array: {original_array}, ID: {id(original_array.base)}")

# Create a slice (which is a view)
a_view = original_array[1:4]
print(f"View of array: {a_view}, ID: {id(a_view.base)}") # 'base' points to the original array
print(f"Is it a view? {a_view.base is original_array}") # True

# Modify the view
a_view[0] = 99
print(f"\nAfter modifying view:")
print(f"Original array: {original_array}") # Original array is changed!
print(f"View of array: {a_view}")

Original array: [10 20 30 40 50], ID: 140736398051104
View of array: [20 30 40], ID: 1800292449904
Is it a view? True

After modifying view:
Original array: [10 99 30 40 50]
View of array: [99 30 40]
