In [47]:
import numpy as np
import pandas as pd

# --- NumPy Arrays ---
print("--- NumPy Arrays ---")
# Creating NumPy arrays
list_data = [1, 2, 3, 4, 5]
np_array1 = np.array(list_data)
print(f"1D NumPy array from list: {np_array1}, type: {type(np_array1)}, dtype: {np_array1.dtype}")

list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
np_array2d = np.array(list_of_lists)
print(f"\n2D NumPy array:\n{np_array2d}")
print(f"Shape: {np_array2d.shape}, Dimensions: {np_array2d.ndim}, Size: {np_array2d.size}")


--- NumPy Arrays ---
1D NumPy array from list: [1 2 3 4 5], type: <class 'numpy.ndarray'>, dtype: int64

2D NumPy array:
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
Shape: (4, 3), Dimensions: 2, Size: 12


In [51]:
np_array1.reshape(5, -1)

array([[1],
       [2],
       [3],
       [4],
       [5]])

In [None]:

np_array2d
# 3, 4 -> 1
# 
# 480 x 640
0 -> 255


256

In [55]:
np_array2d.reshape(2, 6)

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]])

In [None]:
scalar = np.array(5) # -> 0 dimensional array
vector = np.array([1, 2, 3]) # -> 1 dimensional array
matrix = np.array([[1, 2], [4]]) # -> 2 dimensional array
matrix

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [57]:

# Special arrays
zeros_array = np.zeros((2, 3)) # Array of all zeros
ones_array = np.ones((3, 2))   # Array of all ones
eye_array = np.eye(3)          # Identity matrix
range_array = np.arange(0, 10, 2) # Like Python's range, but returns an array (start, stop, step)
linspace_array = np.linspace(0, 1, 5) # Array of evenly spaced values (start, stop, num_samples)
random_array = np.random.rand(2, 2) # Random values in [0, 1)
random_int_array = np.random.randint(0, 10, size=(3,3)) # Random integers

print(f"\nZeros array (2x3):\n{zeros_array}")
print(f"\nRange array (0-10, step 2): {range_array}")
print(f"\nLinspace array (0-1, 5 samples): {linspace_array}")
print(f"\nRandom array (2x2):\n{random_array}")



Zeros array (2x3):
[[0. 0. 0.]
 [0. 0. 0.]]

Range array (0-10, step 2): [0 2 4 6 8]

Linspace array (0-1, 5 samples): [0.   0.25 0.5  0.75 1.  ]

Random array (2x2):
[[0.98837465 0.12992279]
 [0.08219533 0.26681372]]


In [59]:
matrix

array([[1, 2],
       [3, 4]])

In [65]:

# --- Array Indexing and Slicing (similar to lists, but more powerful) ---
print("\n--- Array Indexing and Slicing ---")
arr = np.arange(10, 20)
print(f"Original array: {arr}")
print(f"Element at index 3: {arr[3]}")
print(f"Slice from index 2 to 5 (exclusive): {arr[2:5]}")
# arr[2:5] = 100 # Assign a scalar to a slice
print(f"Array after broadcast assignment: {arr}")




--- Array Indexing and Slicing ---
Original array: [10 11 12 13 14 15 16 17 18 19]
Element at index 3: 13
Slice from index 2 to 5 (exclusive): [12 13 14]
Array after broadcast assignment: [10 11 12 13 14 15 16 17 18 19]


In [72]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
arr2d
bool_idx = arr2d > 5
bool_idx

array([[False, False, False],
       [False, False,  True],
       [ True,  True,  True]])

In [74]:
bool_idx

array([[False, False, False],
       [False, False,  True],
       [ True,  True,  True]])

In [93]:

# --- Vectorization (Element-wise operations) ---
print("\n--- Vectorization ---")
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

print(f"Array a: {a}")
print(f"Array b: {b}")
print(f"a + b (element-wise sum): {a + b}")
print(f"a * 2 (scalar multiplication): {a * 2}")
print(f"a ** 2 (element-wise square): {a ** 2}")
print(f"np.sin(a) (universal function): {np.sin(a)}")


--- Vectorization ---
Array a: [1 2 3]
Array b: [4 5 6]
a + b (element-wise sum): [5 7 9]
a * 2 (scalar multiplication): [2 4 6]
a ** 2 (element-wise square): [1 4 9]
np.sin(a) (universal function): [0.84147098 0.90929743 0.14112001]


In [94]:
print(a)
print(b)

[1 2 3]
[4 5 6]


In [89]:
a + b

array([5, 7, 9])

In [97]:
np.log(a)

array([0.        , 0.69314718, 1.09861229])

In [98]:
# Mathematical operations on 2D arrays
arr_A = np.array([[1,2],[3,4]])
arr_B = np.array([[5,6],[7,8]])
print(f"\nArray A:\n{arr_A}")
print(f"Array B:\n{arr_B}")
print(f"A + B (element-wise sum):\n{arr_A + arr_B}")
print(f"A * B (element-wise product, NOT matrix multiplication):\n{arr_A * arr_B}")
print(f"Matrix multiplication (dot product) A @ B or np.dot(A,B):\n{arr_A @ arr_B}") # or np.dot(arr_A, arr_B)


Array A:
[[1 2]
 [3 4]]
Array B:
[[5 6]
 [7 8]]
A + B (element-wise sum):
[[ 6  8]
 [10 12]]
A * B (element-wise product, NOT matrix multiplication):
[[ 5 12]
 [21 32]]
Matrix multiplication (dot product) A @ B or np.dot(A,B):
[[19 22]
 [43 50]]


In [16]:

# --- Aggregation functions ---
print("\n--- Aggregation Functions ---")
big_array = np.random.randn(1000) # 1000 random numbers from standard normal distribution
print(f"Sum of big_array: {big_array.sum()} or {np.sum(big_array)}")
print(f"Mean of big_array: {big_array.mean()}")
print(f"Std dev of big_array: {big_array.std()}")
print(f"Min/Max of big_array: {big_array.min()}, {big_array.max()}")

arr2d_agg = np.array([[1,5,3],[4,2,6]])
print(f"\narr2d_agg:\n{arr2d_agg}")
print(f"Sum of all elements: {arr2d_agg.sum()}")
print(f"Sum along columns (axis=0): {arr2d_agg.sum(axis=0)}") # Result is [1+4, 5+2, 3+6]
print(f"Sum along rows (axis=1): {arr2d_agg.sum(axis=1)}")   # Result is [1+5+3, 4+2+6]


--- Aggregation Functions ---
Sum of big_array: -26.928780400818255 or -26.928780400818255
Mean of big_array: -0.026928780400818256
Std dev of big_array: 1.0293751503384858
Min/Max of big_array: -3.6835192693961543, 2.958480414048263

arr2d_agg:
[[1 5 3]
 [4 2 6]]
Sum of all elements: 21
Sum along columns (axis=0): [5 7 9]
Sum along rows (axis=1): [ 9 12]


In [20]:

# --- Reshaping Arrays ---
print("\n--- Reshaping Arrays ---")
original = np.arange(1, 13) # 1 to 12
print(f"Original array (1D, 12 elements): {original}")
reshaped1 = original.reshape(3, 4) # Reshape to 3 rows, 4 columns
print(f"Reshaped to 3x4:\n{reshaped1}")
reshaped2 = original.reshape(2, 2, 3) # Reshape to 2 blocks of 2x3 matrices
print(f"Reshaped to 2x2x3:\n{reshaped2}")
# Use -1 to infer one dimension: original.reshape(4, -1) will be 4x3


--- Reshaping Arrays ---
Original array (1D, 12 elements): [ 1  2  3  4  5  6  7  8  9 10 11 12]
Reshaped to 3x4:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
Reshaped to 2x2x3:
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]


In [21]:

# Flatten an array
flattened = reshaped1.flatten() # Creates a copy
raveled = reshaped1.ravel()     # May return a view if possible (more memory efficient)
print(f"Flattened array: {flattened}")

Flattened array: [ 1  2  3  4  5  6  7  8  9 10 11 12]


In [26]:
# --- Pandas Integration ---
print("\n--- Pandas Integration with NumPy ---")
# Pandas uses NumPy arrays under the hood for its Series and DataFrames
df = pd.DataFrame({
    'A': np.random.rand(5),
    'B': np.random.randint(1, 10, size=5),
    'C': ['x', 'y', 'z', 'x', 'y']
})
print("DataFrame created with NumPy arrays:")
print(df)

# Get NumPy array from a DataFrame column (Series)
col_A_np = df['A'].to_numpy() # Recommended way
# col_A_np_values = df['A'].values # Older way, still works
print(f"\nColumn 'A' as NumPy array: {col_A_np}, type: {type(col_A_np)}")




--- Pandas Integration with NumPy ---
DataFrame created with NumPy arrays:
          A  B  C
0  0.267175  2  x
1  0.547608  8  y
2  0.002078  8  z
3  0.836983  2  x
4  0.312283  6  y

Column 'A' as NumPy array: [0.26717506 0.54760845 0.00207813 0.83698349 0.31228296], type: <class 'numpy.ndarray'>


In [28]:
# Get NumPy array from the entire DataFrame
df_np = df[['A', 'B']].to_numpy() # For selected numerical columns
print(f"\nDataFrame (cols A, B) as NumPy array:\n{df_np}")

# Applying NumPy ufuncs to Pandas Series
series = pd.Series([1, 4, 9, 16])
print(f"\nOriginal Series: \n{series}")
sqrt_series_np = np.sqrt(series) # Applying NumPy's sqrt function
print(f"Square root of Series using NumPy:\n{sqrt_series_np}")


DataFrame (cols A, B) as NumPy array:
[[2.67175063e-01 2.00000000e+00]
 [5.47608446e-01 8.00000000e+00]
 [2.07813322e-03 8.00000000e+00]
 [8.36983494e-01 2.00000000e+00]
 [3.12282960e-01 6.00000000e+00]]

Original Series: 
0     1
1     4
2     9
3    16
dtype: int64
Square root of Series using NumPy:
0    1.0
1    2.0
2    3.0
3    4.0
dtype: float64


In [100]:
# --- Merging/Joining DataFrames (Recap/Extend Pandas section if needed) ---
# This is more Pandas than NumPy, but often used together
left_df = pd.DataFrame({'key': ['K0', 'K1', 'K2'], 'A': ['A0', 'A1', 'A2']})
right_df = pd.DataFrame({'key': ['K0', 'K1', 'K3'], 'B': ['B0', 'B1', 'B3']})
right_df

Unnamed: 0,key,B
0,K0,B0
1,K1,B1
2,K3,B3


In [None]:



print(f"\nLeft DataFrame:\n{left_df}")
print(f"Right DataFrame:\n{right_df}")




Left DataFrame:
  key   A
0  K0  A0
1  K1  A1
2  K2  A2
Right DataFrame:
  key   B
0  K0  B0
1  K1  B1
2  K3  B3


In [30]:
# Merge (like SQL join)
merged_inner = pd.merge(left_df, right_df, on='key', how='inner')
print(f"\nInner merge on 'key':\n{merged_inner}")

merged_left = pd.merge(left_df, right_df, on='key', how='left')
print(f"\nLeft merge on 'key':\n{merged_left}")


Inner merge on 'key':
  key   A   B
0  K0  A0  B0
1  K1  A1  B1

Left merge on 'key':
  key   A    B
0  K0  A0   B0
1  K1  A1   B1
2  K2  A2  NaN


In [34]:
# Concatenation
df1 = pd.DataFrame({'A': [1,2], 'B': [3,4]})
df2 = pd.DataFrame({'A': [5,6], 'B': [7,8]})
concatenated_rows = pd.concat([df1, df2], axis=0) # axis=0 for rows (default)
concatenated_rows

Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8


In [35]:
concatenated_cols = pd.concat([df1, df2.reset_index(drop=True)], axis=1) # axis=1 for columns, ensure index align
concatenated_cols

Unnamed: 0,A,B,A.1,B.1
0,1,3,5,7
1,2,4,6,8
