# Joining NumPy Arrays

Joining means putting the contents of two or more arrays into a single array.

In SQL we join tables based on a key, whereas in NumPy we join arrays by axes.

We pass a sequence of arrays that we want to join to the concatenate() function, along with the axis. If axis is not explicitly passed, it is taken as 0.

In [1]:
import numpy as np

In [2]:
# Two 1-D arrays that will be joined end to end.
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

# No axis argument is passed, so concatenate() joins along axis 0.
arr = np.concatenate((arr1, arr2)) # axis=0

print(arr)

[1 2 3 4 5 6]


In [3]:
# Two 2-D arrays with two rows and two columns each.
arr1 = np.array([[1, 2], [3, 4]])

arr2 = np.array([[5, 6], [7, 8]])

# axis=1 joins side by side: each row of arr2 is appended to the matching
# row of arr1, so the result has two rows of four values.
arr = np.concatenate((arr1, arr2), axis=1)

print(arr)

[[1 2 5 6]
 [3 4 7 8]]


Joining Arrays Using Stack Functions

Stacking is the same as concatenation; the only difference is that stacking is done along a new axis.

A 1-D array has only one axis, so two 1-D arrays cannot be concatenated along a second axis directly. Stacking adds that new axis first, which lets us put the arrays one over the other (or side by side), i.e. stacking.

We pass a sequence of arrays that we want to join to the stack() method along with the axis. If axis is not explicitly passed it is taken as 0.

In [4]:
# Two 1-D arrays of equal length.
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

# stack() joins along a new axis. With axis=1 the i-th elements of arr1 and
# arr2 are paired into row i, giving three rows of two values.
arr = np.stack((arr1, arr2), axis=1)

print(arr)

[[1 4]
 [2 5]
 [3 6]]


NumPy provides a helper function, hstack(), to stack along rows (horizontally).

In [6]:
# Two 1-D arrays of equal length.
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

# For 1-D inputs hstack() places the arrays one after the other, so the
# result is again a flat 1-D array.
arr = np.hstack((arr1, arr2))

print(arr)

[1 2 3 4 5 6]


NumPy provides a helper function, vstack(), to stack along columns (vertically).

In [7]:
# Two 1-D arrays of equal length.
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

# vstack() puts the inputs on top of each other: each 1-D array becomes one
# row of the 2-D result.
arr = np.vstack((arr1, arr2))

print(arr)

[[1 2 3]
 [4 5 6]]


NumPy provides a helper function: dstack() to stack along height, which is the same as depth.

In [8]:
# Two 1-D arrays of equal length.
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

# dstack() pairs the i-th elements of the inputs along a third axis; the
# printed result is nested three brackets deep.
arr = np.dstack((arr1, arr2))

print(arr)

[[[1 4]
  [2 5]
  [3 6]]]


# Splitting NumPy Arrays

Splitting is the reverse operation of joining.

Joining merges multiple arrays into one and Splitting breaks one array into multiple.

We use array_split() for splitting arrays: we pass it the array we want to split and the **number** of splits.

In [9]:
# Six elements split into three parts, so every part holds two elements.
arr = np.array([1, 2, 3, 4, 5, 6])

# array_split() returns a Python list of arrays.
newarr = np.array_split(arr, 3)

print(newarr)

[array([1, 2]), array([3, 4]), array([5, 6])]


In [10]:
# Six elements cannot be divided evenly into four parts.
arr = np.array([1, 2, 3, 4, 5, 6])

# array_split() still succeeds: the first two parts get two elements each
# and the last two parts get one element each.
newarr = np.array_split(arr, 4)

print(newarr)

[array([1, 2]), array([3, 4]), array([5]), array([6])]


In [11]:
arr = np.array([1, 2, 3, 4, 5, 6])

# The result is a list, so each part can be accessed by its list index.
newarr = np.array_split(arr, 3)

print(newarr[0])
print(newarr[1])
print(newarr[2])

[1 2]
[3 4]
[5 6]


In [12]:
# A 2-D array with six rows and two columns.
arr = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])

# No axis is given, so the rows are divided: each part keeps two complete rows.
newarr = np.array_split(arr, 3)

print(newarr) # The example above returns three 2-D arrays.

[array([[1, 2],
       [3, 4]]), array([[5, 6],
       [7, 8]]), array([[ 9, 10],
       [11, 12]])]


In [16]:
# A 2-D array with six rows and three columns.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])

# axis=1 splits column-wise: each of the three parts is one column of arr.
newarr = np.array_split(arr, 3, axis=1)
# Print every part together with its number of dimensions; each part is
# still 2-D (six rows, one column).
for x in newarr:
    print(x, '\n', x.ndim, '\n')

[[ 1]
 [ 4]
 [ 7]
 [10]
 [13]
 [16]] 
 2 

[[ 2]
 [ 5]
 [ 8]
 [11]
 [14]
 [17]] 
 2 

[[ 3]
 [ 6]
 [ 9]
 [12]
 [15]
 [18]] 
 2 



An alternative solution is to use hsplit(), the opposite of hstack().
Use the hsplit() method to split the 2-D array into three 2-D arrays along rows.

In [17]:
# A 2-D array with six rows and three columns.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])

# hsplit() cuts the array into three single-column 2-D arrays.
newarr = np.hsplit(arr, 3)

print(newarr)

[array([[ 1],
       [ 4],
       [ 7],
       [10],
       [13],
       [16]]), array([[ 2],
       [ 5],
       [ 8],
       [11],
       [14],
       [17]]), array([[ 3],
       [ 6],
       [ 9],
       [12],
       [15],
       [18]])]


# Searching Arrays
You can search an array for a certain value, and return the indexes that get a match.

To search an array, use the where() method.

In [18]:
# The value 4 occurs at indexes 3, 5 and 6.
arr = np.array([1, 2, 3, 4, 5, 4, 4])

# `arr == 4` is an element-wise comparison; where() returns a tuple holding
# an array with the indexes at which the comparison is true.
x = np.where(arr == 4)

print(x)

(array([3, 5, 6]),)


In [19]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])

# Find the indexes of the even values (remainder 0 when divided by 2).
x = np.where(arr%2 == 0)

print(x)

(array([1, 3, 5, 7]),)


Search Sorted

There is a method called searchsorted() which performs a binary search in a sorted array, and returns the index where the specified value would be inserted to maintain the sort order.

In [20]:
# The array is already sorted in ascending order.
arr = np.array([6, 7, 8, 9])

# 7 is already present at index 1; by default the leftmost suitable
# insertion index is returned, which is 1.
x = np.searchsorted(arr, 7)

print(x)

1


In [21]:
# The array is already sorted in ascending order.
arr = np.array([6, 7, 8, 9])

# side='right' places the insertion point after the existing 7, i.e. index 2.
x = np.searchsorted(arr, 7, side='right')

print(x) # By default the left most index is returned, but we can give side='right' to return the right most index instead.

2


Find the indexes where the values 2, 4, and 6 should be inserted:

In [28]:
# The array is already sorted in ascending order.
arr = np.array([1, 3, 5, 7, 9])

# Passing a list looks up several values at once; one index is returned
# per value, in the order the values were given.
x = np.searchsorted(arr, [2, 4, 6])
# 21 is larger than every element, so its insertion index is 5, one past
# the last existing index.
y = np.searchsorted(arr, [21, 4, 6])

print(x)
print(y)

[1 2 3]
[5 2 3]


In [2]:
!pip install scipy


Collecting scipy
  Downloading scipy-1.14.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Downloading scipy-1.14.1-cp312-cp312-win_amd64.whl (44.5 MB)
   ---------------------------------------- 0.0/44.5 MB ? eta -:--:--
   -------- ------------------------------- 9.4/44.5 MB 49.1 MB/s eta 0:00:01
   ------------------ --------------------- 20.4/44.5 MB 49.7 MB/s eta 0:00:01
   --------------------------- ------------ 30.7/44.5 MB 51.3 MB/s eta 0:00:01
   ------------------------------------ --- 41.2/44.5 MB 50.4 MB/s eta 0:00:01
   ---------------------------------------- 44.5/44.5 MB 45.0 MB/s eta 0:00:00
Installing collected packages: scipy
Successfully installed scipy-1.14.1


In [5]:
import numpy as np
from scipy import stats

# Every value in this array occurs exactly once.
a = np.array([1, 3, 4, 2, 7])

# stats.mode() returns a ModeResult with the modal value and its count.
# With all counts tied at 1, the printed result reports the value 1.
m = stats.mode(a)
print(m)

ModeResult(mode=np.int64(1), count=np.int64(1))


In [7]:
# create a 1-D array; the values 6 and 1 both occur four times
arr1 = np.array([9, 8, 7, 6, 6, 6, 6, 5, 5, 4,
                 3, 2, 1, 1, 1, 1])
 
# display the mode; for this tie the printed result reports 1 with count 4
print(stats.mode(arr1))

ModeResult(mode=np.int64(1), count=np.int64(4))


In [10]:
# A 2-D array with five rows and five columns.
arr = np.array([[1, 2, 3, 4, 5],
                [1, 2, 2, 2, 2],
                [4, 5, 7, 9, 4],
                [6, 7, 8, 9, 2],
                [2, 3, 4, 8, 6]])
 
# applying mode operation and printing the
# result; axis=1 computes one mode (and one count) per row
print(stats.mode(arr, axis=1))

ModeResult(mode=array([1, 2, 4, 2, 2]), count=array([1, 4, 2, 1, 1]))


In [11]:
# np.quantile() is called here on a plain Python list.
arr = [1,2,3,4]
# The second argument is the quantile as a fraction between 0 and 1:
# 0.50 is the median (Q2), 0.25 the first quartile (Q1), 0.75 the third (Q3).
print("Q2 quantile of arr:", np.quantile(arr, 0.50))
print("Q1 quantile of arr:", np.quantile(arr, 0.25))
print("Q3 quantile of arr:", np.quantile(arr, 0.75))

Q2 quantile of arr: 2.5
Q1 quantile of arr: 1.75
Q3 quantile of arr: 3.25
100th quantile of arr: 1.3


pandas:
0    1.75
Name: 0.25, dtype: float64

In [35]:
def calc_mean(data: list)-> float:
    """Return the arithmetic mean of the values in ``data``.

    Args:
        data: A non-empty sequence of numbers.

    Returns:
        The sum of all values divided by the number of values.

    Raises:
        ValueError: If ``data`` is empty, because the mean of no values is
            undefined.
    """
    # Fail with a clear message instead of an opaque ZeroDivisionError.
    if len(data) == 0:
        raise ValueError("calc_mean() requires at least one value")
    return sum(data) / len(data)

In [36]:
def calc_variance(data:list)-> float:
    """Return the population variance of ``data``.

    The variance is the mean of the squared deviations from the arithmetic
    mean; the divisor is ``len(data)`` (not ``len(data) - 1``).

    Args:
        data: A sequence of numbers.

    Returns:
        The sum of squared deviations divided by the number of values.
    """
    mean_value = calc_mean(data)
    squared_total = 0
    for value in data:
        deviation = value - mean_value
        squared_total += deviation**2
    return squared_total/len(data)

In [37]:
import math
def calc_stdev(data: list)->float:
    """Return the population standard deviation of ``data``.

    Args:
        data: A sequence of numbers.

    Returns:
        The square root of ``calc_variance(data)``.
    """
    variance = calc_variance(data)
    return math.sqrt(variance)

In [64]:
def quantil1(data: list) -> float:
    """Return the first quartile (Q1) of ``data`` using the (n + 1) / 4 rule.

    The 1-based rank of Q1 is ``(n + 1) / 4``. A whole-number rank selects
    that element directly; otherwise the result is linearly interpolated
    between the two neighbouring elements.

    Args:
        data: A non-empty sequence of numbers. It is used as given and is
            assumed to be sorted in ascending order.

    Returns:
        The first quartile of ``data``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    n = len(data)
    if n == 0:
        raise ValueError("quantil1() requires at least one value")

    rank = (n + 1) / 4
    # Convert to int: list indexes must be integers, a float index raises TypeError.
    lower = int(rank)
    if lower < 1:
        # Rank lies before the first element (n < 3): clamp to the minimum
        # instead of wrapping around through a negative index.
        return data[0]

    fraction = rank - lower
    if fraction == 0:
        return data[lower - 1]
    return data[lower - 1] + fraction * (data[lower] - data[lower - 1])
    

In [39]:
def quantil2(data: list) -> float:
    """Return the second quartile (Q2, the median) of ``data``.

    Uses the rank rule ``(n + 1) / 2``: a whole-number rank selects that
    element directly; otherwise the result is linearly interpolated between
    the two neighbouring elements.

    Args:
        data: A non-empty sequence of numbers. It is used as given and is
            assumed to be sorted in ascending order.

    Returns:
        The second quartile of ``data``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    n = len(data)
    if n == 0:
        raise ValueError("quantil2() requires at least one value")

    rank = (n + 1) / 2
    lower = int(rank)
    fraction = rank - lower
    if fraction == 0:
        return data[lower - 1]
    return data[lower - 1] + fraction * (data[lower] - data[lower - 1])

In [40]:
def quantil3(data: list) -> float:
    """Return the third quartile (Q3) of ``data`` using the 3 * (n + 1) / 4 rule.

    A whole-number rank selects that element directly; otherwise the result
    is linearly interpolated between the two neighbouring elements.

    Args:
        data: A non-empty sequence of numbers. It is used as given and is
            assumed to be sorted in ascending order.

    Returns:
        The third quartile of ``data``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    n = len(data)
    if n == 0:
        raise ValueError("quantil3() requires at least one value")

    rank = 3 * (n + 1) / 4
    lower = int(rank)
    if lower >= n:
        # Rank reaches or passes the last element (n < 4): clamp to the maximum.
        return data[-1]

    fraction = rank - lower
    if fraction == 0:
        return data[lower - 1]
    return data[lower - 1] + fraction * (data[lower] - data[lower - 1])

In [50]:
def calc_median(data: list) -> float:
    """Return the median of ``data``.

    The values are sorted internally, so the input may be in any order and
    is not modified.

    Args:
        data: A non-empty sequence of numbers.

    Returns:
        The middle value for an odd number of values, or the mean of the two
        middle values for an even number of values.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("calc_median() requires at least one value")

    # The median is defined on ordered data; sort a copy so the caller's
    # list is left untouched.
    ordered = sorted(data)
    middle = len(ordered) // 2
    if len(ordered) % 2 == 0:
        return (ordered[middle - 1] + ordered[middle]) / 2
    return ordered[middle]

In [52]:
# NOTE(review): despite the name `mode_values`, this cell collects every value
# that occurs more than once, not only the most frequent one (8 occurs three
# times here, yet 2, 4 and 5 are reported as well).
number_list = [1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 8]
uniq_values = []
mode_values = []
for i in number_list:
    # First occurrence of a value: remember it as seen.
    if i not in uniq_values:
        uniq_values.append(i)
    # Any later occurrence: record the value as a repeat.
    else:
        mode_values.append(i)
# set() removes the duplicate entries among the recorded repeats.
print(set(mode_values))

{8, 2, 4, 5}


In [56]:
from collections import Counter 
 
# list of elements to calculate mode
n_num = [1, 2, 3, 4, 5, 5]
n = len(n_num)

# Count how often each value occurs.
data = Counter(n_num)
get_mode = dict(data)
# Highest frequency, computed once instead of once per dictionary item.
max_count = max(data.values())
mode = [k for k, v in get_mode.items() if v == max_count]

if len(mode) == n:
    # Every value shares the highest count, so no value stands out as the mode.
    get_mode = "No mode found"
elif len(mode) == 1:
    get_mode = float(mode[0])
else:
    # Several values tie for the highest count; float() cannot parse the
    # joined text "a, b", so report the tied values as a string.
    get_mode = ', '.join(map(str, mode))

print(get_mode)

5.0


In [59]:
def find_mode(num_list):
    """Return the most frequent value(s) of ``num_list`` as text.

    Args:
        num_list: A list of hashable values.

    Returns:
        A string containing every value that reaches the highest occurrence
        count, each followed by a single space. An empty list yields an
        empty string.
    """
    distinct_values = list(set(num_list))
    occurrences = {value: num_list.count(value) for value in distinct_values}
    # default=0 covers the empty list, where there is no count to compare.
    highest = max(occurrences.values(), default=0)
    return "".join(
        str(value) + " "
        for value in distinct_values
        if occurrences[value] == highest
    )


# Read one line from standard input and parse it as whitespace-separated integers.
num_list = list(map(int, input().split()))
# Call the appropriate functions and print the results
mode = find_mode(num_list)
print(mode)

6 


In [65]:
# Sample data, already in ascending order.
el_s = [2, 5, 6, 6, 9, 9, 9, 10, 11, 13]
# Print each statistic computed by the helper functions defined in earlier
# cells; the numeric results are formatted with four decimal places.
print(f"Mittelwert: {calc_mean(el_s):.4f}")
print(f"Variance: {calc_variance(el_s):.4f}")
print(f"STD: {calc_stdev(el_s):.4f}")
print(f"median: {calc_median(el_s):.4f}")
# find_mode() returns text, so it is printed without number formatting.
print(find_mode(el_s))
print(f"Q1: {quantil1(el_s)}")

Mittelwert: 8.0000
Variance: 9.4000
STD: 3.0659
median: 9.0000
9 
Q1: 5.75


In [66]:
# Example: dict.get() with a default value for a missing key
car = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
# "price" is not a key of car, so get() returns the supplied default 15000.
x = car.get("price", 15000)
print(x) # Output: 15000
# get() only reads: the printed dict still contains just its three original keys.
print(car)

15000
{'brand': 'Ford', 'model': 'Mustang', 'year': 1964}
