# NumPy

In [183]:
import numpy as np

In [184]:
# NumPy array vs. built-in list
list1 = [1, 2, 3, 4, 5]
arr = np.array(list1)

# A printed array shows its elements separated by spaces, with no commas.
print(arr)
# A printed list shows its elements separated by commas.
print(list1)

[1 2 3 4 5]
[1, 2, 3, 4, 5]


In [185]:
# Indexing and slicing a 1-D array use the same [start:stop:step] syntax as a list.
print(arr[2])   # Access the element at a single index
print(arr[5:-5:-2])     # [start:stop:step] - the third operand is the step. start=5 is past the end, so slicing
                        # begins at the last element; step=-2 walks backwards taking every second element;
                        # stop=-5 refers to the first element, which is excluded from the result
print(arr[::-1])        # Reverse the array (the same idiom as reversing a list)

3
[5 3]
[5 4 3 2 1]


In [186]:
# Multi-dimensional array
# np.array receives the data as ONE argument, so every dimension is written as a nested list
# inside a single outer pair of square brackets.
a_mul = np.array(
    [
        [
            [
                [1, 1, 1, 1],
                [2, 2, 2, 2],
                [3, 3, 3, 3],
            ]
        ],
        [
            [
                [4, 4, 4, 4],
                [5, 5, 5, 5],
                [6, 6, 6, 6],
            ]
        ],
    ]
)
print(a_mul)
print(a_mul.shape)  # Length of each axis, outermost first
print(a_mul.ndim)   # Number of axes, i.e. how many levels of list nesting hold the elements (four here)
print(a_mul.size)   # Total number of elements in the whole array; this is an attribute, not a method

[[[[1 1 1 1]
   [2 2 2 2]
   [3 3 3 3]]]


 [[[4 4 4 4]
   [5 5 5 5]
   [6 6 6 6]]]]
(2, 1, 3, 4)
4
24


In [187]:
# Create an array pre-filled with one particular value.
# np.full takes the shape of the n-dimensional array and the value to place in every position.
a = np.full(shape=(2, 2), fill_value=7)
print(a)

# The same can be done for the common fill values 0 and 1 with np.zeros and np.ones.

[[7 7]
 [7 7]]


In [188]:
# In practice np.zeros and np.ones are often used to create a buffer array of a given shape, because:
# 1. Initialization:     every element gets a known starting value without setting each one by hand.
# 2. Placeholder arrays: they are a quick way to build a template that later computations fill in.
# 3. Custom data types:  both accept a dtype argument (e.g. int or float); the default, used here, is float.
zeros = np.zeros(shape=(3, 2, 5))
ones = np.ones(shape=(3, 2, 4))

print(zeros, end='\n\n')
print(ones)

[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]]


In [189]:
# np.empty also creates an n-d array of the requested shape, but unlike np.zeros / np.ones it does not
# initialize the elements. The memory is still allocated; it simply keeps whatever values were already
# there, so the printed contents are arbitrary and every element must be assigned before it is read.
empty = np.empty(shape=(3, 3))
print(empty)

[[nan  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


In [190]:
# arange and linspace:
# arange():   values from a start (inclusive) up to a stop (exclusive), advancing by a fixed step.
# linspace(): a fixed number of evenly spaced values over an interval; both end points are included by default.

arange = np.arange(0, 11, 2)            # third argument = step size
linspace = np.linspace(0, 21, num=4)    # third argument = how many evenly spaced values to generate

print(arange, end='\n\n')
print(linspace)

[ 0  2  4  6  8 10]

[ 0.  7. 14. 21.]


In [191]:
# Mathematical operations on arrays behave differently from the same operations on lists.
# A list combined with a scalar is either repeated (list * 3) or raises an exception (list + 5).
# An array behaves like a vector: the operation is applied to every element.

# Element-wise arithmetic with a scalar: add, subtract, multiply, divide, modulo
for result in (arr + 5, arr - 5, arr * 5, arr / 5, arr % 5):
    print(result)

[ 6  7  8  9 10]
[-4 -3 -2 -1  0]
[ 5 10 15 20 25]
[0.2 0.4 0.6 0.8 1. ]
[1 2 3 4 0]


In [192]:
# Array methods

# Append elements with np.append(). The values are added after the last element and the result is a
# new array (the input array is not modified), so it is assigned back to arr.
arr = np.append(arr, [6,7,8])
print(arr)

[1 2 3 4 5 6 7 8]


In [193]:
# Insert an element before a specific index with np.insert(arr, index, value); a new array is returned.
arr = np.insert(arr, 4, 1)
print(arr)

[1 2 3 4 1 5 6 7 8]


In [194]:
# Deleting elements with np.delete(arr, index, axis): returns a new array without the element(s) at index.
# axis is optional: 0 removes rows and 1 removes columns; when it is omitted the array is flattened first.
# An index that is out of bounds raises an IndexError, and an axis the array does not have raises an AxisError.
arr = np.delete(arr, 2, axis=0)

In [195]:
# Display arr after the deletion: the element that was at index 2 is gone.
arr

array([1, 2, 4, 1, 5, 6, 7, 8])

In [196]:
# Aggregating functions in numpy
# a_mul has shape (2, 1, 3, 4). `axis` selects the axis that is collapsed by the aggregation;
# leaving it out aggregates over the whole (flattened) array.
print(a_mul, end='\n\n\n')
# 1. Sum: sum of the array elements along the specified axis (default is to flatten the array)
print("Sum of the elements in the array:\n",np.sum(a_mul, axis=3), end='\n\n')

# 2. Mean: mean of the array elements along an axis (default is to flatten the array)
print("Mean of the elements in the array:\n",np.mean(a_mul, axis=2))

# 3. Product: product of the array elements along a specified axis
print("Product of elements:\n", np.prod(a_mul, axis=0))

# 4. Minimum: smallest value along a specified axis
print("Minimum value in the array elements:", np.min(a_mul, axis=2))

# 5. Maximum: largest value along a specified axis
print("Maximum value in the array elements:", np.max(a_mul, axis=2))

# 6. Median: median of the array; no axis is given, so it is taken over all elements
print("Median of the given array:",np.median(a_mul))

# The remaining examples use a_mul.T, which reverses the axes: shape (2, 1, 3, 4) becomes (4, 3, 1, 2).
# axis=1 of the transposed array is therefore the axis of length 3 (axis 2 of a_mul).

# 7. Standard deviation: standard deviation of the array elements along a specified axis
print("Standard Deviation of the array elements:",np.std(a_mul.T, axis=1))

# 8. Variance: variance of the array elements along a specified axis
print("Variance of the array elements:",np.var(a_mul.T, axis=1))

# 9. Argmin: the INDICES (positions along the axis) of the minimum values, not the values themselves
print("Indices of minimum values for subarrays:\n",np.argmin(a_mul.T, axis=1))

# 10. Argmax: the INDICES (positions along the axis) of the maximum values, not the values themselves
print("Indices of maximum values for subarrays:\n",np.argmax(a_mul.T, axis=1))

[[[[1 1 1 1]
   [2 2 2 2]
   [3 3 3 3]]]


 [[[4 4 4 4]
   [5 5 5 5]
   [6 6 6 6]]]]


Sum of the elements in the array:
 [[[ 4  8 12]]

 [[16 20 24]]]

Mean of the elements in the array:
 [[[2. 2. 2. 2.]]

 [[5. 5. 5. 5.]]]
Product of elements:
 [[[ 4  4  4  4]
  [10 10 10 10]
  [18 18 18 18]]]
Minimum value in the array elements: [[[1 1 1 1]]

 [[4 4 4 4]]]
Maximum value in the array elements: [[[3 3 3 3]]

 [[6 6 6 6]]]
Median of the given array: 3.5
Standard Deviation of the array elements: [[[0.81649658 0.81649658]]

 [[0.81649658 0.81649658]]

 [[0.81649658 0.81649658]]

 [[0.81649658 0.81649658]]]
Variance of the array elements: [[[0.66666667 0.66666667]]

 [[0.66666667 0.66666667]]

 [[0.66666667 0.66666667]]

 [[0.66666667 0.66666667]]]
Minimum values for subarrays:
 [[[0 0]]

 [[0 0]]

 [[0 0]]

 [[0 0]]]
Maximum values for subarrays:
 [[[2 2]]

 [[2 2]]

 [[2 2]]

 [[2 2]]]


# Pandas

In [197]:
import pandas as pd

In [198]:
# DataFrame: a two-dimensional, labelled table. It can be built from in-memory data (as here)
# or loaded from sources such as CSV files and spreadsheets.
data = pd.DataFrame(
    {
        'S': [24, 23, 73],
        'B': [31, 65, 15],
        'C': [32, 32, 86],
    }
)
data.head()

Unnamed: 0,S,B,C
0,24,31,32
1,23,65,32
2,73,15,86


In [199]:
# Keep only the rows whose value in column 'B' is divisible by 5.
# The boolean mask is passed to .loc directly: converting the matching index labels to a list and
# looking them up again is redundant and would return duplicated rows if index labels were not unique.
data.loc[data['B'] % 5 == 0]

Unnamed: 0,S,B,C
1,23,65,32
2,73,15,86


In [200]:
def greatest_of_three(row):
    """Return the largest of the first three values in ``row``.

    Parameters
    ----------
    row : pandas.Series
        A row of values. Only positions 0, 1 and 2 are considered, so any
        further entries (for example a column added later) are ignored.

    Returns
    -------
    scalar
        The maximum of the first three entries; missing values are skipped.
    """
    return row.iloc[:3].max()

In [202]:
# Apply a function (or lambda) to the DataFrame: with axis=1 it is called once per row,
# and each row is passed to it as a Series. The returned values are stored in a new column.
data["greatest_number"] = data.apply(greatest_of_three, axis=1)
data.head()

Unnamed: 0,S,B,C,greatest_number
0,24,31,32,32
1,23,65,32,65
2,73,15,86,86


In [204]:
# interpolate() fills missing (NaN) values from the neighbouring values and returns a new Series.
# This column has no missing values, so the result shown is identical to the column itself.
data['greatest_number'].interpolate()

0    32
1    65
2    86
Name: greatest_number, dtype: int64