# Numpy
    - Numpy is a numerical computing library for python.
    - Numpy support multi dimensional arrays and matrices
    - It has a lot of in-built mathematical functions
    
<img src="./imgs/numpy.png" width=400>

In [1]:
# installing numpy library
!pip install numpy



In [4]:
# importing numpy as np
import numpy as np

## Why Numpy ?
    - performs fast operations (because of Vectorization)
    - numpy arrays can be treated as vectors and matrices from linear algebra
    
<img src="./imgs/array.jpg" width=400>

In [1]:
lst = [1,2,3,4,5,6,7,8,9,10]

In [2]:
%timeit [i**2 for i in lst] #vvi

1.7 µs ± 26.2 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [5]:
arr = np.array(lst)

In [6]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [7]:
%timeit arr**2

1.31 µs ± 7.64 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [8]:
lst

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [10]:
# if i want to add 1 to each element of this vector
lst + [1]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]

In [11]:
# but it's possible in numpy array
arr + 1

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

# Numpy Basics

**array** : Fundamental element in numpy is homogenous array. Numpy Arrays can be 1D, 2D, 3D . . . nD
 
    - Different ways to create np array
        1. np.array()
        2. np.arange()

 

In [12]:
l = [1,5,8,9]

np_arr = np.array(l)

In [18]:
np_arr

array([1, 5, 8, 9])

In [14]:
type(np_arr)`

numpy.ndarray

In [15]:
np_arr.ndim

1

In [16]:
#important
np_arr.shape

(4,)

**Another way of creating np array**

In [21]:
new_arr = np.arange(3, 11, 2)

In [22]:
new_arr

array([3, 5, 7, 9])

In [23]:
lst_2d = [ [1,2,3], [4,5,6], [7,8,9] ]

In [24]:
arr_2d = np.array(lst_2d)

In [25]:
arr_2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [26]:
arr_2d.ndim

2

In [27]:
arr_2d.shape

(3, 3)

## Special Arrays in Numpy
    - zeros()
    - ones()
    - diag()
    - identity()

In [29]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [36]:
np.ones((5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [33]:
np.diag([1,2,3,4,5])

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

In [34]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

## Indexing in Array

In [3]:
np.random.randint(10, 100 )

21

In [4]:
new_arr = np.random.randint(0, 20, (5,4))

In [5]:
new_arr

array([[15,  8,  7,  5],
       [ 1, 15, 13, 16],
       [19, 18,  7, 10],
       [ 0,  4, 18,  0],
       [ 3,  4,  0,  9]])

In [6]:
new_arr.shape

(5, 4)

In [None]:
new_arr[1,1]

In [None]:
new_arr[1][1]

In [7]:
# first row
new_arr[0]

array([15,  8,  7,  5])

In [58]:
# second last row
new_arr[-2]

array([15,  3,  9, 15])

In [10]:
# first element
print(new_arr[0, 0])

# last element
print(new_arr[-1, -1])

15
9


**Array slicing**

In [60]:
new_arr

array([[ 1,  5, 19,  8],
       [11,  5, 12,  1],
       [ 6,  8,  2,  9],
       [15,  3,  9, 15],
       [ 2,  4, 14,  6]])

In [64]:
new_arr[2: , 1: ]

array([[ 8,  2,  9],
       [ 3,  9, 15],
       [ 4, 14,  6]])

In [66]:
# third column only
new_arr[: , -1]

array([ 8,  1,  9, 15,  6])

In [68]:
new_arr[0,0] = 0

In [69]:
new_arr

array([[ 0,  5, 19,  8],
       [11,  5, 12,  1],
       [ 6,  8,  2,  9],
       [15,  3,  9, 15],
       [ 2,  4, 14,  6]])

In [72]:
# masking 
mask = new_arr > 10
print(mask)

[[False False  True False]
 [ True False  True False]
 [False False False False]
 [ True False False  True]
 [False False  True False]]


In [73]:
np.sum(mask)

6

In [75]:
# get all values greater than 10
new_arr[mask]

array([19, 11, 12, 15, 15, 14])

In [76]:
new_arr

array([[ 0,  5, 19,  8],
       [11,  5, 12,  1],
       [ 6,  8,  2,  9],
       [15,  3,  9, 15],
       [ 2,  4, 14,  6]])

In [78]:
new_arr[2: , 2:] = 0

In [79]:
new_arr

array([[ 0,  5, 19,  8],
       [11,  5, 12,  1],
       [ 6,  8,  0,  0],
       [15,  3,  0,  0],
       [ 2,  4,  0,  0]])

## Basic Operations in Arrays

In [80]:
a = np.array([10,20,30,40])
b = np.arange(1, 5)

In [81]:
a

array([10, 20, 30, 40])

In [82]:
b

array([1, 2, 3, 4])

In [83]:
a + b

array([11, 22, 33, 44])

In [84]:
a - b

array([ 9, 18, 27, 36])

In [85]:
a * b

array([ 10,  40,  90, 160])

In [None]:
b**2

In [86]:
# masking
a>15

array([False,  True,  True,  True])

In [87]:
np.log(b)

array([0.        , 0.69314718, 1.09861229, 1.38629436])

In [88]:
np.sin(a)

array([-0.54402111,  0.91294525, -0.98803162,  0.74511316])

**Matrix Product**

In [89]:
A = np.random.randint(0, 5, (3, 4))
B = np.random.randint(0, 5, (4, 2))

In [90]:
A

array([[3, 3, 3, 3],
       [0, 4, 3, 3],
       [0, 3, 0, 1]])

In [91]:
B

array([[4, 2],
       [0, 2],
       [4, 4],
       [3, 4]])

In [92]:
# Dot product
np.dot(A, B)

array([[33, 36],
       [21, 32],
       [ 3, 10]])

## More Operations on Arrays

In [93]:
A = np.arange(0, 24)

In [96]:
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [97]:
A = A.reshape(6,4)

In [100]:
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [101]:
np.sqrt(A)

array([[0.        , 1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974, 2.64575131],
       [2.82842712, 3.        , 3.16227766, 3.31662479],
       [3.46410162, 3.60555128, 3.74165739, 3.87298335],
       [4.        , 4.12310563, 4.24264069, 4.35889894],
       [4.47213595, 4.58257569, 4.69041576, 4.79583152]])

In [102]:
np.sum(A)

276

In [103]:
np.max(A)

23

In [104]:
np.mean(A)

11.5

In [105]:
np.std(A)

6.922186552431729

In [106]:
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [108]:
np.sum(A, axis=0)

array([60, 66, 72, 78])

In [109]:
np.mean(A, axis = 1)

array([ 1.5,  5.5,  9.5, 13.5, 17.5, 21.5])

## Shape Manipulation

In [15]:
A

NameError: name 'A' is not defined

In [112]:
A.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [113]:
A.reshape(8,3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23]])

In [114]:
A.T

array([[ 0,  4,  8, 12, 16, 20],
       [ 1,  5,  9, 13, 17, 21],
       [ 2,  6, 10, 14, 18, 22],
       [ 3,  7, 11, 15, 19, 23]])

In [115]:
np.transpose(A)

array([[ 0,  4,  8, 12, 16, 20],
       [ 1,  5,  9, 13, 17, 21],
       [ 2,  6, 10, 14, 18, 22],
       [ 3,  7, 11, 15, 19, 23]])

**Stacking of arrays**
    - vstack
    - hstack

In [118]:
a = np.random.randint(0, 10, (2,2))
b = np.random.randint(0, 10, (2,2))

In [119]:
a

array([[6, 0],
       [8, 6]])

In [120]:
b

array([[9, 6],
       [5, 4]])

In [123]:
np.hstack((a,b))

array([[6, 0, 9, 6],
       [8, 6, 5, 4]])

In [126]:
np.vstack((a,b))

array([[6, 0],
       [8, 6],
       [9, 6],
       [5, 4]])

In [2]:
import numpy as np

## Broadcasting
    - First Rule of Numpy : 2 Arrays can performs opertions only when they have same shapes
    - broadcasting let two arrays of different shapes to do some operations.
        - A small array will repeat itself, and convert to the same shape as of another array.

In [18]:
A = np.random.randint(0, 10, size=(3,3))
a = np.array([[1,2,3]])

In [19]:
A

array([[4, 6, 1],
       [8, 5, 1],
       [9, 9, 4]])

In [20]:
a

array([[1, 2, 3]])

In [21]:
print(A.shape, a.shape)

(3, 3) (1, 3)


In [22]:
A + a

array([[ 5,  8,  4],
       [ 9,  7,  4],
       [10, 11,  7]])

In [23]:
a.T

array([[1],
       [2],
       [3]])

In [24]:
A + a.T

array([[ 5,  7,  2],
       [10,  7,  3],
       [12, 12,  7]])

In [25]:
A

array([[4, 6, 1],
       [8, 5, 1],
       [9, 9, 4]])

In [26]:
A + 4

array([[ 8, 10,  5],
       [12,  9,  5],
       [13, 13,  8]])

## Vectorization
    - performing operations directly on Arrays

In [161]:
p1 = np.array([1,2, 4, 7, -2, 9, 6, 0])
p2 = np.array([5,5, 8, 2, -9, 2, 1, 6])

In [163]:
s = 0
for i in range(8):
    s += (p2[i] - p1[i])**2
    
print(s**0.5)

15.0


In [165]:
# efficient
def distance(p1, p2):
    return np.sqrt(np.sum((p2-p1)**2))

In [168]:
distance(p1, p2)

15.0

## Challenges:
 
1/2 ∑ ( a - b )<sup>2</sup>

In [2]:
import numpy as np

In [3]:
a = np.random.randint(0, 10, (2,2))
b = np.random.randint(0, 10, (2,2))

In [4]:
a

array([[6, 6],
       [8, 9]])

In [5]:
b

array([[5, 8],
       [0, 1]])

In [6]:
0.5*np.mean(np.square(a-b))

16.625

In [7]:
import numpy as np

def mean_squared_difference(a, b):
    # Ensure both a and b are NumPy arrays
    a = np.array(a)
    b = np.array(b)

    # Calculate the squared differences
    squared_diff = (a - b) ** 2

    # Calculate the mean of squared differences
    mean_squared_diff = np.mean(squared_diff)

    return 0.5 * mean_squared_diff

# Example usage:

result = mean_squared_difference(a, b)
print(result)


16.625


In [2]:
#try yourself

In [3]:
import numpy as np

In [4]:
print("hello world")

hello world


In [5]:
import sklearn

In [6]:
i=45

In [7]:
print(7)

7


In [8]:
i

45

In [9]:
myarr=np.array([3,6,32,7])

In [10]:
myarr

array([ 3,  6, 32,  7])

In [15]:
myarr=np.array([[3,6,32,7]],np.int32)

In [16]:
myarr

array([[ 3,  6, 32,  7]])

In [17]:
myarr[0]

array([ 3,  6, 32,  7])

In [18]:
myarr[0,1]

6

In [19]:
myarr.shape

(1, 4)

In [20]:
myarr.dtype

dtype('int32')

In [21]:
myarr[0,1]=45

In [22]:
myarr

array([[ 3, 45, 32,  7]])

In [23]:
listarry=np.array([[1,2,3],[5,6,7],[0,3,4]])

In [24]:
listarry

array([[1, 2, 3],
       [5, 6, 7],
       [0, 3, 4]])

In [25]:
listarry.dtype

dtype('int32')

In [26]:
listarry.shape

(3, 3)

In [27]:
listarry.size

9

In [28]:
rng=np.arange(14)

In [29]:
rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])

In [30]:
lspc=np.linspace(1,50,10)

In [31]:
lspc

array([ 1.        ,  6.44444444, 11.88888889, 17.33333333, 22.77777778,
       28.22222222, 33.66666667, 39.11111111, 44.55555556, 50.        ])

In [33]:
emp=np.empty((4,6))

In [34]:
emp

array([[6.23042070e-307, 4.67296746e-307, 1.69121096e-306,
        3.11522054e-307, 1.33511018e-306, 1.33511969e-306],
       [6.23037996e-307, 6.23053954e-307, 9.34609790e-307,
        8.45593934e-307, 9.34600963e-307, 8.45606157e-307],
       [6.23061763e-307, 1.78019082e-306, 8.01091099e-307,
        1.78021527e-306, 9.34603679e-307, 1.60220393e-306],
       [9.79111946e-307, 8.34444713e-308, 9.45699680e-308,
        1.11261774e-306, 1.29060871e-306, 8.34424766e-308]])

In [35]:
like=np.empty_like(lspc)

In [36]:
like

array([ 1.        ,  6.44444444, 11.88888889, 17.33333333, 22.77777778,
       28.22222222, 33.66666667, 39.11111111, 44.55555556, 50.        ])

In [37]:
ide=np.identity(45)

In [38]:
ide

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [39]:
ide.shape

(45, 45)

In [40]:
arr=np.arange(99)

In [41]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98])

In [42]:
   arr.reshape(3,33)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32],
       [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        65],
       [66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98]])

In [43]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98])

In [44]:
arr.shape

(99,)

In [45]:
x=[[1,2,3],[4,5,6],[6,7,8]]

In [46]:
ar=np.array(x)

In [47]:
ar

array([[1, 2, 3],
       [4, 5, 6],
       [6, 7, 8]])

In [48]:
ar.sum(axis=0)

array([11, 14, 17])

In [49]:
ar.sum(axis=1)

array([ 6, 15, 21])

In [51]:
ar.T

array([[1, 4, 6],
       [2, 5, 7],
       [3, 6, 8]])

In [52]:
ar.flat

<numpy.flatiter at 0x1a3a3cf8ca0>

In [53]:
for i in ar.flat:
    print(i)

1
2
3
4
5
6
6
7
8


In [55]:
ar.ndim

2

In [56]:
ar.size

9

In [57]:
ar.nbytes

36

In [58]:
one=np.array([1,25,45,85])

In [60]:
one.argmax()

3

In [62]:
one.argmin()

0

In [63]:
one.argsort()

array([0, 1, 2, 3], dtype=int64)

In [64]:
ar

array([[1, 2, 3],
       [4, 5, 6],
       [6, 7, 8]])

In [65]:
ar.argsort()

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]], dtype=int64)

In [66]:
ar.argmin()

0

In [67]:
ar.argmax()

8

In [68]:
ar.argmax(axis=0)

array([2, 2, 2], dtype=int64)

In [69]:
ar.argmax(axis=1)

array([2, 2, 2], dtype=int64)

In [70]:
ar.argsort(axis=1)

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]], dtype=int64)

In [71]:
ar.ravel()

array([1, 2, 3, 4, 5, 6, 6, 7, 8])

In [72]:
ar.reshape(9,1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [6],
       [7],
       [8]])

In [73]:
ar2=np.array([[1,5,6],[8,7,6],[6,5,8]])

In [74]:
ar2

array([[1, 5, 6],
       [8, 7, 6],
       [6, 5, 8]])

In [75]:
ar+ar2

array([[ 2,  7,  9],
       [12, 12, 12],
       [12, 12, 16]])

In [76]:
ar*ar2

array([[ 1, 10, 18],
       [32, 35, 36],
       [36, 35, 64]])

In [77]:
np.sqrt(ar)

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974],
       [2.44948974, 2.64575131, 2.82842712]])

In [78]:
ar.sum()

42

In [79]:
ar.min()

1

In [80]:
ar.max()

8

In [81]:
np.where(ar>5)

(array([1, 2, 2, 2], dtype=int64), array([2, 0, 1, 2], dtype=int64))

In [82]:
type(np.where(ar>5))

tuple

In [83]:
np.count_nonzero(ar)

9

In [86]:
np.nonzero(ar)

(array([0, 0, 0, 1, 1, 2, 2, 2], dtype=int64),
 array([0, 1, 2, 0, 1, 0, 1, 2], dtype=int64))

In [87]:
ar[1,2]=0

In [88]:
import sys

In [89]:
char=[0,4,55,2]

In [91]:
np_ar=np.array(char)

In [93]:
sys.getsizeof(1)*len(char)

112

In [95]:
ar.tolist()

[[1, 2, 3], [4, 5, 0], [6, 7, 8]]

In [96]:
#pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
dict1={
    "name":['ravi','munna','rohit'],
    "marks":[92,45,85],
    "city":['patna','pune','delhi']
}

In [3]:
df=pd.DataFrame(dict1)

In [4]:
df

Unnamed: 0,name,marks,city
0,ravi,92,patna
1,munna,45,pune
2,rohit,85,delhi


In [5]:
df.to_csv('frnd.csv')

In [6]:
df.head()

Unnamed: 0,name,marks,city
0,ravi,92,patna
1,munna,45,pune
2,rohit,85,delhi


In [7]:
df.tail(2)

Unnamed: 0,name,marks,city
1,munna,45,pune
2,rohit,85,delhi


In [8]:
df.head(1)

Unnamed: 0,name,marks,city
0,ravi,92,patna


In [9]:
df.describe()

Unnamed: 0,marks
count,3.0
mean,74.0
std,25.357445
min,45.0
25%,65.0
50%,85.0
75%,88.5
max,92.0


In [11]:
#import csv file in python
#data =pd.read_csv("filename.csv")

In [12]:
df.index=["first","second","third"]

In [13]:
df

Unnamed: 0,name,marks,city
first,ravi,92,patna
second,munna,45,pune
third,rohit,85,delhi


In [15]:
 ser=pd.Series(np.random.rand(34))

In [16]:
type(ser)

pandas.core.series.Series

In [17]:
newdf=pd.DataFrame(np.random.rand(334,5),index=np.arange(334))

In [19]:
type(newdf)

pandas.core.frame.DataFrame

In [20]:
newdf

Unnamed: 0,0,1,2,3,4
0,0.626495,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.778950,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [22]:
newdf.describe()

Unnamed: 0,0,1,2,3,4
count,334.0,334.0,334.0,334.0,334.0
mean,0.512856,0.484842,0.490641,0.492893,0.539393
std,0.289319,0.303397,0.281808,0.290142,0.284358
min,0.005206,0.001393,0.000159,0.002328,0.002555
25%,0.251517,0.192733,0.242599,0.224276,0.280923
50%,0.523183,0.492268,0.486771,0.48747,0.563203
75%,0.755185,0.749316,0.732755,0.760225,0.787366
max,0.992642,0.999185,0.996814,0.993715,0.992543


In [23]:
newdf.dtypes

0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object

In [25]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.626495,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.69532
2,0.478089,0.331875,0.617352,0.29937,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696


In [26]:
newdf[0][0]="rrk"

In [27]:
newdf

Unnamed: 0,0,1,2,3,4
0,rrk,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [28]:
newdf.index

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            324, 325, 326, 327, 328, 329, 330, 331, 332, 333],
           dtype='int64', length=334)

In [29]:
newdf.columns

RangeIndex(start=0, stop=5, step=1)

In [30]:
newdf.to_numpy()

array([['rrk', 0.699567521809314, 0.4734290247901465, 0.6624051816712921,
        0.7149087496641818],
       [0.18335322127079834, 0.1430681051055367, 0.746901618556984,
        0.052798986994949115, 0.6953201311450902],
       [0.47808917282138685, 0.33187473352814134, 0.6173517307121996,
        0.2993698322905488, 0.27216386293221706],
       ...,
       [0.4089242118091103, 0.895383452249195, 0.5987966463467758,
        0.12521190904485602, 0.509502317380058],
       [0.7744185985510181, 0.12672736722122113, 0.4376377758965906,
        0.23777823633847295, 0.2682400068727434],
       [0.9173808411032017, 0.6814138143404256, 0.04775490134254068,
        0.4399261861656075, 0.5183712694721619]], dtype=object)

In [31]:
newdf[0][0]=0.124

In [33]:
newdf

Unnamed: 0,0,1,2,3,4
0,0.124,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [34]:
newdf.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,324,325,326,327,328,329,330,331,332,333
0,0.124,0.183353,0.478089,0.105705,0.77895,0.051915,0.326609,0.661635,0.340006,0.141794,...,0.923856,0.754591,0.506475,0.783996,0.980046,0.818372,0.973011,0.408924,0.774419,0.917381
1,0.699568,0.143068,0.331875,0.595177,0.470175,0.613612,0.055752,0.845038,0.78226,0.140574,...,0.311273,0.384309,0.726397,0.205222,0.999185,0.76011,0.179187,0.895383,0.126727,0.681414
2,0.473429,0.746902,0.617352,0.330982,0.030163,0.552699,0.190835,0.714143,0.202623,0.904036,...,0.055547,0.034271,0.98122,0.225949,0.846017,0.12754,0.95585,0.598797,0.437638,0.047755
3,0.662405,0.052799,0.29937,0.774893,0.239881,0.536194,0.935811,0.940912,0.765807,0.126277,...,0.192455,0.460465,0.621213,0.836645,0.421787,0.242004,0.888794,0.125212,0.237778,0.439926
4,0.714909,0.69532,0.272164,0.568271,0.849696,0.872788,0.761242,0.662104,0.328767,0.707662,...,0.690587,0.213112,0.878778,0.503215,0.760906,0.146053,0.989683,0.509502,0.26824,0.518371


In [35]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.124,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.69532
2,0.478089,0.331875,0.617352,0.29937,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696


In [37]:
newdf.sort_index(axis=0,ascending=False)

Unnamed: 0,0,1,2,3,4
333,0.917381,0.681414,0.047755,0.439926,0.518371
332,0.774419,0.126727,0.437638,0.237778,0.268240
331,0.408924,0.895383,0.598797,0.125212,0.509502
330,0.973011,0.179187,0.955850,0.888794,0.989683
329,0.818372,0.760110,0.127540,0.242004,0.146053
...,...,...,...,...,...
4,0.77895,0.470175,0.030163,0.239881,0.849696
3,0.105705,0.595177,0.330982,0.774893,0.568271
2,0.478089,0.331875,0.617352,0.299370,0.272164
1,0.183353,0.143068,0.746902,0.052799,0.695320


In [38]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.124,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.69532
2,0.478089,0.331875,0.617352,0.29937,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696


In [39]:
newdf.sort_index(axis=1,ascending=False)

Unnamed: 0,4,3,2,1,0
0,0.714909,0.662405,0.473429,0.699568,0.124
1,0.695320,0.052799,0.746902,0.143068,0.183353
2,0.272164,0.299370,0.617352,0.331875,0.478089
3,0.568271,0.774893,0.330982,0.595177,0.105705
4,0.849696,0.239881,0.030163,0.470175,0.77895
...,...,...,...,...,...
329,0.146053,0.242004,0.127540,0.760110,0.818372
330,0.989683,0.888794,0.955850,0.179187,0.973011
331,0.509502,0.125212,0.598797,0.895383,0.408924
332,0.268240,0.237778,0.437638,0.126727,0.774419


In [40]:
newdf[0]

0         0.124
1      0.183353
2      0.478089
3      0.105705
4       0.77895
         ...   
329    0.818372
330    0.973011
331    0.408924
332    0.774419
333    0.917381
Name: 0, Length: 334, dtype: object

In [41]:
type(newdf[0])

pandas.core.series.Series

In [42]:
newdf2=newdf

In [43]:
newdf2[0][0]=123


In [45]:
newdf

Unnamed: 0,0,1,2,3,4
0,123,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [46]:
newdf2=newdf.copy()

In [47]:
newdf2[0][0]=456

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newdf2[0][0]=456


In [48]:
newdf

Unnamed: 0,0,1,2,3,4
0,123,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [49]:
newdf.columns = list("abcde")

In [50]:
newdf

Unnamed: 0,a,b,c,d,e
0,123,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [56]:
newdf.loc[0,'a']=4562

In [57]:
newdf

Unnamed: 0,a,b,c,d,e
0,4562,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [58]:
newdf.loc[0,0]=462

In [59]:
newdf

Unnamed: 0,a,b,c,d,e,0
0,4562,0.699568,0.473429,0.662405,0.714909,462.0
1,0.183353,0.143068,0.746902,0.052799,0.695320,
2,0.478089,0.331875,0.617352,0.299370,0.272164,
3,0.105705,0.595177,0.330982,0.774893,0.568271,
4,0.77895,0.470175,0.030163,0.239881,0.849696,
...,...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053,
330,0.973011,0.179187,0.955850,0.888794,0.989683,
331,0.408924,0.895383,0.598797,0.125212,0.509502,
332,0.774419,0.126727,0.437638,0.237778,0.268240,


In [62]:
newdf=newdf.drop(0,axis=1)

In [63]:
newdf

Unnamed: 0,a,b,c,d,e
0,4562,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [64]:
newdf.loc[[1,2],['c','d']]

Unnamed: 0,c,d
1,0.746902,0.052799
2,0.617352,0.29937


In [66]:
newdf.loc[[1,2],:]

Unnamed: 0,a,b,c,d,e
1,0.183353,0.143068,0.746902,0.052799,0.69532
2,0.478089,0.331875,0.617352,0.29937,0.272164


In [67]:
newdf.loc[(newdf['a']<0.3)]

Unnamed: 0,a,b,c,d,e
1,0.183353,0.143068,0.746902,0.052799,0.695320
3,0.105705,0.595177,0.330982,0.774893,0.568271
5,0.051915,0.613612,0.552699,0.536194,0.872788
9,0.141794,0.140574,0.904036,0.126277,0.707662
11,0.160067,0.613769,0.182113,0.281039,0.720879
...,...,...,...,...,...
311,0.133542,0.184872,0.701894,0.873730,0.142364
312,0.245728,0.108647,0.478726,0.505032,0.946877
314,0.021705,0.299785,0.004103,0.144477,0.516393
317,0.240675,0.533145,0.219422,0.844830,0.094249


In [69]:
newdf.loc[(newdf['a']<0.3)&(newdf['c']>0.1)]

Unnamed: 0,a,b,c,d,e
1,0.183353,0.143068,0.746902,0.052799,0.695320
3,0.105705,0.595177,0.330982,0.774893,0.568271
5,0.051915,0.613612,0.552699,0.536194,0.872788
9,0.141794,0.140574,0.904036,0.126277,0.707662
11,0.160067,0.613769,0.182113,0.281039,0.720879
...,...,...,...,...,...
309,0.265586,0.213050,0.217616,0.273563,0.481634
311,0.133542,0.184872,0.701894,0.873730,0.142364
312,0.245728,0.108647,0.478726,0.505032,0.946877
317,0.240675,0.533145,0.219422,0.844830,0.094249


In [70]:
newdf.head(2)

Unnamed: 0,a,b,c,d,e
0,4562.0,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.69532


In [71]:
newdf.iloc[0,4]

0.7149087496641818

In [72]:
newdf.iloc[[0,5],[1,2]]

Unnamed: 0,b,c
0,0.699568,0.473429
5,0.613612,0.552699


In [73]:
newdf.head(3)

Unnamed: 0,a,b,c,d,e
0,4562.0,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.69532
2,0.478089,0.331875,0.617352,0.29937,0.272164


In [74]:
newdf.drop([0])

Unnamed: 0,a,b,c,d,e
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
5,0.051915,0.613612,0.552699,0.536194,0.872788
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [75]:
newdf.drop(['a'],axis=1)

Unnamed: 0,b,c,d,e
0,0.699568,0.473429,0.662405,0.714909
1,0.143068,0.746902,0.052799,0.695320
2,0.331875,0.617352,0.299370,0.272164
3,0.595177,0.330982,0.774893,0.568271
4,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...
329,0.760110,0.127540,0.242004,0.146053
330,0.179187,0.955850,0.888794,0.989683
331,0.895383,0.598797,0.125212,0.509502
332,0.126727,0.437638,0.237778,0.268240


In [76]:
newdf

Unnamed: 0,a,b,c,d,e
0,4562,0.699568,0.473429,0.662405,0.714909
1,0.183353,0.143068,0.746902,0.052799,0.695320
2,0.478089,0.331875,0.617352,0.299370,0.272164
3,0.105705,0.595177,0.330982,0.774893,0.568271
4,0.77895,0.470175,0.030163,0.239881,0.849696
...,...,...,...,...,...
329,0.818372,0.760110,0.127540,0.242004,0.146053
330,0.973011,0.179187,0.955850,0.888794,0.989683
331,0.408924,0.895383,0.598797,0.125212,0.509502
332,0.774419,0.126727,0.437638,0.237778,0.268240


In [77]:
newdf.drop(['a','d'],axis=1,inplace=True)

In [78]:
newdf

Unnamed: 0,b,c,e
0,0.699568,0.473429,0.714909
1,0.143068,0.746902,0.695320
2,0.331875,0.617352,0.272164
3,0.595177,0.330982,0.568271
4,0.470175,0.030163,0.849696
...,...,...,...
329,0.760110,0.127540,0.146053
330,0.179187,0.955850,0.989683
331,0.895383,0.598797,0.509502
332,0.126727,0.437638,0.268240


In [79]:
newdf.reset_index()

Unnamed: 0,index,b,c,e
0,0,0.699568,0.473429,0.714909
1,1,0.143068,0.746902,0.695320
2,2,0.331875,0.617352,0.272164
3,3,0.595177,0.330982,0.568271
4,4,0.470175,0.030163,0.849696
...,...,...,...,...
329,329,0.760110,0.127540,0.146053
330,330,0.179187,0.955850,0.989683
331,331,0.895383,0.598797,0.509502
332,332,0.126727,0.437638,0.268240


In [80]:
newdf.reset_index(drop=True,inplace=True)

In [82]:
newdf.head()

Unnamed: 0,b,c,e
0,0.699568,0.473429,0.714909
1,0.143068,0.746902,0.69532
2,0.331875,0.617352,0.272164
3,0.595177,0.330982,0.568271
4,0.470175,0.030163,0.849696
