# Numpy vs Pandas
1. Pandas has a NumPy core.
2. Pandas adds extra structure and tools on top of it, but sometimes you have to strip them away and work with the raw array.

In [1]:
import pandas as pd
import numpy as np

# Load heart.csv (path is relative, so it must be in the working directory)
# and preview the first five rows.
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [6]:
# DataFrame.to_numpy() is the recommended way to get the frame's contents as a
# NumPy array. The columns mix integers and floats (e.g. oldpeak), so the
# result is upcast to a single common dtype, float64.
data = df.to_numpy()

# `df.values` is the older spelling of the same thing. It is not formally
# deprecated, but the pandas docs recommend to_numpy() instead, so we only
# check that it agrees rather than reassigning `data` from it.
assert np.array_equal(data, df.values, equal_nan=True)
data

array([[63.,  1.,  3., ...,  0.,  1.,  1.],
       [37.,  1.,  2., ...,  0.,  2.,  1.],
       [41.,  0.,  1., ...,  0.,  2.,  1.],
       ...,
       [68.,  1.,  0., ...,  2.,  3.,  0.],
       [57.,  1.,  0., ...,  1.,  3.,  0.],
       [57.,  0.,  1., ...,  1.,  2.,  0.]])

In [7]:
# Show the array's element dtype next to its contents (plain-text repr via print).
print(data.dtype, data)

float64 [[63.  1.  3. ...  0.  1.  1.]
 [37.  1.  2. ...  0.  2.  1.]
 [41.  0.  1. ...  0.  2.  1.]
 ...
 [68.  1.  0. ...  2.  3.  0.]
 [57.  1.  0. ...  1.  3.  0.]
 [57.  0.  1. ...  1.  2.  0.]]


In [10]:
# NumPy-style [row, column] indexing: first row, first column.
data[0, 0]

np.float64(63.0)

In [18]:
# Select three columns, convert them to an array, and take an explicit copy.
df2 = df[['age', 'sex', 'cp']]
data2 = df2.to_numpy().copy()
# The write goes into the copy only, so df2 is untouched: the preview below
# still shows the original age in the first row.
data2[0, 0] = 100
df2.head()

Unnamed: 0,age,sex,cp
0,63,1,3
1,37,1,2
2,41,0,1
3,56,1,1
4,57,0,0


## Another way to change a cell value

In [19]:
# Preview the frame before the first age value is overwritten in the next cell.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [22]:
# Overwrite the age in the first row.
# Writing through `df.age.to_numpy()[0] = 100` only reaches the frame when
# to_numpy() happens to return a writable view, which pandas does not
# guarantee; with Copy-on-Write enabled the array is read-only and the
# assignment raises. Assigning through .loc is the supported way to set a cell.
# `df.index[0]` keeps the "first row" meaning whatever the index labels are.
df.loc[df.index[0], 'age'] = 100
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,100,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


# mean

In [25]:
# The pandas Series mean and the mean of the extracted NumPy array agree.
print(df.age.mean(), df.age.to_numpy().mean())

54.48844884488449 54.48844884488449


In [28]:
# The 0.5 quantile is the median of the age column.
df.age.quantile(0.5)

np.float64(55.0)

In [30]:
# Drop down to the raw array to use NumPy's reshape.
# (3, -1) asks for 3 rows and lets NumPy infer the row length, so the number
# of ages must be divisible by 3.
df.age.to_numpy().reshape((3, -1))

array([[100,  37,  41,  56,  57,  57,  56,  44,  52,  57,  54,  48,  49,
         64,  58,  50,  58,  66,  43,  69,  59,  44,  42,  61,  40,  71,
         59,  51,  65,  53,  41,  65,  44,  54,  51,  46,  54,  54,  65,
         65,  51,  48,  45,  53,  39,  52,  44,  47,  53,  53,  51,  66,
         62,  44,  63,  52,  48,  45,  34,  57,  71,  54,  52,  41,  58,
         35,  51,  45,  44,  62,  54,  51,  29,  51,  43,  55,  51,  59,
         52,  58,  41,  45,  60,  52,  42,  67,  68,  46,  54,  58,  48,
         57,  52,  54,  45,  53,  62,  52,  43,  53,  42],
       [ 59,  63,  42,  50,  68,  69,  45,  50,  50,  64,  57,  64,  43,
         55,  37,  41,  56,  46,  46,  64,  59,  41,  54,  39,  34,  47,
         67,  52,  74,  54,  49,  42,  41,  41,  49,  60,  62,  57,  64,
         51,  43,  42,  67,  76,  70,  44,  60,  44,  42,  66,  71,  64,
         66,  39,  58,  47,  35,  58,  56,  56,  55,  41,  38,  38,  67,
         67,  62,  63,  53,  56,  48,  58,  58,  60,  40,  60,  6

# Recap:
- Work with pandas as much as you can; it offers more functionality.
- Sometimes you need the actual array; use `to_numpy()` to get it.