In [1]:
import numpy as np
import pandas as pd

In [2]:
#a Series of integers with one missing value (None), no dtype requested
s = pd.Series(data=[1, 2, 3, None])

In [3]:
#display the Series: the missing entry shows up as NaN and the dtype is float64
s

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

In [4]:
s.dtype #float64: the integers were upcast to floats to sit alongside NaN

dtype('float64')

In [5]:
#we can also request the nullable integer dtype with pd.Int64Dtype()
s = pd.Series(
    data=[1, 2, 3, None],
    dtype=pd.Int64Dtype(),
)

In [6]:
#display the Series: the values stay integers and the missing entry is <NA>
s

0       1
1       2
2       3
3    <NA>
dtype: Int64

In [7]:
#isna() reports the <NA> entry as missing
s.isna()

0    False
1    False
2    False
3     True
dtype: bool

In [8]:
#the dtype is now the nullable Int64Dtype rather than float64
s.dtype

Int64Dtype()

In [9]:
#<NA> is how pd.NA, the pandas missing-value sentinel, is displayed
s[3]

<NA>

In [10]:
#the stored missing value is the pd.NA object itself, so an identity check works
s[3] is pd.NA

True

In [11]:
#the string alias 'Int64' is shorthand for pd.Int64Dtype()
s = pd.Series(
    data=[1, 2, 3, None],
    dtype='Int64',
)

In [12]:
#display the Series: same values and Int64 dtype as with pd.Int64Dtype()
s

0       1
1       2
2       3
3    <NA>
dtype: Int64

In [13]:
#pandas also has a dedicated string dtype whose missing values are <NA>.
#pyarrow is optional here: it is only needed for the Arrow-backed
#storage, e.g. pd.StringDtype('pyarrow')
s = pd.Series(
    data=['one', 'two', None, 'three'],
    dtype=pd.StringDtype(),
)

In [14]:
#display the Series: the None entry is shown as <NA> and the dtype is string
s

0      one
1      two
2     <NA>
3    three
dtype: string

In [15]:
#pd.StringDtype can be more efficient than NumPy object-dtype strings,
#and pd.CategoricalDtype is important and will be addressed later.
#extension dtypes also work on DataFrame columns: this frame is built
#with the default dtypes and each column is converted with astype below
df = pd.DataFrame(
    data={
        'A': [1, 2, None, 4],
        'B': ['one', 'two', 'three', None],
        'C': [False, None, False, True],
    }
)

In [16]:
#display the frame: with default dtypes column A holds floats (1.0, 2.0, ...)
#and the missing entries in every column are rendered as blanks
df

Unnamed: 0,A,B,C
0,1.0,one,False
1,2.0,two,
2,,three,False
3,4.0,,True


In [17]:
#convert column A to the nullable integer dtype via its string alias;
#the missing entry becomes <NA> and the values are shown as integers again
df['A'] = df['A'].astype('Int64')
df['A']

0       1
1       2
2    <NA>
3       4
Name: A, dtype: Int64

In [18]:
#convert column B to the string extension dtype via its 'string' alias;
#the None entry becomes <NA>
df['B'] = df['B'].astype('string')
df['B']

0      one
1      two
2    three
3     <NA>
Name: B, dtype: string

In [19]:
#convert column C to the nullable boolean dtype via its 'boolean' alias;
#the None entry becomes <NA>
df['C'] = df['C'].astype('boolean')
df['C']

0    False
1     <NA>
2    False
3     True
Name: C, dtype: boolean

In [20]:
#a second frame with the same data, used to repeat the conversions
#with dtype objects instead of string aliases
df2 = pd.DataFrame(
    data={
        'A': [1, 2, None, 4],
        'B': ['one', 'two', 'three', None],
        'C': [False, None, False, True],
    }
)

In [21]:
#convert column A by passing a pd.Int64Dtype() instance to astype;
#the displayed result has dtype Int64 with <NA> for the missing entry
df2['A'] = df2['A'].astype(pd.Int64Dtype())
df2['A']

0       1
1       2
2    <NA>
3       4
Name: A, dtype: Int64

In [24]:
#convert column B by passing a pd.StringDtype() instance to astype;
#the displayed result has dtype string with <NA> for the missing entry
df2['B'] = df2['B'].astype(pd.StringDtype())
df2['B']

0      one
1      two
2    three
3     <NA>
Name: B, dtype: string

In [25]:
#convert column C by passing a pd.BooleanDtype() instance to astype;
#the displayed result has dtype boolean with <NA> for the missing entry
df2['C'] = df2['C'].astype(pd.BooleanDtype())
df2['C']

0    False
1     <NA>
2    False
3     True
Name: C, dtype: boolean