In [1]:
import numpy as np
import pandas as pd
# Show the exact pandas build that produced the outputs in this notebook.
pd.__version__

'0.24.0.dev0+763.g6030452c6'

### Why do we care?

In [2]:
# Baseline: a plain integer Series; its repr reports the NumPy-backed int64 dtype.
# `Series` is reached through `pd` because only `import pandas as pd` is in scope,
# so the bare name would raise NameError on a fresh kernel.
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [3]:
# Label 3 does not exist in `s`, so reindexing has to insert a missing value;
# with the NumPy int64 dtype the result is upcast to float64.
s.reindex(index=[1, 2, 3])

1    2.0
2    3.0
3    NaN
dtype: float64

In [4]:
# Same data, but stored with the extension dtype 'Int64' (capital I).
# `Series` is reached through `pd` because only `import pandas as pd` is in scope,
# so the bare name would raise NameError on a fresh kernel.
s = pd.Series([1, 2, 3], dtype='Int64')
s

0    1
1    2
2    3
dtype: Int64

In [5]:
# Reindex the Int64 Series onto labels 1..3; label 3 is absent, so it becomes NaN
# while the dtype stays Int64 instead of falling back to float64.
rs = s.reindex(index=[1, 2, 3])
rs

1      2
2      3
3    NaN
dtype: Int64

### Operations

In [6]:
# The missing entry is skipped when summing: 2 + 3 gives the 5 shown below.
rs.sum()

5

In [7]:
# Two-column frame: `Int` reuses the Int64 Series `s`, `A` is a plain Python list
# that will serve as the grouping key further down.
df = pd.DataFrame(
    data={
        'Int': s,
        'A': [1, 2, 1],
    }
)
df

Unnamed: 0,Int,A
0,1,1
1,2,2
2,3,1


In [8]:
# Per-column dtypes: `Int` keeps the Int64 extension dtype, `A` is inferred as NumPy int64.
df.dtypes

Int    Int64
A      int64
dtype: object

Grouping

In [9]:
# Sum the `Int` column within each value of `A`.
# Note in the output that the result dtype is int64, not Int64.
grouped = df.groupby('A')['Int'].sum()
grouped

A
1    4
2    2
Name: Int, dtype: int64

In [10]:
# Inspect the index produced by grouping on the plain int64 column `A`.
grouped.index

Int64Index([1, 2], dtype='int64', name='A')

Is this right?

Shouldn't the dtype of ``grouped`` be ``Int64``?

This is a limitation of the current interface: we don't have an ExtensionArray-backed Index (yet),
so grouping by the ``Int64`` column produces an ``object``-dtype index.

In [11]:
# Group by the Int64 column itself and look at the resulting index:
# the group keys end up in an object-dtype Index.
df.groupby('Int')['A'].sum().index

Index([1, 2, 3], dtype='object', name='Int')

In [12]:
# Element-wise addition of a scalar across both columns.
df + 1

Unnamed: 0,Int,A
0,2,2
1,3,3
2,4,2


In [13]:
# Adding a scalar leaves both column dtypes unchanged (Int64 and int64).
(df + 1).dtypes

Int    Int64
A      int64
dtype: object

In [14]:
# Overwrite every column of the row labelled 2 with NaN.
# This mutates `df` in place; all later cells operate on the modified frame.
df.loc[2] = np.nan

In [15]:
# Display the frame after the assignment: row 2 is now missing in both columns.
df

Unnamed: 0,Int,A
0,1.0,1.0
1,2.0,2.0
2,,


In [16]:
# Replace the missing entries with 0; returns a new frame and leaves `df` untouched.
df.fillna(value=0)

Unnamed: 0,Int,A
0,1,1.0
1,2,2.0
2,0,0.0


In [17]:
# After filling, `Int` is still Int64, whereas `A` stays float64
# (it was upcast when NaN was assigned to it).
df.fillna(value=0).dtypes

Int      Int64
A      float64
dtype: object

### Indexing

In [18]:
# Boolean-mask selection: keep only the rows whose `Int` value is 2.
df[df['Int'].isin([2])]

Unnamed: 0,Int,A
1,2,2.0


In [19]:
# Positional slice of rows 1 and 2; the missing values in row 2 are carried through.
df.iloc[1:3]

Unnamed: 0,Int,A
1,2.0,2.0
2,,


In [20]:
# The slice keeps the per-column dtypes: Int64 for `Int`, float64 for `A`.
df.iloc[1:3].dtypes

Int      Int64
A      float64
dtype: object

### Implementation

In [21]:
# Pull the `Int` column out as a Series (rebinding `s`) to inspect what backs it.
s = df['Int']
s

0      1
1      2
2    NaN
Name: Int, dtype: Int64

In [22]:
# `.values` exposes the backing array, which the repr below shows to be an IntegerArray.
s.values

IntegerArray([1, 2, nan], dtype='Int64')

In [23]:
# Private attribute: the integer data array. The slot of the missing element still
# holds a number (1 here), so it is only meaningful together with the mask.
s.values._data

array([1, 2, 1])

In [24]:
# Private attribute: boolean array in which True marks a missing entry (position 2 here).
s.values._mask

array([False, False,  True])

In [25]:
# Bytes reported for the Series; equals the data array plus the mask
# measured in the next two cells (24 + 3).
s.nbytes

27

In [26]:
# Size of the data array: 24 bytes for the 3 elements.
s.values._data.nbytes

24

In [27]:
# Size of the mask: 3 bytes for the 3 elements.
s.values._mask.nbytes

3

### Interface

In [28]:
# Drop the missing entry; the result is still an IntegerArray.
s.values.dropna()

IntegerArray([1, 2], dtype='Int64')

In [29]:
# Returns a (codes, uniques) pair; the missing entry gets code -1 and is left out of the uniques.
s.values.factorize()

(array([ 0,  1, -1]), IntegerArray([1, 2], dtype='Int64'))

In [30]:
# Boolean array flagging which entries are missing.
s.values.isna()

array([False, False,  True])

In [31]:
# Select elements by position; taking [0, 1, 2] reproduces the whole array, NaN included.
s.values.take([0, 1, 2])

IntegerArray([1, 2, nan], dtype='Int64')

In [32]:
# Distinct values; unlike the uniques returned by factorize(), NaN is kept in the result.
s.values.unique()

IntegerArray([1, 2, nan], dtype='Int64')