# Agenda

1. Extension dtypes
2. Files
    - CSV
    - JSON
    - Excel
    - Other formats

In [2]:
import numpy as np
# Build a small array of Python ints; the following cells inspect its dtype
# and try assigning values of other types into it.
a = np.array([10, 20, 30, 40, 50])
a

array([10, 20, 30, 40, 50])

In [3]:
a.dtype

dtype('int64')

In [4]:
# An int fits the array's int64 dtype, so it is stored unchanged.
a[2] = 2345
a

array([  10,   20, 2345,   40,   50])

In [5]:
# A float assigned into the int64 array loses its fractional part:
# 23.45 is stored as 23 and the array still holds integers.
a[2] = 23.45
a

array([10, 20, 23, 40, 50])

In [6]:
# A numeric string is converted to an integer on assignment ('123' -> 123).
a[3] = '123'
a

array([ 10,  20,  23, 123,  50])

In [7]:
# NaN is a float with no integer equivalent, so an integer array rejects it.
# The error is the point of this cell: catch it and print it, so the
# notebook still runs from top to bottom without stopping here.
try:
    a[4] = np.nan
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: cannot convert float NaN to integer

In [9]:
import pandas as pd
from pandas import Series, DataFrame

In [10]:
# The same values as a pandas Series; pandas also infers int64 for them.
s = Series([10, 20, 30, 40, 50])
s

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [11]:
# Assigning an int keeps the Series as int64.
s.loc[2] = 2345
s

0      10
1      20
2    2345
3      40
4      50
dtype: int64

In [12]:
# Unlike the NumPy array above, the Series does not drop the fraction:
# the next cell shows every value upcast to float64 so 23.45 is kept.
# NOTE(review): recent pandas releases deprecate this silent upcast on
# assignment (PDEP-6) -- confirm against the installed pandas version.
s.loc[2] = 23.45


In [13]:
s

0    10.00
1    20.00
2    23.45
3    40.00
4    50.00
dtype: float64

In [14]:
# Rebuild the int64 Series, then store NaN in it. The next cell shows the
# Series has become float64 in order to hold the NaN.
# NOTE(review): recent pandas releases deprecate this silent upcast on
# assignment (PDEP-6) -- confirm against the installed pandas version.
s = Series([10, 20, 30, 40, 50])
s.loc[2] = np.nan

In [15]:
s

0    10.0
1    20.0
2     NaN
3    40.0
4    50.0
dtype: float64

In [16]:
# A Series of Python strings is stored with dtype object.
s = Series('this is a test'.split())
s

0    this
1      is
2       a
3    test
dtype: object

In [17]:
# An object Series can hold NaN next to strings; the next cell shows the
# dtype is still object.
s.loc[2] = np.nan

In [18]:
s

0    this
1      is
2     NaN
3    test
dtype: object

In [19]:
# The missing entry stays NaN, which makes the lengths come back as float64.
s.str.len()

0    4.0
1    2.0
2    NaN
3    4.0
dtype: float64

In [20]:
# First character of each string; the missing entry stays NaN.
s.str.get(0)

0      t
1      i
2    NaN
3      t
dtype: object

In [21]:
# Same integers, this time stored with pandas' nullable integer extension
# dtype, requested through its string alias.
s = Series([10, 20, 30, 40, 50], dtype='Int64')

In [22]:
s

0    10
1    20
2    30
3    40
4    50
dtype: Int64

In [23]:
# With the nullable Int64 dtype the NaN is stored as <NA> and, as the next
# cell shows, the dtype stays Int64 instead of becoming float64.
s.loc[2] = np.nan

In [24]:
s

0      10
1      20
2    <NA>
3      40
4      50
dtype: Int64

In [25]:
np.nan

nan

In [29]:
pd.NA

<NA>

In [31]:
# One column each of ints, floats and strings, to compare their dtypes.
df = DataFrame(dict(a=[10, 20, 30],
                    b=[11.1, 22.2, 33.3],
                    c='hello out there'.split()))
df

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there


In [32]:
df.dtypes

a      int64
b    float64
c     object
dtype: object

In [33]:
df.convert_dtypes()

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there


In [34]:
# After convert_dtypes() each column reports an extension dtype
# (Int64 / Float64 / string) in place of int64 / float64 / object.
df.convert_dtypes().dtypes

a             Int64
b           Float64
c    string[python]
dtype: object

In [35]:
# Rebind df to the converted frame so the following cells work with the
# extension dtypes.
df = df.convert_dtypes()

In [36]:
# Blank out the whole last row: one pd.NA for each of the three columns.
df.loc[2] = [pd.NA] * 3

In [37]:
df

Unnamed: 0,a,b,c
0,10.0,11.1,hello
1,20.0,22.2,out
2,,,


In [38]:
# The all-<NA> row did not change any column's dtype.
df.dtypes

a             Int64
b           Float64
c    string[python]
dtype: object

In [39]:
# Start again from the original values and move straight to the extension
# dtypes in a single expression.
df = DataFrame(dict(a=[10, 20, 30],
                    b=[11.1, 22.2, 33.3],
                    c='hello out there'.split())).convert_dtypes()

In [40]:
# Put column b back on NumPy's float64 while a and c keep their
# extension dtypes.
df = df.astype({'b': np.float64})
df.dtypes

a             Int64
b           float64
c    string[python]
dtype: object

In [41]:
# An Int64 column does not accept a float with a fractional part: the
# assignment raises and df is left as it was. The error is the point of
# this cell, so catch it and print it rather than stopping the notebook.
try:
    df.loc[0, 'a'] = 12.34
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Invalid value '12.34' for dtype Int64

In [50]:
# Switch IPython's exception reporting to Minimal mode for the cells below.
%xmode Minimal

Exception reporting mode: Minimal


In [51]:
# The same failing assignment as before, presumably repeated to show how
# the error is reported in Minimal mode (a single line, no stack frames).
# Left uncaught so the error goes through IPython's exception reporting.
df.loc[0, 'a'] = 12.34

TypeError: Invalid value '12.34' for dtype Int64

In [53]:
# Show the most recent exception again; 'Plain' presumably selects the
# report format for this one display -- TODO confirm against IPython docs.
%tb Plain

TypeError: Invalid value '12.34' for dtype Int64

In [54]:
df

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there


In [55]:
# Export the frame to the clipboard; the cell produces no notebook output.
# NOTE(review): presumably needs a working system clipboard and will fail
# on a headless machine -- confirm before running unattended.
df.to_clipboard()

In [56]:
# Default layout: {column name: {index label: value}}.
df.to_dict()

{'a': {0: 10, 1: 20, 2: 30},
 'b': {0: 11.1, 1: 22.2, 2: 33.3},
 'c': {0: 'hello', 1: 'out', 2: 'there'}}

In [57]:
df

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there


In [58]:
# Label 3 does not exist yet, so this assignment adds a new row.
df.loc[3] = [40, 44.4, 'hi']

In [59]:
df

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there
3,40,44.4,hi


In [60]:
# Deliberately give the last two rows the same index label (2).
df.index = [0, 1, 2, 2]

In [61]:
df

Unnamed: 0,a,b,c
0,10,11.1,hello
1,20,22.2,out
2,30,33.3,there
2,40,44.4,hi


In [62]:
# Each inner dict is keyed by index label, so it can hold only one entry
# for label 2: the row (30, 33.3, 'there') disappears without any warning
# and only the last row carrying that label (40, 44.4, 'hi') is kept.
df.to_dict()

{'a': {0: 10, 1: 20, 2: 40},
 'b': {0: 11.1, 1: 22.2, 2: 44.4},
 'c': {0: 'hello', 1: 'out', 2: 'hi'}}