In [17]:
import pandas as pd
import numpy as np

# Series.memory_usage(index=True, deep=False)

In [3]:
s = pd.Series(range(3))
s

0    0
1    1
2    2
dtype: int64

In [4]:
s.memory_usage()

152

In [5]:
s.memory_usage(index=False)

24

In [9]:
s = pd.Series(["a", "b"])
s

0    a
1    b
dtype: object

In [10]:
s.values

array(['a', 'b'], dtype=object)

In [7]:
s.memory_usage()

144

In [8]:
s.memory_usage(deep=True) # The memory footprint of object values is ignored by default:

244

# property Series.T

In [11]:
s = pd.Series(["a", "b"])
s

0    a
1    b
dtype: object

In [12]:
s.T

0    a
1    b
dtype: object

In [14]:
s.shape

(2,)

In [15]:
s.dtypes

dtype('O')

# Series.convert_dtypes

In [18]:
df = pd.DataFrame(
    {
        "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
        "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
        "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
        "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
        "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    }
)
df

Unnamed: 0,a,b,c,d,e,f
0,1,x,True,h,10.0,
1,2,y,False,i,,100.5
2,3,z,,,20.0,200.0


In [19]:
df.dtypes

a      int32
b     object
c     object
d     object
e    float64
f    float64
dtype: object

In [23]:
df.memory_usage(deep=True)

Index    128
a         12
b        174
c        100
d        148
e         24
f         24
dtype: int64

In [21]:
dfn = df.convert_dtypes()
dfn

Unnamed: 0,a,b,c,d,e,f
0,1,x,True,h,10.0,
1,2,y,False,i,,100.5
2,3,z,,,20.0,200.0


In [22]:
dfn.dtypes

a      Int32
b     string
c    boolean
d     string
e      Int64
f    Float64
dtype: object

In [24]:
dfn.memory_usage(deep=True)

Index    128
a         15
b        174
c          6
d        156
e         27
f         27
dtype: int64

In [25]:
s = pd.Series(["a", "b", np.nan])
s

0      a
1      b
2    NaN
dtype: object

In [26]:
s.convert_dtypes()

0       a
1       b
2    <NA>
dtype: string

In [27]:
help(pd.NA)

Help on NAType in module pandas._libs.missing object:

class NAType(C_NAType)
 |  NAType(*args, **kwargs)
 |  
 |  NA ("not available") missing value indicator.
 |  
 |  
 |  
 |  .. versionadded:: 1.0.0
 |  
 |  The NA singleton is a missing value indicator defined by pandas. It is
 |  used in certain new extension dtypes (currently the "string" dtype).
 |  
 |  Method resolution order:
 |      NAType
 |      C_NAType
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __abs__(self)
 |  
 |  __add__(self, other)
 |  
 |  __and__(self, other)
 |  
 |  __array_ufunc__(self, ufunc, method, *inputs, **kwargs)
 |  
 |  __bool__(self)
 |  
 |  __divmod__(self, other)
 |  
 |  __eq__(self, other)
 |  
 |  __floordiv__(self, other)
 |  
 |  __format__(self, format_spec) -> 'unicode'
 |  
 |  __ge__(self, other)
 |  
 |  __gt__(self, other)
 |  
 |  __hash__(self)
 |  
 |  __invert__(self)
 |  
 |  __le__(self, other)
 |  
 |  __lt__(self, other)
 |  
 |  __matmul__(self, other)
 |

In [29]:
df = pd.DataFrame({"A": ["a", 1, 2, 3]})
df

Unnamed: 0,A
0,a
1,1
2,2
3,3


In [30]:
df.dtypes

A    object
dtype: object

In [34]:
df.infer_objects().dtypes

A    object
dtype: object

In [31]:
df_1 = df.iloc[1:]
df_1

Unnamed: 0,A
1,1
2,2
3,3


In [32]:
df_1.dtypes

A    object
dtype: object

In [35]:
df_1.infer_objects().dtypes

A    int64
dtype: object

In [36]:
s = pd.Series(['A', 'B', 'C'])
for index, value in s.items():
    print(f"Index : {index}, Value : {value}")

Index : 0, Value : A
Index : 1, Value : B
Index : 2, Value : C


In [38]:
d = {'num_legs': [4, 4, 2, 2],
     'num_wings': [0, 0, 2, 2],
     'class': ['mammal', 'mammal', 'mammal', 'bird'],
     'animal': ['cat', 'dog', 'bat', 'penguin'],
     'locomotion': ['walks', 'walks', 'flies', 'walks']}
df = pd.DataFrame(data=d)
df

Unnamed: 0,num_legs,num_wings,class,animal,locomotion
0,4,0,mammal,cat,walks
1,4,0,mammal,dog,walks
2,2,2,mammal,bat,flies
3,2,2,bird,penguin,walks


In [39]:
df = df.set_index(['class', 'animal', 'locomotion'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,cat,walks,4,0
mammal,dog,walks,4,0
mammal,bat,flies,2,2
bird,penguin,walks,2,2


In [42]:
df.index

MultiIndex([('mammal',     'cat', 'walks'),
            ('mammal',     'dog', 'walks'),
            ('mammal',     'bat', 'flies'),
            (  'bird', 'penguin', 'walks')],
           names=['class', 'animal', 'locomotion'])

In [43]:
# This method takes a key argument to select data at a particular level of a MultiIndex.
df.xs('mammal')

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
animal,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,walks,4,0
dog,walks,4,0
bat,flies,2,2


In [44]:
df.xs('bird')

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
animal,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
penguin,walks,2,2


In [47]:
df.xs(('mammal', 'dog'))

  df.xs(('mammal', 'dog'))


Unnamed: 0_level_0,num_legs,num_wings
locomotion,Unnamed: 1_level_1,Unnamed: 2_level_1
walks,4,0


In [48]:
df.xs('cat', level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
class,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
mammal,walks,4,0


In [49]:
df.xs(('bird', 'walks'),
      level=[0, 'locomotion'])

Unnamed: 0_level_0,num_legs,num_wings
animal,Unnamed: 1_level_1,Unnamed: 2_level_1
penguin,2,2


In [50]:
df.xs('num_wings', axis=1)

class   animal   locomotion
mammal  cat      walks         0
        dog      walks         0
        bat      flies         2
bird    penguin  walks         2
Name: num_wings, dtype: int64

In [54]:
ser = pd.Series([390., 350., 30., 20.],
                index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
ser

Falcon    390.0
Falcon    350.0
Parrot     30.0
Parrot     20.0
Name: Max Speed, dtype: float64

In [55]:
ser.groupby(["a", "b", "a", "b"]).mean()

a    210.0
b    185.0
Name: Max Speed, dtype: float64

In [56]:
ser.groupby(level=0).mean()

Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64

In [57]:
arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
          ['Captive', 'Wild', 'Captive', 'Wild']]
index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))

ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
ser

Animal  Type   
Falcon  Captive    390.0
        Wild       350.0
Parrot  Captive     30.0
        Wild        20.0
Name: Max Speed, dtype: float64

In [58]:
ser.groupby(level=0).mean()

Animal
Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64

In [59]:
ser.groupby(level="Type").mean()

Type
Captive    210.0
Wild       185.0
Name: Max Speed, dtype: float64

In [60]:
ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
ser.groupby(level=0).sum()

a    3
b    3
dtype: int64

In [61]:
ser.groupby(level=0, dropna=False).sum()

a      3
b      3
NaN    3
dtype: int64

In [62]:
arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
ser.groupby(["a", "b", "a", np.nan]).mean()

a    210.0
b    350.0
Name: Max Speed, dtype: float64

In [63]:
ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()

a      210.0
b      350.0
NaN     20.0
Name: Max Speed, dtype: float64