#### [pandas.pydata.org](https://pandas.pydata.org/pandas-docs/stable/index.html)

In [1]:
import pandas as pd
import numpy as np
from numpy import nan as NaN
from IPython.display import Image, SVG

# Intro

### Series(data, index, name)

In [2]:
# Docstring:
pd.Series?

[1;31mInit signature:[0m
[0mpd[0m[1;33m.[0m[0mSeries[0m[1;33m([0m[1;33m
[0m    [0mdata[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mindex[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdtype[0m[1;33m:[0m [1;34m'Dtype | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mname[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcopy[0m[1;33m:[0m [1;34m'bool | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfastpath[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [1;34m'None'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
One-dimensional ndarray with axis labels (including time series).

Labels need not be unique but must be a hashable type. The object
supports both integer- and label-based indexing and provides a host of
methods for performing operations involving the index. Statistical
methods from ndarray hav

In [3]:
# Source:
#pd.Series??   

In [4]:
pd.Series()

Series([], dtype: object)

In [5]:
pd.Series().empty

True

In [6]:
pd.Series(5)

0    5
dtype: int64

In [7]:
pd.Series('ali')

0    ali
dtype: object

In [8]:
pd.Series([12, 8, 19])

0    12
1     8
2    19
dtype: int64

In [9]:
pd.Series(['ali', 'asma', 'fateme'])

0       ali
1      asma
2    fateme
dtype: object

In [10]:
pd.Series(data=['ali', 'asma', 'fateme'])

0       ali
1      asma
2    fateme
dtype: object

In [11]:
#---------------

In [12]:
s = pd.Series([12, 8, 19]); s

0    12
1     8
2    19
dtype: int64

In [13]:
s.values

array([12,  8, 19], dtype=int64)

In [14]:
s.index

RangeIndex(start=0, stop=3, step=1)

In [15]:
s = pd.Series(['ali', 'asma', 'fateme']); s

0       ali
1      asma
2    fateme
dtype: object

In [16]:
s.values

array(['ali', 'asma', 'fateme'], dtype=object)

In [17]:
s.index

RangeIndex(start=0, stop=3, step=1)

In [18]:
#---------------

In [19]:
s = pd.Series(data=[8, 12, 17, 19], index=['ali', 'taha', 'sara', 'omid'], name='myser'); s

ali      8
taha    12
sara    17
omid    19
Name: myser, dtype: int64

In [20]:
s.values

array([ 8, 12, 17, 19], dtype=int64)

In [21]:
s.index

Index(['ali', 'taha', 'sara', 'omid'], dtype='object')

In [22]:
len(s)

4

In [23]:
s.size

4

In [24]:
s.shape

(4,)

In [25]:
s.count()

4

In [26]:
s.is_monotonic_increasing

True

In [27]:
s.is_monotonic_decreasing

False

In [28]:
s.describe()

count     4.000000
mean     14.000000
std       4.966555
min       8.000000
25%      11.000000
50%      14.500000
75%      17.500000
max      19.000000
Name: myser, dtype: float64

In [29]:
s.name = 'first serie'; s

ali      8
taha    12
sara    17
omid    19
Name: first serie, dtype: int64

In [30]:
s.index.name = 'Index'; s

Index
ali      8
taha    12
sara    17
omid    19
Name: first serie, dtype: int64

In [31]:
s.index.name = 'f.name'; s

f.name
ali      8
taha    12
sara    17
omid    19
Name: first serie, dtype: int64

In [32]:
s.isin([8])

f.name
ali      True
taha    False
sara    False
omid    False
Name: first serie, dtype: bool

In [33]:
s.isin([12, 19])

f.name
ali     False
taha     True
sara    False
omid     True
Name: first serie, dtype: bool

In [34]:
8 in s

False

In [35]:
8 in s.values

True

In [36]:
'ali' in s

True

In [37]:
s.dtype

dtype('int64')

In [38]:
s['taha'] = None
display(s, s.dtype)

f.name
ali      8.0
taha     NaN
sara    17.0
omid    19.0
Name: first serie, dtype: float64

dtype('float64')

### same index & value

In [39]:
# same value

In [40]:
s = pd.Series(data=['ali', 'taha', 'ali', 'sara', 'omid', 'ali']); s

0     ali
1    taha
2     ali
3    sara
4    omid
5     ali
dtype: object

In [41]:
s == 'ali'

0     True
1    False
2     True
3    False
4    False
5     True
dtype: bool

In [42]:
s[s == 'ali']

0    ali
2    ali
5    ali
dtype: object

In [43]:
s[[True, False,  True, False, False,  True]]

0    ali
2    ali
5    ali
dtype: object

In [44]:
s.is_unique

False

In [45]:
s.unique()

array(['ali', 'taha', 'sara', 'omid'], dtype=object)

In [46]:
s.nunique()

4

In [47]:
s.count()

6

In [48]:
s.values

array(['ali', 'taha', 'ali', 'sara', 'omid', 'ali'], dtype=object)

In [49]:
s.value_counts()

ali     3
taha    1
sara    1
omid    1
Name: count, dtype: int64

In [50]:
s.mode()

0    ali
dtype: object

In [51]:
# same index

In [52]:
s = pd.Series(data=[12, 8, 19, 17], index=['ali', 'ali', 'sara', 'omid']); s

ali     12
ali      8
sara    19
omid    17
dtype: int64

In [53]:
s.index.is_unique

False

In [54]:
s['ali']

ali    12
ali     8
dtype: int64

### serie with other types data

In [55]:
pd.Series(['A', 'B', 'C'])

0    A
1    B
2    C
dtype: object

In [56]:
pd.Series(('A', 'B', 'C'))

0    A
1    B
2    C
dtype: object

In [57]:
pd.Series(np.array(['A', 'B', 'C']))

0    A
1    B
2    C
dtype: object

In [58]:
#pd.Series({'A', 'B', 'C'})       TypeError: 'set' type is unordered

pd.Series(list({'A', 'B', 'C'}))

0    B
1    A
2    C
dtype: object

In [59]:
pd.Series({0: 'A', 1: 'B', 2: 'C'})

0    A
1    B
2    C
dtype: object

In [60]:
d = {'ali' : 12, 'taha' : 8, 'sara' : 19 , 'omid' : 17}
pd.Series(data=d)

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [61]:
d = {'Iran': 'Tehran', 'Germany': 'Berlin', 'France': 'Paris'}
pd.Series(d)

Iran       Tehran
Germany    Berlin
France      Paris
dtype: object

In [62]:
#---------------

In [63]:
s = pd.Series(['A', 'B', 'C']); s

0    A
1    B
2    C
dtype: object

In [64]:
list(s)

['A', 'B', 'C']

In [65]:
tuple(s)

('A', 'B', 'C')

In [66]:
dict(s)

{0: 'A', 1: 'B', 2: 'C'}

In [67]:
#---------------

In [68]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'omid']); s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [69]:
list(s)

[12, 8, 19, 17]

In [70]:
tuple(s)

(12, 8, 19, 17)

In [71]:
dict(s)

{'ali': 12, 'taha': 8, 'sara': 19, 'omid': 17}

### MultiIndex

In [72]:
s = pd.Series([12, 8, 19, 17], [['b', 'b', 'g', 'g'], ['ali', 'taha', 'sara', 'negar']]); s

b  ali      12
   taha      8
g  sara     19
   negar    17
dtype: int64

In [73]:
s.unstack()

Unnamed: 0,ali,negar,sara,taha
b,12.0,,,8.0
g,,17.0,19.0,


In [74]:
s = pd.Series([12, 8, 19, 17], [['ali', 'taha', 'sara', 'negar'], ['b', 'b', 'g', 'g']]); s

ali    b    12
taha   b     8
sara   g    19
negar  g    17
dtype: int64

In [75]:
s.unstack()

Unnamed: 0,b,g
ali,12.0,
negar,,17.0
sara,,19.0
taha,8.0,


In [76]:
s.index

MultiIndex([(  'ali', 'b'),
            ( 'taha', 'b'),
            ( 'sara', 'g'),
            ('negar', 'g')],
           )

In [77]:
dict(s)

{('ali', 'b'): 12, ('taha', 'b'): 8, ('sara', 'g'): 19, ('negar', 'g'): 17}

### index & slice

In [78]:
s = pd.Series(data=[12, 8, 19, 17], index=['ali', 'taha', 'sara', 'omid']); s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [79]:
s['taha']

8

In [80]:
s['taha':'taha']

taha    8
dtype: int64

In [81]:
s['taha':'sara']

taha     8
sara    19
dtype: int64

In [82]:
s.loc['taha']

8

In [83]:
s.loc['taha':'taha']

taha    8
dtype: int64

In [84]:
s.loc['taha':'sara']

taha     8
sara    19
dtype: int64

In [85]:
#---------------

In [86]:
s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [87]:
s[1]

  s[1]


8

In [88]:
s[1:2]

taha    8
dtype: int64

In [89]:
s[1:3]

taha     8
sara    19
dtype: int64

In [90]:
s[:]

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [91]:
s.iloc[1]

8

In [92]:
s.iloc[1:2]

taha    8
dtype: int64

In [93]:
s.iloc[1:3]

taha     8
sara    19
dtype: int64

In [94]:
s.iloc[:]

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [95]:
#---------------

In [96]:
s = pd.Series(data=[12, 8, 19, 17]); s

0    12
1     8
2    19
3    17
dtype: int64

In [97]:
s[1]

8

In [98]:
s[1:3]

1     8
2    19
dtype: int64

In [99]:
s.iloc[1]

8

In [100]:
s.iloc[1:3]

1     8
2    19
dtype: int64

### change & append value

In [101]:
# change

In [102]:
s = pd.Series([7, 14, 19, 17], ['ali', 'taha', 'sara', 'taha']); s

ali      7
taha    14
sara    19
taha    17
dtype: int64

In [103]:
s['ali'] = 10
s

ali     10
taha    14
sara    19
taha    17
dtype: int64

In [104]:
s['taha'] = 1
s

ali     10
taha     1
sara    19
taha     1
dtype: int64

In [105]:
s.iloc[0] = 11
s

ali     11
taha     1
sara    19
taha     1
dtype: int64

In [106]:
s.iloc[1] = 0
s

ali     11
taha     0
sara    19
taha     1
dtype: int64

In [107]:
s.iloc[2:] = 20
s

ali     11
taha     0
sara    20
taha    20
dtype: int64

In [108]:
# append

In [109]:
s = pd.Series([7, 14, 19, 17], ['ali', 'taha', 'sara', 'omid']); s

ali      7
taha    14
sara    19
omid    17
dtype: int64

In [110]:
s['asma'] = 20
s

ali      7
taha    14
sara    19
omid    17
asma    20
dtype: int64

In [111]:
s = pd.Series([7, 14, 19, 17]); s

0     7
1    14
2    19
3    17
dtype: int64

In [112]:
s[4] = 10
s

0     7
1    14
2    19
3    17
4    10
dtype: int64

In [113]:
s[5] = 13
s

0     7
1    14
2    19
3    17
4    10
5    13
dtype: int64

# Functions

## manage index

### set_axis (labels)

In [114]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'omid']); s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [115]:
s.set_axis(['ali', 'ali', 'sara', 'omid'])

ali     12
ali      8
sara    19
omid    17
dtype: int64

In [116]:
s.set_axis(['mahdi', 'taha', 'sara', 'omid'])

mahdi    12
taha      8
sara     19
omid     17
dtype: int64

In [117]:
s.set_axis([1, 2, 3, 4])

1    12
2     8
3    19
4    17
dtype: int64

In [118]:
s.set_axis(range(len(s)))

0    12
1     8
2    19
3    17
dtype: int64

### reset_index (level, drop, inplace)

In [119]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar']); s

ali      12
taha      8
sara     19
negar    17
dtype: int64

In [120]:
s.reset_index()

Unnamed: 0,index,0
0,ali,12
1,taha,8
2,sara,19
3,negar,17


In [121]:
s.reset_index(drop=True)

0    12
1     8
2    19
3    17
dtype: int64

MultiIndex

In [122]:
s = pd.Series([12, 8, 19, 17], [['ali', 'taha', 'sara', 'negar'], ['b', 'b', 'g', 'g']]); s

ali    b    12
taha   b     8
sara   g    19
negar  g    17
dtype: int64

In [123]:
s.reset_index(drop=True)

0    12
1     8
2    19
3    17
dtype: int64

In [124]:
s.reset_index(level=[0, 1], drop=True)

0    12
1     8
2    19
3    17
dtype: int64

In [125]:
s.reset_index(level=0, drop=True)

b    12
b     8
g    19
g    17
dtype: int64

In [126]:
s.reset_index(level=1, drop=True)

ali      12
taha      8
sara     19
negar    17
dtype: int64

### reindex (index, method, fill_value, limit, level)

In [127]:
s = pd.Series([12, 8, 19], ['ali', 'taha', 'sara']); s

ali     12
taha     8
sara    19
dtype: int64

In [128]:
s.reindex(['sara', 'ali', 'taha'])

sara    19
ali     12
taha     8
dtype: int64

In [129]:
s.reindex(['sara', 'ali', 'sara'])

sara    19
ali     12
sara    19
dtype: int64

In [130]:
s.reindex(['sara', 'omid', 'taha'])

sara    19.0
omid     NaN
taha     8.0
dtype: float64

In [131]:
s.reindex(['sara', 'ali'])

sara    19
ali     12
dtype: int64

In [132]:
s.reindex(['ali', 'taha', 'mahsa', 'omid'])

ali      12.0
taha      8.0
mahsa     NaN
omid      NaN
dtype: float64

In [133]:
s.reindex(['ali', 'taha', 'mahsa', 'omid'], fill_value=0)

ali      12
taha      8
mahsa     0
omid      0
dtype: int64

**method & limit**

In [134]:
i1 = pd.date_range('1/1/2010', periods=8, freq='D')
i2 = pd.date_range('12/29/2009', periods=13, freq='D')
s = pd.Series(data=[100, 101, 95, 100, 89, NaN, 92, 88], index=i1)
display(i1, i2, s)

DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04',
               '2010-01-05', '2010-01-06', '2010-01-07', '2010-01-08'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2009-12-29', '2009-12-30', '2009-12-31', '2010-01-01',
               '2010-01-02', '2010-01-03', '2010-01-04', '2010-01-05',
               '2010-01-06', '2010-01-07', '2010-01-08', '2010-01-09',
               '2010-01-10'],
              dtype='datetime64[ns]', freq='D')

2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04    100.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
Freq: D, dtype: float64

In [135]:
s.pop('2010-01-04'); s

2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
dtype: float64

In [136]:
s.reindex(index=i2)

2009-12-29      NaN
2009-12-30      NaN
2009-12-31      NaN
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04      NaN
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      NaN
2010-01-10      NaN
Freq: D, dtype: float64

In [137]:
s.reindex(index=i2, fill_value=0)

2009-12-29      0.0
2009-12-30      0.0
2009-12-31      0.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04      0.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      0.0
2010-01-10      0.0
Freq: D, dtype: float64

In [138]:
s.reindex(index=i2, method='nearest')

2009-12-29    100.0
2009-12-30    100.0
2009-12-31    100.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     89.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09     88.0
2010-01-10     88.0
Freq: D, dtype: float64

In [139]:
s.reindex(index=i2, method='ffill')

2009-12-29      NaN
2009-12-30      NaN
2009-12-31      NaN
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     95.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09     88.0
2010-01-10     88.0
Freq: D, dtype: float64

In [140]:
s.reindex(index=i2, method='bfill')

2009-12-29    100.0
2009-12-30    100.0
2009-12-31    100.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     89.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      NaN
2010-01-10      NaN
Freq: D, dtype: float64

In [141]:
s.reindex(index=i2, method='bfill', limit=1)

2009-12-29      NaN
2009-12-30      NaN
2009-12-31    100.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     89.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      NaN
2010-01-10      NaN
Freq: D, dtype: float64

In [142]:
s.reindex(index=i2, method='bfill', limit=2)

2009-12-29      NaN
2009-12-30    100.0
2009-12-31    100.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     89.0
2010-01-05     89.0
2010-01-06      NaN
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      NaN
2010-01-10      NaN
Freq: D, dtype: float64

In [143]:
s = s.reindex(index=i2)
s.bfill(limit=2)

2009-12-29      NaN
2009-12-30    100.0
2009-12-31    100.0
2010-01-01    100.0
2010-01-02    101.0
2010-01-03     95.0
2010-01-04     89.0
2010-01-05     89.0
2010-01-06     92.0
2010-01-07     92.0
2010-01-08     88.0
2010-01-09      NaN
2010-01-10      NaN
Freq: D, dtype: float64

### take (indices)

In [144]:
s = pd.Series(range(10, 51, 10)); s

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [145]:
s.reindex([2, 4, 0, 1, 3])

2    30
4    50
0    10
1    20
3    40
dtype: int64

In [146]:
s.take([4, 3, 2, 1, 0])

4    50
3    40
2    30
1    20
0    10
dtype: int64

In [147]:
s.take([2, 4, 0, 1, 3])

2    30
4    50
0    10
1    20
3    40
dtype: int64

In [148]:
i = np.random.permutation(5); i

array([0, 3, 4, 2, 1])

In [149]:
s.take(i)

0    10
3    40
4    50
2    30
1    20
dtype: int64

In [150]:
#---------------

In [151]:
s = pd.Series(range(10, 51, 10), ['a', 'b', 'c', 'd', 'e']); s

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [152]:
s.reindex(['e', 'd', 'c', 'b', 'a'])

e    50
d    40
c    30
b    20
a    10
dtype: int64

In [153]:
s.take([4, 3, 2, 1, 0])

e    50
d    40
c    30
b    20
a    10
dtype: int64

In [154]:
s.take([2, 4, 0, 1, 3])

c    30
e    50
a    10
b    20
d    40
dtype: int64

In [155]:
i = np.random.permutation(5); i

array([2, 4, 0, 3, 1])

In [156]:
s.take(i)

c    30
e    50
a    10
d    40
b    20
dtype: int64

### rename (index, inplace, level)

In [157]:
s = pd.Series([12, 8, 19], ['ali', 'taha', 'sara']); s

ali     12
taha     8
sara    19
dtype: int64

In [158]:
s.rename({'taha': "x"})

ali     12
x        8
sara    19
dtype: int64

In [159]:
s.rename({'ali': "x", 'taha': "y", 'sara': "z"})

x    12
y     8
z    19
dtype: int64

In [160]:
s.rename(str.upper)

ALI     12
TAHA     8
SARA    19
dtype: int64

In [161]:
l = lambda x: '|' + x + '|'
s.rename(l)

|ali|     12
|taha|     8
|sara|    19
dtype: int64

MultiIndex

In [162]:
s = pd.Series([12, 8, 19], [['ali', 'taha', 'sara'], ['x', 'y', 'z']]); s

ali   x    12
taha  y     8
sara  z    19
dtype: int64

In [163]:
s.rename({'ali': 'a'})

a     x    12
taha  y     8
sara  z    19
dtype: int64

In [164]:
s.rename({'x': '.'})

ali   .    12
taha  y     8
sara  z    19
dtype: int64

In [165]:
s.rename({'ali': 'a', 'x': '.'})

a     .    12
taha  y     8
sara  z    19
dtype: int64

In [166]:
l = lambda x: '|' + x + '|'
s.rename(l)

|ali|   |x|    12
|taha|  |y|     8
|sara|  |z|    19
dtype: int64

In [167]:
l = lambda x: '|' + x + '|'
s.rename(l, level=0)

|ali|   x    12
|taha|  y     8
|sara|  z    19
dtype: int64

In [168]:
l = lambda x: '|' + x + '|'
s.rename(l, level=1)

ali   |x|    12
taha  |y|     8
sara  |z|    19
dtype: int64

### rename_axis (index, inplace)

In [169]:
s = pd.Series([12, 8, 19], ['ali', 'taha', 'sara']); s

ali     12
taha     8
sara    19
dtype: int64

In [170]:
s.rename_axis(index='name')

name
ali     12
taha     8
sara    19
dtype: int64

In [171]:
s.rename_axis('name', inplace=True); s

name
ali     12
taha     8
sara    19
dtype: int64

In [172]:
s.rename_axis(index=str.upper)

NAME
ali     12
taha     8
sara    19
dtype: int64

MultiIndex

In [173]:
s = pd.Series([12, 8, 19], [['ali', 'taha', 'sara'], ['x', 'y', 'z']]); s

ali   x    12
taha  y     8
sara  z    19
dtype: int64

In [174]:
s.rename_axis(['name', 'num'], inplace=True); s

name  num
ali   x      12
taha  y       8
sara  z      19
dtype: int64

In [175]:
s.rename_axis(index={'name': 'esm'})

esm   num
ali   x      12
taha  y       8
sara  z      19
dtype: int64

In [176]:
s.rename_axis(index=str.upper)

NAME  NUM
ali   x      12
taha  y       8
sara  z      19
dtype: int64

### sort_index (level, ascending, inplace, na_position, sort_remaining, ignore_index, key)

In [177]:
s = pd.Series([12, 7, 9, 4], index=[None, 'a', 'c', 'b']); s

None    12
a        7
c        9
b        4
dtype: int64

In [178]:
s.sort_index()

a        7
b        4
c        9
None    12
dtype: int64

In [179]:
s.sort_index(ascending=False)

c        9
b        4
a        7
None    12
dtype: int64

In [180]:
s.sort_index()

a        7
b        4
c        9
None    12
dtype: int64

In [181]:
s.sort_index(na_position='first')

None    12
a        7
b        4
c        9
dtype: int64

In [182]:
s.sort_index(ascending=False, na_position='first')

None    12
c        9
b        4
a        7
dtype: int64

In [183]:
s.sort_index()

a        7
b        4
c        9
None    12
dtype: int64

In [184]:
s.sort_index(ignore_index=True)

0     7
1     4
2     9
3    12
dtype: int64

key

In [185]:
s = pd.Series([1, 2, 3, 4], ['A', 'b', 'C', 'd']); s

A    1
b    2
C    3
d    4
dtype: int64

In [186]:
s.sort_index()

A    1
C    3
b    2
d    4
dtype: int64

In [187]:
s.sort_index(key=lambda x: x.str.lower())

A    1
b    2
C    3
d    4
dtype: int64

level & sort_remaining

In [188]:
d = {('d', 'two'): 1,
     ('d', 'one'): 2,
     ('c', 'two'): 3,
     ('c', 'one'): 4,
     ('b', 'two'): 5,
     ('b', 'one'): 8,
     ('a', 'two'): 7,
     ('a', 'one'): 6}

In [189]:
s = pd.Series(d); s

d  two    1
   one    2
c  two    3
   one    4
b  two    5
   one    8
a  two    7
   one    6
dtype: int64

In [190]:
s.sort_index(level=1)

a  one    6
b  one    8
c  one    4
d  one    2
a  two    7
b  two    5
c  two    3
d  two    1
dtype: int64

In [191]:
s.sort_index(level=1, sort_remaining=False)

d  one    2
c  one    4
b  one    8
a  one    6
d  two    1
c  two    3
b  two    5
a  two    7
dtype: int64

In [192]:
s.sort_index(level=0)

a  one    6
   two    7
b  one    8
   two    5
c  one    4
   two    3
d  one    2
   two    1
dtype: int64

In [193]:
s.sort_index(level=0, sort_remaining=False)

a  two    7
   one    6
b  two    5
   one    8
c  two    3
   one    4
d  two    1
   one    2
dtype: int64

### add_...fix (...fix)

In [194]:
s = pd.Series([10, 20, 30, 40]); s

0    10
1    20
2    30
3    40
dtype: int64

In [195]:
s.add_prefix('item_')

item_0    10
item_1    20
item_2    30
item_3    40
dtype: int64

In [196]:
s.add_suffix('_item')

0_item    10
1_item    20
2_item    30
3_item    40
dtype: int64

### delete

drop (labels)

In [197]:
s = pd.Series([12, 8, 19, 17], index=['ali', 'taha', 'sara', 'omid']); s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [198]:
s.drop('omid')

ali     12
taha     8
sara    19
dtype: int64

In [199]:
s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [200]:
s.drop(['ali','taha'])

sara    19
omid    17
dtype: int64

In [201]:
s

ali     12
taha     8
sara    19
omid    17
dtype: int64

pop (item)

In [202]:
s = pd.Series([12, 8, 19, 17], index=['ali', 'taha', 'sara', 'omid']); s

ali     12
taha     8
sara    19
omid    17
dtype: int64

In [203]:
s.pop('ali')

12

In [204]:
s

taha     8
sara    19
omid    17
dtype: int64

## manage value

### NaN

is & not ()

In [205]:
s = pd.Series([12, NaN, 5, NaN, 7, 9, NaN], index=['a', 'b', 'c', 'd', 'e', NaN, 'f']); s

a      12.0
b       NaN
c       5.0
d       NaN
e       7.0
NaN     9.0
f       NaN
dtype: float64

In [206]:
s.isna()

a      False
b       True
c      False
d       True
e      False
NaN    False
f       True
dtype: bool

In [207]:
s.isnull()

a      False
b       True
c      False
d       True
e      False
NaN    False
f       True
dtype: bool

In [208]:
s.notna()

a       True
b      False
c       True
d      False
e       True
NaN     True
f      False
dtype: bool

In [209]:
s.notnull()

a       True
b      False
c       True
d      False
e       True
NaN     True
f      False
dtype: bool

size & count()

In [210]:
len(s)

7

In [211]:
s.size

7

In [212]:
s.count()

4

In [213]:
s[s.notna()]

a      12.0
c       5.0
e       7.0
NaN     9.0
dtype: float64

dropna (inplace, ignore_index)

In [214]:
s

a      12.0
b       NaN
c       5.0
d       NaN
e       7.0
NaN     9.0
f       NaN
dtype: float64

In [215]:
s.dropna()

a      12.0
c       5.0
e       7.0
NaN     9.0
dtype: float64

In [216]:
s.dropna(ignore_index=True)

0    12.0
1     5.0
2     7.0
3     9.0
dtype: float64

In [217]:
#---------------

In [218]:
s = pd.Series([15, NaN, 9, 18, NaN, 20]); s

0    15.0
1     NaN
2     9.0
3    18.0
4     NaN
5    20.0
dtype: float64

In [219]:
s.dropna()

0    15.0
2     9.0
3    18.0
5    20.0
dtype: float64

In [220]:
s.dropna(ignore_index=True)

0    15.0
1     9.0
2    18.0
3    20.0
dtype: float64

In [221]:
s

0    15.0
1     NaN
2     9.0
3    18.0
4     NaN
5    20.0
dtype: float64

In [222]:
s.dropna(inplace=True, ignore_index=True); s

0    15.0
1     9.0
2    18.0
3    20.0
dtype: float64

fillna (value, inplace, limit)

In [223]:
s = pd.Series([12, NaN, 5, NaN, 7, NaN], index=['a', 'b', 'c', 'd', 'e', 'f']); s

a    12.0
b     NaN
c     5.0
d     NaN
e     7.0
f     NaN
dtype: float64

In [224]:
s.fillna(0)

a    12.0
b     0.0
c     5.0
d     0.0
e     7.0
f     0.0
dtype: float64

In [225]:
# limit: maximam tedade maghadire motavalie NaN baraye por kardan be jelo/aghab.

In [226]:
s.fillna('o', limit=1)

a    12.0
b       o
c     5.0
d     NaN
e     7.0
f     NaN
dtype: object

In [227]:
s.fillna('o', limit=2)

a    12.0
b       o
c     5.0
d       o
e     7.0
f     NaN
dtype: object

In [228]:
s.fillna('o', limit=3)

a    12.0
b       o
c     5.0
d       o
e     7.0
f       o
dtype: object

In [229]:
#---------------

In [230]:
s

a    12.0
b     NaN
c     5.0
d     NaN
e     7.0
f     NaN
dtype: float64

In [231]:
s.mean()

8.0

In [232]:
s.fillna(s.mean())

a    12.0
b     8.0
c     5.0
d     8.0
e     7.0
f     8.0
dtype: float64

### ffill & bfill (inplace, limit)

In [233]:
s = pd.Series([NaN, 12, NaN, NaN, 5, NaN, 7, NaN], ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']); s

a     NaN
b    12.0
c     NaN
d     NaN
e     5.0
f     NaN
g     7.0
h     NaN
dtype: float64

In [234]:
s.ffill()

a     NaN
b    12.0
c    12.0
d    12.0
e     5.0
f     5.0
g     7.0
h     7.0
dtype: float64

In [235]:
s

a     NaN
b    12.0
c     NaN
d     NaN
e     5.0
f     NaN
g     7.0
h     NaN
dtype: float64

In [236]:
s.bfill()

a    12.0
b    12.0
c     5.0
d     5.0
e     5.0
f     7.0
g     7.0
h     NaN
dtype: float64

In [237]:
# limit

In [238]:
s

a     NaN
b    12.0
c     NaN
d     NaN
e     5.0
f     NaN
g     7.0
h     NaN
dtype: float64

In [239]:
s.ffill(limit=1)

a     NaN
b    12.0
c    12.0
d     NaN
e     5.0
f     5.0
g     7.0
h     7.0
dtype: float64

In [240]:
s.ffill(limit=2)

a     NaN
b    12.0
c    12.0
d    12.0
e     5.0
f     5.0
g     7.0
h     7.0
dtype: float64

In [241]:
s

a     NaN
b    12.0
c     NaN
d     NaN
e     5.0
f     NaN
g     7.0
h     NaN
dtype: float64

In [242]:
s.bfill(limit=1)

a    12.0
b    12.0
c     NaN
d     5.0
e     5.0
f     7.0
g     7.0
h     NaN
dtype: float64

In [243]:
s.bfill(limit=2)

a    12.0
b    12.0
c     5.0
d     5.0
e     5.0
f     7.0
g     7.0
h     NaN
dtype: float64

### replace (to_replace, value, inplace)

In [244]:
s = pd.Series([NaN, 15, 9, NaN, 18, 7, 20, NaN]); s

0     NaN
1    15.0
2     9.0
3     NaN
4    18.0
5     7.0
6    20.0
7     NaN
dtype: float64

In [245]:
s.replace(9, 10)

0     NaN
1    15.0
2    10.0
3     NaN
4    18.0
5     7.0
6    20.0
7     NaN
dtype: float64

In [246]:
s.replace({9: 10})

0     NaN
1    15.0
2    10.0
3     NaN
4    18.0
5     7.0
6    20.0
7     NaN
dtype: float64

In [247]:
s.replace(NaN, 0)

0     0.0
1    15.0
2     9.0
3     0.0
4    18.0
5     7.0
6    20.0
7     0.0
dtype: float64

In [248]:
s.replace([9, NaN], 0)

0     0.0
1    15.0
2     0.0
3     0.0
4    18.0
5     7.0
6    20.0
7     0.0
dtype: float64

In [249]:
s.replace([9, NaN], [10, 0])

0     0.0
1    15.0
2    10.0
3     0.0
4    18.0
5     7.0
6    20.0
7     0.0
dtype: float64

In [250]:
s.replace({9: 10, NaN: 0})

0     0.0
1    15.0
2    10.0
3     0.0
4    18.0
5     7.0
6    20.0
7     0.0
dtype: float64

### duplicate 

2 same value

In [251]:
s = pd.Series(['a','b','a','c','d','a','c']); s

0    a
1    b
2    a
3    c
4    d
5    a
6    c
dtype: object

In [252]:
s.unique()

array(['a', 'b', 'c', 'd'], dtype=object)

In [253]:
s.nunique()

4

In [254]:
s.values

array(['a', 'b', 'a', 'c', 'd', 'a', 'c'], dtype=object)

In [255]:
s.value_counts()

a    3
c    2
b    1
d    1
Name: count, dtype: int64

In [256]:
# duplicated(keep)

In [257]:
s.duplicated()

0    False
1    False
2     True
3    False
4    False
5     True
6     True
dtype: bool

In [258]:
s[s.duplicated()]

2    a
5    a
6    c
dtype: object

In [259]:
s[~s.duplicated()]

0    a
1    b
3    c
4    d
dtype: object

In [260]:
s.where(s.duplicated())

0    NaN
1    NaN
2      a
3    NaN
4    NaN
5      a
6      c
dtype: object

In [261]:
s.duplicated(keep='last')

0     True
1    False
2     True
3     True
4    False
5    False
6    False
dtype: bool

In [262]:
s.where(s.duplicated(keep='last'))

0      a
1    NaN
2      a
3      c
4    NaN
5    NaN
6    NaN
dtype: object

In [263]:
s[~s.duplicated(keep='last')]

1    b
4    d
5    a
6    c
dtype: object

In [264]:
# drop_duplicates(keep, inplace, ignore_index)

In [265]:
s

0    a
1    b
2    a
3    c
4    d
5    a
6    c
dtype: object

In [266]:
s.drop_duplicates()

0    a
1    b
3    c
4    d
dtype: object

In [267]:
s.drop_duplicates(ignore_index=True)

0    a
1    b
2    c
3    d
dtype: object

In [268]:
s

0    a
1    b
2    a
3    c
4    d
5    a
6    c
dtype: object

In [269]:
s.drop_duplicates(keep='last')

1    b
4    d
5    a
6    c
dtype: object

In [270]:
s.drop_duplicates(keep='last', ignore_index=True)

0    b
1    d
2    a
3    c
dtype: object

..........

In [271]:
s = pd.Series([12, 4, 12, 20, 7, 12, 7, 5, 4]); s

0    12
1     4
2    12
3    20
4     7
5    12
6     7
7     5
8     4
dtype: int64

In [272]:
s.unique()

array([12,  4, 20,  7,  5], dtype=int64)

In [273]:
s.value_counts()

12    3
4     2
7     2
20    1
5     1
Name: count, dtype: int64

In [274]:
s.duplicated()

0    False
1    False
2     True
3    False
4    False
5     True
6     True
7    False
8     True
dtype: bool

In [275]:
s.drop_duplicates()

0    12
1     4
3    20
4     7
7     5
dtype: int64

..........

In [276]:
s = pd.Series([12, 4, 12, 20, 7, 12, 7, 5], ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']); s

a    12
b     4
c    12
d    20
e     7
f    12
g     7
h     5
dtype: int64

In [277]:
s.unique()

array([12,  4, 20,  7,  5], dtype=int64)

In [278]:
s.values

array([12,  4, 12, 20,  7, 12,  7,  5], dtype=int64)

In [279]:
s.value_counts()

12    3
7     2
4     1
20    1
5     1
Name: count, dtype: int64

In [280]:
s.duplicated()

a    False
b    False
c     True
d    False
e    False
f     True
g     True
h    False
dtype: bool

In [281]:
s.duplicated(keep='last')

a     True
b    False
c     True
d    False
e     True
f    False
g    False
h    False
dtype: bool

In [282]:
s.drop_duplicates()

a    12
b     4
d    20
e     7
h     5
dtype: int64

In [283]:
s.drop_duplicates(keep='last')

b     4
d    20
f    12
g     7
h     5
dtype: int64

In [284]:
s.drop_duplicates(ignore_index=True)

0    12
1     4
2    20
3     7
4     5
dtype: int64

### value_counts (normalize, sort, ascending, bins, dropna)

In [285]:
s = pd.Series([12, 14, 18, 11, 14, 20, 19, None, 14, 19, 10]); s

0     12.0
1     14.0
2     18.0
3     11.0
4     14.0
5     20.0
6     19.0
7      NaN
8     14.0
9     19.0
10    10.0
dtype: float64

In [286]:
s.value_counts()

14.0    3
19.0    2
12.0    1
18.0    1
11.0    1
20.0    1
10.0    1
Name: count, dtype: int64

In [287]:
s.value_counts(sort=False)

12.0    1
14.0    3
18.0    1
11.0    1
20.0    1
19.0    2
10.0    1
Name: count, dtype: int64

In [288]:
s.value_counts(ascending=True)

12.0    1
18.0    1
11.0    1
20.0    1
10.0    1
19.0    2
14.0    3
Name: count, dtype: int64

In [289]:
s.value_counts(normalize=True)

14.0    0.3
19.0    0.2
12.0    0.1
18.0    0.1
11.0    0.1
20.0    0.1
10.0    0.1
Name: proportion, dtype: float64

In [290]:
s.value_counts(normalize=True).mul(100)

14.0    30.0
19.0    20.0
12.0    10.0
18.0    10.0
11.0    10.0
20.0    10.0
10.0    10.0
Name: proportion, dtype: float64

In [291]:
s.value_counts(dropna=False)

14.0    3
19.0    2
12.0    1
18.0    1
11.0    1
20.0    1
NaN     1
10.0    1
Name: count, dtype: int64

In [292]:
s.value_counts(bins=3)

(16.667, 20.0]      4
(9.989, 13.333]     3
(13.333, 16.667]    3
Name: count, dtype: int64

In [293]:
s.value_counts(bins=3).sort_index()

(9.989, 13.333]     3
(13.333, 16.667]    3
(16.667, 20.0]      4
Name: count, dtype: int64

In [294]:
s.value_counts(bins=[5, 10, 15, 20]).sort_index()

(4.999, 10.0]    1
(10.0, 15.0]     5
(15.0, 20.0]     4
Name: count, dtype: int64

In [295]:
#..........

In [296]:
pd.cut(s, 3)

0       (9.99, 13.333]
1     (13.333, 16.667]
2       (16.667, 20.0]
3       (9.99, 13.333]
4     (13.333, 16.667]
5       (16.667, 20.0]
6       (16.667, 20.0]
7                  NaN
8     (13.333, 16.667]
9       (16.667, 20.0]
10      (9.99, 13.333]
dtype: category
Categories (3, interval[float64, right]): [(9.99, 13.333] < (13.333, 16.667] < (16.667, 20.0]]

In [297]:
pd.cut(s, 3).value_counts()

(16.667, 20.0]      4
(9.99, 13.333]      3
(13.333, 16.667]    3
Name: count, dtype: int64

In [298]:
#..........

In [299]:
c = pd.cut(s.values, 3)
c

[(9.99, 13.333], (13.333, 16.667], (16.667, 20.0], (9.99, 13.333], (13.333, 16.667], ..., (16.667, 20.0], NaN, (13.333, 16.667], (16.667, 20.0], (9.99, 13.333]]
Length: 11
Categories (3, interval[float64, right]): [(9.99, 13.333] < (13.333, 16.667] < (16.667, 20.0]]

In [300]:
c.codes

array([ 0,  1,  2,  0,  1,  2,  2, -1,  1,  2,  0], dtype=int8)

In [301]:
c.categories

IntervalIndex([(9.99, 13.333], (13.333, 16.667], (16.667, 20.0]], dtype='interval[float64, right]')

In [302]:
c.value_counts()

(9.99, 13.333]      3
(13.333, 16.667]    3
(16.667, 20.0]      4
Name: count, dtype: int64

### sort_values (ascending, inplace, na_position, ignore_index, key)

In [303]:
s = pd.Series([12, 4, None, 7], ['b', 'a', 'd', 'c']); s

b    12.0
a     4.0
d     NaN
c     7.0
dtype: float64

In [304]:
sorted(s)

[4.0, 7.0, 12.0, nan]

In [305]:
s.sort_values()

a     4.0
c     7.0
b    12.0
d     NaN
dtype: float64

In [306]:
s.sort_values(ascending=False)

b    12.0
c     7.0
a     4.0
d     NaN
dtype: float64

In [307]:
s.sort_values(ignore_index=True)

0     4.0
1     7.0
2    12.0
3     NaN
dtype: float64

In [308]:
s.sort_values()

a     4.0
c     7.0
b    12.0
d     NaN
dtype: float64

In [309]:
s.sort_values(na_position='first')

d     NaN
a     4.0
c     7.0
b    12.0
dtype: float64

In [310]:
s.sort_values(ascending=False, na_position='first')

d     NaN
b    12.0
c     7.0
a     4.0
dtype: float64

key

In [311]:
s = pd.Series(['a', 'B', 'c', 'D', 'e']); s

0    a
1    B
2    c
3    D
4    e
dtype: object

In [312]:
s.sort_values()

1    B
3    D
0    a
2    c
4    e
dtype: object

In [313]:
s.sort_values(key=lambda x: x.str.lower())

0    a
1    B
2    c
3    D
4    e
dtype: object

In [314]:
#---------------

In [315]:
s = pd.Series([4, -3, 0, 2, -4]); s

0    4
1   -3
2    0
3    2
4   -4
dtype: int64

In [316]:
s.sort_values()

4   -4
1   -3
2    0
3    2
0    4
dtype: int64

In [317]:
s.sort_values(key=np.abs)

2    0
3    2
1   -3
0    4
4   -4
dtype: int64

rank: rank of values in s.sort_values()

In [318]:
s = pd.Series([12, 4, None, 7], ['b', 'a', 'd', 'c']); s

b    12.0
a     4.0
d     NaN
c     7.0
dtype: float64

In [319]:
s.sort_values()

a     4.0
c     7.0
b    12.0
d     NaN
dtype: float64

In [320]:
s.rank()        

b    3.0
a    1.0
d    NaN
c    2.0
dtype: float64

### nlargest (n, keep)

In [321]:
data = {'a':6, 'b':3, 'c':8, 'd':5, 'e':9, 'f':3, 'g':5, 'h':4, 'i':5}
s = pd.Series(data); s

a    6
b    3
c    8
d    5
e    9
f    3
g    5
h    4
i    5
dtype: int64

In [322]:
s.nlargest()

e    9
c    8
a    6
d    5
g    5
dtype: int64

In [323]:
s.nlargest(1)

e    9
dtype: int64

In [324]:
s.nlargest(4)

e    9
c    8
a    6
d    5
dtype: int64

In [325]:
s.nlargest(4, keep='all')

e    9
c    8
a    6
d    5
g    5
i    5
dtype: int64

In [326]:
s.nlargest(4, keep='last')

e    9
c    8
a    6
i    5
dtype: int64

In [327]:
s.nsmallest()

b    3
f    3
h    4
d    5
g    5
dtype: int64

In [328]:
#---------------

In [329]:
s = pd.Series([15, 9, 18, 20, 12, 5, 25, 17, 10, 15]); s

0    15
1     9
2    18
3    20
4    12
5     5
6    25
7    17
8    10
9    15
dtype: int64

In [330]:
s.nlargest()

6    25
3    20
2    18
7    17
0    15
dtype: int64

In [331]:
s.nsmallest()

5     5
1     9
8    10
4    12
0    15
dtype: int64

### astype (dtype)

In [332]:
s = pd.Series([0, 1, 3, -5]); s

0    0
1    1
2    3
3   -5
dtype: int64

In [333]:
s.astype(bool)

0    False
1     True
2     True
3     True
dtype: bool

In [334]:
#---------------

In [335]:
s = pd.Series([False,  True,  True,  True]); s

0    False
1     True
2     True
3     True
dtype: bool

In [336]:
s.astype(int)

0    0
1    1
2    1
3    1
dtype: int32

In [337]:
#---------------

In [338]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
s = pd.Series(n); s

0     ali
1     ali
2     ali
3     ali
4    sara
5    sara
6    sara
7    taha
8    taha
dtype: object

In [339]:
c = s.astype('category'); c

0     ali
1     ali
2     ali
3     ali
4    sara
5    sara
6    sara
7    taha
8    taha
dtype: category
Categories (3, object): ['ali', 'sara', 'taha']

In [340]:
c.value_counts()

ali     4
sara    3
taha    2
Name: count, dtype: int64

In [341]:
c.values.categories

Index(['ali', 'sara', 'taha'], dtype='object')

In [342]:
c.values.codes

array([0, 0, 0, 0, 1, 1, 1, 2, 2], dtype=int8)

In [343]:
c.isin(['sara'])

0    False
1    False
2    False
3    False
4     True
5     True
6     True
7    False
8    False
dtype: bool

In [344]:
c[c.isin(['sara'])]

4    sara
5    sara
6    sara
dtype: category
Categories (3, object): ['ali', 'sara', 'taha']

In [345]:
a = c[c.isin(['sara'])]; a

4    sara
5    sara
6    sara
dtype: category
Categories (3, object): ['ali', 'sara', 'taha']

In [346]:
a.cat.remove_unused_categories()

4    sara
5    sara
6    sara
dtype: category
Categories (1, object): ['sara']

## miscellaneous

### copy ()

In [347]:
s = pd.Series([12, 4, 5], index=['a', 'b', 'c']); s

a    12
b     4
c     5
dtype: int64

In [348]:
sc = s.copy()

In [349]:
s['b'] = 89
s

a    12
b    89
c     5
dtype: int64

In [350]:
sc

a    12
b     4
c     5
dtype: int64

### squeeze ()

In [351]:
s = pd.Series(data=[12, 8, 19, 17], index=['ali', 'ali', 'sara', 'omid']); s

ali     12
ali      8
sara    19
omid    17
dtype: int64

In [352]:
s.squeeze()

ali     12
ali      8
sara    19
omid    17
dtype: int64

In [353]:
s['sara']

19

In [354]:
s['sara':'sara']

sara    19
dtype: int64

In [355]:
s['sara':'sara'].squeeze()

19

In [356]:
#---------------

In [357]:
s = pd.Series([12], index=['ali']); s

ali    12
dtype: int64

In [358]:
s.squeeze()

12

### head & tail (n=5)

In [359]:
s = pd.Series(range(10, 101, 10)); s

0     10
1     20
2     30
3     40
4     50
5     60
6     70
7     80
8     90
9    100
dtype: int64

In [360]:
# head: Return the first n rows.

In [361]:
s.head()

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [362]:
s.head(3)

0    10
1    20
2    30
dtype: int64

In [363]:
s.head(7)

0    10
1    20
2    30
3    40
4    50
5    60
6    70
dtype: int64

In [364]:
# tail: Return the last n rows.

In [365]:
s.tail()

5     60
6     70
7     80
8     90
9    100
dtype: int64

In [366]:
s.tail(2)

8     90
9    100
dtype: int64

In [367]:
s.tail(6)

4     50
5     60
6     70
7     80
8     90
9    100
dtype: int64

### sample(n, frac, replace, random_state, ignore_index)

In [368]:
s = pd.Series(range(10, 101, 10)); s

0     10
1     20
2     30
3     40
4     50
5     60
6     70
7     80
8     90
9    100
dtype: int64

In [369]:
s.sample()

5    60
dtype: int64

In [370]:
s.sample(3)

7     80
9    100
3     40
dtype: int64

In [371]:
s.sample(n=3)

0    10
2    30
8    90
dtype: int64

In [372]:
s.sample(n=7)

4     50
6     70
3     40
5     60
8     90
9    100
7     80
dtype: int64

frac

In [373]:
s.sample(frac=0.4)

8    90
7    80
2    30
1    20
dtype: int64

In [374]:
s.sample(frac=4/10)

3    40
2    30
8    90
0    10
dtype: int64

replace

In [375]:
#s.sample(n=12)      # ValueError: Cannot take a larger sample than population when 'replace=False'

In [376]:
s.sample(n=12, replace=True)

8    90
3    40
6    70
5    60
3    40
3    40
2    30
7    80
8    90
0    10
6    70
8    90
dtype: int64

In [377]:
s.sample(frac=1.5, replace=True)

6     70
2     30
3     40
3     40
5     60
8     90
2     30
8     90
0     10
5     60
6     70
9    100
5     60
9    100
9    100
dtype: int64

In [378]:
#---------------

In [379]:
s.sample(n=3)                 # entekhabe 3 nemoone bedoone jaygozari.

0    10
5    60
3    40
dtype: int64

In [380]:
s.sample(n=3, replace=True)   # entekhabe 3 nemoone ba jaygozari (ehtemale entekhabe nemoone tekrari).

8    90
2    30
1    20
dtype: int64

random_state

In [381]:
# dar chand bar run kardane in selloole, har bar natayeje motafaveti migirim:

s.sample(n=3)                      

6    70
0    10
1    20
dtype: int64

In [382]:
# ba dastoore random_state, ba har bar run kardane sellool, haman natije bare avval hasel mishavad:

s.sample(n=3, random_state=1)   

2     30
9    100
6     70
dtype: int64

### where ()

In [383]:
s = pd.Series([12, 4, 5, np.NaN, 7, 2], ['a', 'b', 'c', 'a', 'e', 'b']); s

a    12.0
b     4.0
c     5.0
a     NaN
e     7.0
b     2.0
dtype: float64

In [384]:
s >= 5

a     True
b    False
c     True
a    False
e     True
b    False
dtype: bool

In [385]:
s[s >= 5]

a    12.0
c     5.0
e     7.0
dtype: float64

In [386]:
s.where(s >= 5)

a    12.0
b     NaN
c     5.0
a     NaN
e     7.0
b     NaN
dtype: float64

In [387]:
#---------------

In [388]:
s.lt(7)

a    False
b     True
c     True
a    False
e    False
b     True
dtype: bool

In [389]:
s[s.lt(7)]

b    4.0
c    5.0
b    2.0
dtype: float64

In [390]:
s.where(s.lt(7))

a    NaN
b    4.0
c    5.0
a    NaN
e    NaN
b    2.0
dtype: float64

In [391]:
#---------------

In [392]:
s.between(5, 20)

a     True
b    False
c     True
a    False
e     True
b    False
dtype: bool

In [393]:
s[s.between(5, 20)]

a    12.0
c     5.0
e     7.0
dtype: float64

In [394]:
s.where(s.between(5, 20))

a    12.0
b     NaN
c     5.0
a     NaN
e     7.0
b     NaN
dtype: float64

### to_...()

to_list

In [395]:
s = pd.Series([15, 90, 18, 20]); s

0    15
1    90
2    18
3    20
dtype: int64

In [396]:
s.to_list()

[15, 90, 18, 20]

In [397]:
s.tolist()

[15, 90, 18, 20]

In [398]:
list(s)

[15, 90, 18, 20]

to_dict

In [399]:
s = pd.Series([15, 9, 18, 20]); s

0    15
1     9
2    18
3    20
dtype: int64

In [400]:
s.to_dict()

{0: 15, 1: 9, 2: 18, 3: 20}

In [401]:
dict(s)

{0: 15, 1: 9, 2: 18, 3: 20}

In [402]:
d = {0: 15, 1: 9, 2: 18, 3: 20}
pd.Series(d)

0    15
1     9
2    18
3    20
dtype: int64

In [403]:
#---------------

In [404]:
s = pd.Series([15, 9, 18, 20], ['a', 'b', 'c', 'c']); s

a    15
b     9
c    18
c    20
dtype: int64

In [405]:
s.to_dict()

{'a': 15, 'b': 9, 'c': 20}

to_frame

In [406]:
s = pd.Series([15, 9, 18, 20], ['a', 'b', 'c', 'c']); s

a    15
b     9
c    18
c    20
dtype: int64

In [407]:
s.to_frame()

Unnamed: 0,0
a,15
b,9
c,18
c,20


to_numpy

In [408]:
s = pd.Series([15, 90, 18, 20]); s

0    15
1    90
2    18
3    20
dtype: int64

In [409]:
s.to_numpy()

array([15, 90, 18, 20], dtype=int64)

In [410]:
np.array(s)

array([15, 90, 18, 20], dtype=int64)

In [411]:
#---------------

In [412]:
s = pd.Series([15, 90, 18, 20], ['a', 'b', 'c', 'c']); s

a    15
b    90
c    18
c    20
dtype: int64

In [413]:
s.to_numpy()

array([15, 90, 18, 20], dtype=int64)

## arithmetic operation

### math

In [414]:
a = pd.Series([1, 10, np.nan, 3])

In [415]:
a + 5

0     6.0
1    15.0
2     NaN
3     8.0
dtype: float64

In [416]:
a / 2

0    0.5
1    5.0
2    NaN
3    1.5
dtype: float64

In [417]:
(a / 2).round()

0    0.0
1    5.0
2    NaN
3    2.0
dtype: float64

In [418]:
a.add(5)

0     6.0
1    15.0
2     NaN
3     8.0
dtype: float64

In [419]:
a.add(5, fill_value=0)

0     6.0
1    15.0
2     5.0
3     8.0
dtype: float64

In [420]:
a.subtract(3)

0   -2.0
1    7.0
2    NaN
3    0.0
dtype: float64

In [421]:
a.multiply(4)

0     4.0
1    40.0
2     NaN
3    12.0
dtype: float64

In [422]:
a.divide(2)

0    0.5
1    5.0
2    NaN
3    1.5
dtype: float64

In [423]:
a.pow(3)

0       1.0
1    1000.0
2       NaN
3      27.0
dtype: float64

In [424]:
a.mod(5)

0    1.0
1    0.0
2    NaN
3    3.0
dtype: float64

two series

In [425]:
a = pd.Series([1, 10, None, 7,  3])
b = pd.Series([4,  5,   6,  2, None, 9])

In [426]:
a + b

0     5.0
1    15.0
2     NaN
3     9.0
4     NaN
5     NaN
dtype: float64

In [427]:
a // b

0    0.0
1    2.0
2    NaN
3    3.0
4    NaN
5    NaN
dtype: float64

In [428]:
a.add(b)

0     5.0
1    15.0
2     NaN
3     9.0
4     NaN
5     NaN
dtype: float64

In [429]:
a.add(b, fill_value=0)

0     5.0
1    15.0
2     6.0
3     9.0
4     3.0
5     9.0
dtype: float64

In [430]:
a.sub(b, fill_value=0)

0   -3.0
1    5.0
2   -6.0
3    5.0
4    3.0
5   -9.0
dtype: float64

In [431]:
a.mul(b)

0     4.0
1    50.0
2     NaN
3    14.0
4     NaN
5     NaN
dtype: float64

In [432]:
a.mul(b, fill_value=1)

0     4.0
1    50.0
2     6.0
3    14.0
4     3.0
5     9.0
dtype: float64

In [433]:
a.div(b)

0    0.25
1    2.00
2     NaN
3    3.50
4     NaN
5     NaN
dtype: float64

In [434]:
a.floordiv(b)

0    0.0
1    2.0
2    NaN
3    3.0
4    NaN
5    NaN
dtype: float64

In [435]:
a.pow(b)

0         1.0
1    100000.0
2         NaN
3        49.0
4         NaN
5         NaN
dtype: float64

In [436]:
a.mod(b)

0    1.0
1    0.0
2    NaN
3    1.0
4    NaN
5    NaN
dtype: float64

In [437]:
#---------------

In [438]:
a = pd.Series([1, 10, 3], index=['a','b','c'] )
b = pd.Series([4, 5, 6], index=['a','b','d'] )

In [439]:
display(a, b)

a     1
b    10
c     3
dtype: int64

a    4
b    5
d    6
dtype: int64

In [440]:
a.add(b)

a     5.0
b    15.0
c     NaN
d     NaN
dtype: float64

In [441]:
a.add(b, fill_value=0)

a     5.0
b    15.0
c     3.0
d     6.0
dtype: float64

### describe()

In [442]:
s = pd.Series([12, 4, 5, np.NaN, 7, 2], ['a', 'b', 'c', 'a', 'e', 'b']); s

a    12.0
b     4.0
c     5.0
a     NaN
e     7.0
b     2.0
dtype: float64

In [443]:
s.describe()

count     5.000000
mean      6.000000
std       3.807887
min       2.000000
25%       4.000000
50%       5.000000
75%       7.000000
max      12.000000
dtype: float64

In [444]:
s.sum()

30.0

In [445]:
s.count()

5

In [446]:
s.mean()

6.0

In [447]:
s.var()

14.5

In [448]:
s.std()

3.8078865529319543

In [449]:
s.min()

2.0

In [450]:
s.quantile(.25)

4.0

In [451]:
s.quantile([.5, .75])

0.50    5.0
0.75    7.0
dtype: float64

In [452]:
s.max()

12.0

In [453]:
s.median()

5.0

In [454]:
s.mode()

0     2.0
1     4.0
2     5.0
3     7.0
4    12.0
dtype: float64

### eq , ne , gt , ge , lt , le , between

In [455]:
s = pd.Series([8, 2, 12, 6, 5, 4]); s

0     8
1     2
2    12
3     6
4     5
5     4
dtype: int64

In [456]:
s == 8

0     True
1    False
2    False
3    False
4    False
5    False
dtype: bool

In [457]:
s.eq(8)

0     True
1    False
2    False
3    False
4    False
5    False
dtype: bool

In [458]:
s != 8

0    False
1     True
2     True
3     True
4     True
5     True
dtype: bool

In [459]:
s.ne(8)

0    False
1     True
2     True
3     True
4     True
5     True
dtype: bool

In [460]:
s > 6

0     True
1    False
2     True
3    False
4    False
5    False
dtype: bool

In [461]:
s.gt(6)

0     True
1    False
2     True
3    False
4    False
5    False
dtype: bool

In [462]:
s >= 6

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [463]:
s.ge(6)

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [464]:
s < 8

0    False
1     True
2    False
3     True
4     True
5     True
dtype: bool

In [465]:
s.lt(8)

0    False
1     True
2    False
3     True
4     True
5     True
dtype: bool

In [466]:
s <= 8

0     True
1     True
2    False
3     True
4     True
5     True
dtype: bool

In [467]:
s.le(8)

0     True
1     True
2    False
3     True
4     True
5     True
dtype: bool

In [468]:
(s >= 6) & (s <= 12)

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [469]:
s.between(6, 12)

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

two series

In [470]:
s1 = pd.Series([8, 2, 12, 6, 5, 4])
s2 = pd.Series([20, 2, 7, 6, 2, 17])

In [471]:
s1.eq(s2)

0    False
1     True
2    False
3     True
4    False
5    False
dtype: bool

In [472]:
s1.ge(s2)

0    False
1     True
2     True
3     True
4     True
5    False
dtype: bool

In [473]:
s1.lt(s2)

0     True
1    False
2    False
3    False
4    False
5     True
dtype: bool

### argmax , argmin() , idxmin() , idxmax()

In [474]:
s = pd.Series({'Java':15, 'C++':20, 'Python':12, 'Pascal':9}); s

Java      15
C++       20
Python    12
Pascal     9
dtype: int64

In [475]:
s.idxmax()

'C++'

In [476]:
s.argmax()

1

In [477]:
s.idxmin()

'Pascal'

In [478]:
s.argmin()

3

### cumsum , cumprod , cummax , cummin

In [479]:
s = pd.Series([3, 2, np.nan, 5, 0, 1]); s

0    3.0
1    2.0
2    NaN
3    5.0
4    0.0
5    1.0
dtype: float64

In [480]:
s.cumsum()

0     3.0
1     5.0
2     NaN
3    10.0
4    10.0
5    11.0
dtype: float64

In [481]:
s.cumprod()

0     3.0
1     6.0
2     NaN
3    30.0
4     0.0
5     0.0
dtype: float64

In [482]:
s.cummax()

0    3.0
1    3.0
2    NaN
3    5.0
4    5.0
5    5.0
dtype: float64

In [483]:
s.cummin()

0    3.0
1    2.0
2    NaN
3    2.0
4    0.0
5    0.0
dtype: float64

## applying function

### combine (other, func, fill_value)

In [484]:
s1 = pd.Series([8, 2, 12, 6, 5, 4])
s2 = pd.Series([20, 2, 7, 6, 2, 17])

In [485]:
s1.combine(s2, max)

0    20
1     2
2    12
3     6
4     5
5    17
dtype: int64

In [486]:
#---------------

In [487]:
s1 = pd.Series({'ali' : 16 , 'sara' : 17})
s2 = pd.Series({'ali' : 19 , 'sara' : 15 , 'taha' : 18})

In [488]:
s1.combine(s2, max)

ali     19.0
sara    17.0
taha     NaN
dtype: float64

In [489]:
s1.combine(s2, max, fill_value=0)

ali     19
sara    17
taha    18
dtype: int64

In [490]:
s1.combine(s2, max, fill_value=20)

ali     19
sara    17
taha    20
dtype: int64

In [491]:
myfunc = lambda x, y: x + y
s1.combine(s2, myfunc)

ali     35.0
sara    32.0
taha     NaN
dtype: float64

In [492]:
myfunc = lambda x, y: x + y
s1.combine(s2, myfunc, fill_value=0)

ali     35
sara    32
taha    18
dtype: int64

### apply (func, args, kwargs)

In [493]:
pd.Series([25.4, 7.7, 80]).apply(round)

0    25
1     8
2    80
dtype: int64

In [494]:
pd.Series([25.4, 7.7, 80]).apply(np.ceil)

0    26.0
1     8.0
2    80.0
dtype: float64

In [495]:
#---------------

In [496]:
s = pd.Series([25, 7, 80])

In [497]:
s.apply(np.sqrt)

0    5.000000
1    2.645751
2    8.944272
dtype: float64

In [498]:
s.apply(np.square)

0     625
1      49
2    6400
dtype: int64

In [499]:
def f(x):
    return x**2
s.apply(f)

0     625
1      49
2    6400
dtype: int64

In [500]:
lam = lambda x: x**2
s.apply(lam)

0     625
1      49
2    6400
dtype: int64

In [501]:
#---------------

In [502]:
s = pd.Series([25, 7, 80])

In [503]:
myfunc = lambda x: x - 2
s.apply(myfunc)

0    23
1     5
2    78
dtype: int64

In [504]:
myfunc = lambda x, y: x - y
s.apply(myfunc, args=(2,))

0    23
1     5
2    78
dtype: int64

In [505]:
#---------------

In [506]:
s = pd.Series([25, 7, 80])

In [507]:
def f(r, **kwargs):
    for i in kwargs:
        r += kwargs[i]
    return r       
s.apply(f, x=3, y=2)

0    30
1    12
2    85
dtype: int64

In [508]:
def f(r, **kwargs):
    r += list(d.values())[0]
    r *= list(d.values())[1]
    return r 
f(s, x=3, y=2)

0    360
1    198
2    855
dtype: int64

### map(arg, na_action=None)

In [509]:
pd.Series([25.4, 7.7, 80]).map(round)

0    25
1     8
2    80
dtype: int64

In [510]:
#---------------

In [511]:
s = pd.Series([25, 7, 80])

In [512]:
s.map(np.sqrt)

0    5.000000
1    2.645751
2    8.944272
dtype: float64

In [513]:
lam = lambda x: x**2
s.map(lam)

0     625
1      49
2    6400
dtype: int64

dict

In [514]:
s = pd.Series(['cat', 'dog', 'cat', 'rabbit']); s

0       cat
1       dog
2       cat
3    rabbit
dtype: object

In [515]:
s.map({'cat': 'feline', 'dog': 'canine'})

0    feline
1    canine
2    feline
3       NaN
dtype: object

In [516]:
#---------------

In [517]:
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])

In [518]:
s.map('I am a {}'.format)

0       I am a cat
1       I am a dog
2       I am a nan
3    I am a rabbit
dtype: object

In [519]:
s.map('I am a {}'.format, na_action='ignore')

0       I am a cat
1       I am a dog
2              NaN
3    I am a rabbit
dtype: object

### transform (func, args, kwargs)

In [520]:
s = pd.Series([10, 4, 9])

In [521]:
s.transform('sqrt')

0    3.162278
1    2.000000
2    3.000000
dtype: float64

In [522]:
s.transform(['sqrt'])

Unnamed: 0,sqrt
0,3.162278
1,2.0
2,3.0


In [523]:
s.transform(['sqrt', 'square'])

Unnamed: 0,sqrt,square
0,3.162278,100
1,2.0,16
2,3.0,81


In [524]:
#---------------

In [525]:
s.transform(np.sqrt)

0    3.162278
1    2.000000
2    3.000000
dtype: float64

In [526]:
s.transform([np.sqrt])

Unnamed: 0,sqrt
0,3.162278
1,2.0
2,3.0


In [527]:
s.transform([np.sqrt, np.square])

Unnamed: 0,sqrt,square
0,3.162278,100
1,2.0,16
2,3.0,81


In [528]:
lam = lambda x: x**2
s.transform([lam])

Unnamed: 0,<lambda>
0,100
1,16
2,81


In [529]:
s.transform([np.sqrt, np.square, lam])

Unnamed: 0,sqrt,square,<lambda>
0,3.162278,100,100
1,2.0,16,16
2,3.0,81,81


In [530]:
#---------------

In [531]:
#s.transform('min')       ValueError: Function did not transform

### agg=aggregate (func, args, kwargs)

In [532]:
s = pd.Series([10, 4, 9, 2, 18, 6])

In [533]:
s.agg('sqrt')

0    3.162278
1    2.000000
2    3.000000
3    1.414214
4    4.242641
5    2.449490
dtype: float64

In [534]:
s.agg(['sqrt'])

Unnamed: 0,sqrt
0,3.162278
1,2.0
2,3.0
3,1.414214
4,4.242641
5,2.44949


In [535]:
s.agg(['sqrt', 'square'])

Unnamed: 0,sqrt,square
0,3.162278,100
1,2.0,16
2,3.0,81
3,1.414214,4
4,4.242641,324
5,2.44949,36


In [536]:
#---------------

In [537]:
#s.agg(np.sqrt)     FutureWarning: using <ufunc 'sqrt'> in Series.agg cannot aggregate and has been deprecated. Use Series.transform to keep behavior unchanged.

In [538]:
#---------------

In [539]:
s = pd.Series([10, 4, 9, 2, 18, 6])

In [540]:
s.agg('min')

2

In [541]:
s.agg(['min'])

min    2
dtype: int64

In [542]:
s.agg(['min', 'max', 'sum', 'average'])

min         2.000000
max        18.000000
sum        49.000000
average     8.166667
dtype: float64

### groupby (by, level, dropna)

by and level

In [543]:
d = [220, 180, 230, 200]
i = ['BMW', 'BMW', 'Benz', 'Benz']
s = pd.Series(d, i, name='MaxSpeed')

In [544]:
s

BMW     220
BMW     180
Benz    230
Benz    200
Name: MaxSpeed, dtype: int64

In [545]:
s.groupby(by=i)

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000001D5393E4810>

In [546]:
list(s.groupby(by=i))

[('BMW',
  BMW    220
  BMW    180
  Name: MaxSpeed, dtype: int64),
 ('Benz',
  Benz    230
  Benz    200
  Name: MaxSpeed, dtype: int64)]

In [547]:
# seri.groupby().func()

In [548]:
s

BMW     220
BMW     180
Benz    230
Benz    200
Name: MaxSpeed, dtype: int64

In [549]:
s.groupby(level=0).apply(lambda x: x)

BMW   BMW     220
      BMW     180
Benz  Benz    230
      Benz    200
Name: MaxSpeed, dtype: int64

In [550]:
s.groupby(by=['BMW', 'BMW', 'Benz', 'Benz']).apply(lambda x: x)

BMW   BMW     220
      BMW     180
Benz  Benz    230
      Benz    200
Name: MaxSpeed, dtype: int64

In [551]:
s.groupby(i).apply(lambda x: x)

BMW   BMW     220
      BMW     180
Benz  Benz    230
      Benz    200
Name: MaxSpeed, dtype: int64

In [552]:
s.groupby(i).describe()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
BMW,2.0,200.0,28.284271,180.0,190.0,200.0,210.0,220.0
Benz,2.0,215.0,21.213203,200.0,207.5,215.0,222.5,230.0


In [553]:
s.groupby(i).max()

BMW     220
Benz    230
Name: MaxSpeed, dtype: int64

In [554]:
s.groupby(i).mean()

BMW     200.0
Benz    215.0
Name: MaxSpeed, dtype: float64

In [555]:
s.groupby(i).sum()

BMW     400
Benz    430
Name: MaxSpeed, dtype: int64

In [556]:
#---------------

In [557]:
s.groupby(s>200).apply(lambda x: x)

MaxSpeed      
False     BMW     180
          Benz    200
True      BMW     220
          Benz    230
Name: MaxSpeed, dtype: int64

In [558]:
s.groupby(s>200).describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
MaxSpeed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
False,2.0,190.0,14.142136,180.0,185.0,190.0,195.0,200.0
True,2.0,225.0,7.071068,220.0,222.5,225.0,227.5,230.0


In [559]:
s.groupby(s>200).max()

MaxSpeed
False    200
True     230
Name: MaxSpeed, dtype: int64

In [560]:
#---------------

In [561]:
s.groupby(["a", "b", "a", "b"]).apply(lambda x: x)

a  BMW     220
   Benz    230
b  BMW     180
   Benz    200
Name: MaxSpeed, dtype: int64

In [562]:
#---------------

In [563]:
s.index.name = 'Brand'; s

Brand
BMW     220
BMW     180
Benz    230
Benz    200
Name: MaxSpeed, dtype: int64

In [564]:
s.groupby('Brand').apply(lambda x: x)

Brand  Brand
BMW    BMW      220
       BMW      180
Benz   Benz     230
       Benz     200
Name: MaxSpeed, dtype: int64

In [565]:
s.groupby('Brand').max()

Brand
BMW     220
Benz    230
Name: MaxSpeed, dtype: int64

dropna

In [566]:
s = pd.Series([1, 2, 3, 3], ['a', 'a', 'b', NaN]); s

a      1
a      2
b      3
NaN    3
dtype: int64

In [567]:
s.groupby(level=0).apply(lambda x: x)

a  a    1
   a    2
b  b    3
dtype: int64

In [568]:
s.groupby(level=0, dropna=False).apply(lambda x: x)

a    a      1
     a      2
b    b      3
NaN  NaN    3
dtype: int64

In [569]:
s.groupby(level=0).sum()

a    3
b    3
dtype: int64

In [570]:
s.groupby(level=0, dropna=False).sum()

a      3
b      3
NaN    3
dtype: int64

..........

In [571]:
city = ['Mashad', 'Yazd', 'Kerman','Zabol', 'Kermanshah', 'Hamedan', 'Oromieh']
s = pd.Series([2, 3, 1, 6, 4, 5, 1], index=city); s

Mashad        2
Yazd          3
Kerman        1
Zabol         6
Kermanshah    4
Hamedan       5
Oromieh       1
dtype: int64

In [572]:
k = ['E', 'E', 'E', 'E', 'W', 'W', 'W']

In [573]:
s.groupby(k).apply(lambda x: x)

E  Mashad        2
   Yazd          3
   Kerman        1
   Zabol         6
W  Kermanshah    4
   Hamedan       5
   Oromieh       1
dtype: int64

In [574]:
s.groupby(k).describe()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
E,4.0,3.0,2.160247,1.0,1.75,2.5,3.75,6.0
W,3.0,3.333333,2.081666,1.0,2.5,4.0,4.5,5.0


In [575]:
s.groupby(k).mean()

E    3.000000
W    3.333333
dtype: float64

In [576]:
f = lambda x: x+x.mean()
s.groupby(k).apply(f)

E  Mashad        5.000000
   Yazd          6.000000
   Kerman        4.000000
   Zabol         9.000000
W  Kermanshah    7.333333
   Hamedan       8.333333
   Oromieh       4.333333
dtype: float64

In [577]:
s[['Oromieh', 'Yazd', 'Zabol']] = NaN; s

Mashad        2.0
Yazd          NaN
Kerman        1.0
Zabol         NaN
Kermanshah    4.0
Hamedan       5.0
Oromieh       NaN
dtype: float64

In [578]:
s.groupby(k).mean()

E    1.5
W    4.5
dtype: float64

In [579]:
f = lambda x: x.fillna(x.mean())
s.groupby(k).apply(f)

E  Mashad        2.0
   Yazd          1.5
   Kerman        1.0
   Zabol         1.5
W  Kermanshah    4.0
   Hamedan       5.0
   Oromieh       4.5
dtype: float64

In [580]:
d = {'W': 10, 'E': 20}
f = lambda x: x.fillna(d[x.name])
s.groupby(k).apply(f)

E  Mashad         2.0
   Yazd          20.0
   Kerman         1.0
   Zabol         20.0
W  Kermanshah     4.0
   Hamedan        5.0
   Oromieh       10.0
dtype: float64

..........

In [581]:
score = [160, 120, 130, 140, 200, 160, 170, 50, 190, 70]
sc = pd.cut(score, bins=4, labels=['Q1', 'Q2', 'Q3', 'Q4']); sc

['Q3', 'Q2', 'Q3', 'Q3', 'Q4', 'Q3', 'Q4', 'Q1', 'Q4', 'Q1']
Categories (4, object): ['Q1' < 'Q2' < 'Q3' < 'Q4']

In [582]:
s1 = pd.Series(score)
s2 = pd.Series(sc)
display(s1, s2)

0    160
1    120
2    130
3    140
4    200
5    160
6    170
7     50
8    190
9     70
dtype: int64

0    Q3
1    Q2
2    Q3
3    Q3
4    Q4
5    Q3
6    Q4
7    Q1
8    Q4
9    Q1
dtype: category
Categories (4, object): ['Q1' < 'Q2' < 'Q3' < 'Q4']

In [583]:
s1.groupby(s2).apply(lambda x: x)

  s1.groupby(s2).apply(lambda x: x)


Q1  7     50
    9     70
Q2  1    120
Q3  0    160
    2    130
    3    140
    5    160
Q4  4    200
    6    170
    8    190
dtype: int64

In [584]:
s1.groupby(s2, observed=True).apply(lambda x: x)

Q1  7     50
    9     70
Q2  1    120
Q3  0    160
    2    130
    3    140
    5    160
Q4  4    200
    6    170
    8    190
dtype: int64

In [585]:
s1.groupby(s2, observed=True).apply(lambda x: [x.max(), x.count()])

Q1     [70, 2]
Q2    [120, 1]
Q3    [160, 4]
Q4    [200, 3]
dtype: object

In [586]:
s1.groupby(s2, observed=True).agg(['min', 'count'])

Unnamed: 0,min,count
Q1,50,2
Q2,120,1
Q3,130,4
Q4,170,3


In [587]:
s1.groupby(s2, observed=True).agg(['min', 'count']).reset_index()

Unnamed: 0,index,min,count
0,Q1,50,2
1,Q2,120,1
2,Q3,130,4
3,Q4,170,3


MultiIndex

In [588]:
d = [220, 180, 230, 200]
l = [['BMW','BMW','Benz','Benz'] , ['A','B','A','B']]
mi = pd.MultiIndex.from_arrays(l, names=('Brand', 'Class')); mi

MultiIndex([( 'BMW', 'A'),
            ( 'BMW', 'B'),
            ('Benz', 'A'),
            ('Benz', 'B')],
           names=['Brand', 'Class'])

In [589]:
s = pd.Series(d, mi); s

Brand  Class
BMW    A        220
       B        180
Benz   A        230
       B        200
dtype: int64

In [590]:
s.groupby('Brand').apply(lambda x: x)

Brand  Brand  Class
BMW    BMW    A        220
              B        180
Benz   Benz   A        230
              B        200
dtype: int64

In [591]:
s.groupby(by='Brand').describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BMW,2.0,200.0,28.284271,180.0,190.0,200.0,210.0,220.0
Benz,2.0,215.0,21.213203,200.0,207.5,215.0,222.5,230.0


In [592]:
s.groupby(level='Brand').max()

Brand
BMW     220
Benz    230
dtype: int64

In [593]:
s.groupby(level=0).max()

Brand
BMW     220
Benz    230
dtype: int64

In [594]:
#---------------

In [595]:
s.groupby(by='Class').apply(lambda x: x)

Class  Brand  Class
A      BMW    A        220
       Benz   A        230
B      BMW    B        180
       Benz   B        200
dtype: int64

In [596]:
s.groupby(level='Class').max()

Class
A    230
B    200
dtype: int64

In [597]:
s.groupby(level=1).max()

Class
A    230
B    200
dtype: int64