#### [pandas.pydata.org](https://pandas.pydata.org/pandas-docs/stable/index.html)

In [1]:
import pandas as pd
import numpy as np
from numpy import nan as NaN 

# Intro

### DataFrame(data, index, columns)

In [2]:
score = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
pd.DataFrame(data=score)

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [3]:
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
pd.DataFrame(data=score, index=name)

Unnamed: 0,0,1,2
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [4]:
dars = ['Python', 'C++', 'Java']
pd.DataFrame(data=score, columns=dars)

Unnamed: 0,Python,C++,Java
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [5]:
pd.DataFrame(data=score, index=name, columns=dars)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [6]:
df = pd.DataFrame(score, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [7]:
len(df)

4

In [8]:
df.shape

(4, 3)

In [9]:
df.shape[0]

4

In [10]:
df.shape[1]

3

In [11]:
df.size

12

In [12]:
df.dtypes

Python    int64
C++       int64
Java      int64
dtype: object

In [13]:
df.values

array([[12, 20, 18],
       [13, 14,  6],
       [12,  8, 19],
       [20, 16,  9]], dtype=int64)

In [14]:
df.index

Index(['Ali', 'Sara', 'Taha', 'Mahsa'], dtype='object')

In [15]:
df.columns

Index(['Python', 'C++', 'Java'], dtype='object')

In [16]:
df.axes

[Index(['Ali', 'Sara', 'Taha', 'Mahsa'], dtype='object'),
 Index(['Python', 'C++', 'Java'], dtype='object')]

In [17]:
df.index.name = 'name'
df

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [18]:
df.columns.name = 'dars'
df

dars,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [19]:
df.T

name,Ali,Sara,Taha,Mahsa
dars,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Python,12,13,12,20
C++,20,14,8,16
Java,18,6,19,9


In [20]:
df

dars,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [21]:
df.unstack()

dars    name 
Python  Ali      12
        Sara     13
        Taha     12
        Mahsa    20
C++     Ali      20
        Sara     14
        Taha      8
        Mahsa    16
Java    Ali      18
        Sara      6
        Taha     19
        Mahsa     9
dtype: int64

In [22]:
df.unstack().unstack()

name,Ali,Sara,Taha,Mahsa
dars,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Python,12,13,12,20
C++,20,14,8,16
Java,18,6,19,9


In [23]:
df.describe()

dars,Python,C++,Java
count,4.0,4.0,4.0
mean,14.25,14.5,13.0
std,3.86221,5.0,6.480741
min,12.0,8.0,6.0
25%,12.0,12.5,8.25
50%,12.5,15.0,13.5
75%,14.75,17.0,18.25
max,20.0,20.0,19.0


In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Ali to Mahsa
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Python  4 non-null      int64
 1   C++     4 non-null      int64
 2   Java    4 non-null      int64
dtypes: int64(3)
memory usage: 300.0+ bytes


### MultiIndex

In [25]:
d = np.arange(12).reshape((4, 3))
i = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
c = [['Ohio', 'Colorado', 'Ohio'], ['Green', 'Red', 'Green']]

In [26]:
frame = pd.DataFrame(data=d, index=i, columns=c)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Colorado,Ohio
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green.1
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [27]:
frame.reset_index()

  frame.reset_index()
  frame.reset_index()


Unnamed: 0_level_0,level_0,level_1,Ohio,Colorado,Ohio
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Green,Red,Green.1
0,a,1,0,1,2
1,a,2,3,4,5
2,b,1,6,7,8
3,b,2,9,10,11


In [28]:
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Colorado,Ohio
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [29]:
frame.reset_index()

  frame.reset_index()
  frame.reset_index()


state,key1,key2,Ohio,Colorado,Ohio
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Green,Red,Green.1
0,a,1,0,1,2
1,a,2,3,4,5
2,b,1,6,7,8
3,b,2,9,10,11


In [30]:
frame.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['key1', 'key2'])

In [31]:
frame.columns

MultiIndex([(    'Ohio', 'Green'),
            ('Colorado',   'Red'),
            (    'Ohio', 'Green')],
           names=['state', 'color'])

In [32]:
frame.axes

[MultiIndex([('a', 1),
             ('a', 2),
             ('b', 1),
             ('b', 2)],
            names=['key1', 'key2']),
 MultiIndex([(    'Ohio', 'Green'),
             ('Colorado',   'Red'),
             (    'Ohio', 'Green')],
            names=['state', 'color'])]

In [33]:
frame

Unnamed: 0_level_0,state,Ohio,Colorado,Ohio
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [34]:
frame.swaplevel()

Unnamed: 0_level_0,state,Ohio,Colorado,Ohio
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [35]:
frame.T

Unnamed: 0_level_0,key1,a,a,b,b
Unnamed: 0_level_1,key2,1,2,1,2
state,color,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Ohio,Green,0,3,6,9
Colorado,Red,1,4,7,10
Ohio,Green,2,5,8,11


In [36]:
frame.unstack()

state,Ohio,Ohio,Colorado,Colorado,Ohio,Ohio
color,Green,Green,Red,Red,Green,Green
key2,1,2,1,2,1,2
key1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
a,0,3,1,4,2,5
b,6,9,7,10,8,11


pd.MultiIndex.from_arrays()

In [37]:
d = np.arange(12).reshape((4, 3))
i = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
c = [['Ohio', 'Colorado', 'Ohio'], ['Green', 'Red', 'Green']]

In [38]:
frame = pd.DataFrame(data=d, index=i, columns=c)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Colorado,Ohio
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green.1
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [39]:
mi = pd.MultiIndex.from_arrays(i, names=['key1', 'key2'])
mc = pd.MultiIndex.from_arrays(c, names=['state', 'color'])

In [40]:
frame = pd.DataFrame(data=d, index=mi, columns=mc)
frame

Unnamed: 0_level_0,state,Ohio,Colorado,Ohio
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


### data with dict {} 

In [41]:
# sotooni:

In [42]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [43]:
pd.DataFrame(d)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [44]:
#---------------

In [45]:
d = {'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}
name = ['Ali', 'Sara', 'Taha']

In [46]:
pd.DataFrame(d)

Unnamed: 0,Python,C++,Java
0,12,20,18
1,13,14,6
2,12,8,19


In [47]:
pd.DataFrame(data=d, index=name)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [48]:
#---------------

In [49]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [50]:
pd.DataFrame(d)

Unnamed: 0,name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [51]:
pd.DataFrame(d).set_index('name')

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


..........

In [52]:
d = {"Name":  ["Python", "C++"],
     "Score": [18, 17]}
pd.DataFrame(d)

Unnamed: 0,Name,Score
0,Python,18
1,C++,17


In [53]:
d = {"Name":  {"0":"Python", "1":"C++"},
     "Score": {"0":18, "1":17}}
pd.DataFrame(d)

Unnamed: 0,Name,Score
0,Python,18
1,C++,17


In [54]:
# satri:

In [55]:
l = [['Python', 18], ['C++', 17]]
pd.DataFrame(l)

Unnamed: 0,0,1
0,Python,18
1,C++,17


In [56]:
l = [{'Name':'Python', 'Score':18}, {'Name':'C++', 'Score':17}]
pd.DataFrame(l)

Unnamed: 0,Name,Score
0,Python,18
1,C++,17


..........

In [57]:
d = [3, 4, 2, 7]
i = ['a', 'b', 'c', 'd']

In [58]:
pd.DataFrame(index=i, data=d)

Unnamed: 0,0
a,3
b,4
c,2
d,7


In [59]:
pd.DataFrame(index=i, data={'score': [3, 4, 2, 7]})

Unnamed: 0,score
a,3
b,4
c,2
d,7


In [60]:
pd.DataFrame(index=i, data={'score': 5})

Unnamed: 0,score
a,5
b,5
c,5
d,5


### Indexing & Filtering

#### intro

In [61]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [62]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [63]:
# dastresie sotooni (seri va dataframe)

In [64]:
df.Java                 # type: Series

Ali      18
Sara      6
Taha     19
Mahsa     9
Name: Java, dtype: int64

In [65]:
df['Java']              # type: Series

Ali      18
Sara      6
Taha     19
Mahsa     9
Name: Java, dtype: int64

In [66]:
df[['Java']]            # type: DataFrame

Unnamed: 0,Java
Ali,18
Sara,6
Taha,19
Mahsa,9


In [67]:
df[['Python', 'Java']]

Unnamed: 0,Python,Java
Ali,12,18
Sara,13,6
Taha,12,19
Mahsa,20,9


In [68]:
# dastresie satri (dataframe)

In [69]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [70]:
df[1:2]

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [71]:
df[:2]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6


In [72]:
df[:]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [73]:
# afzoodan be dataframe (faghat sotoon)

In [74]:
df['R'] = np.nan 
df

Unnamed: 0,Python,C++,Java,R
Ali,12,20,18,
Sara,13,14,6,
Taha,12,8,19,
Mahsa,20,16,9,


In [75]:
df[['C']] = np.nan 
df

Unnamed: 0,Python,C++,Java,R,C
Ali,12,20,18,,
Sara,13,14,6,,
Taha,12,8,19,,
Mahsa,20,16,9,,


In [76]:
df[['a', 'b']] = np.nan 
df

Unnamed: 0,Python,C++,Java,R,C,a,b
Ali,12,20,18,,,,
Sara,13,14,6,,,,
Taha,12,8,19,,,,
Mahsa,20,16,9,,,,


..........

In [77]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d)

In [78]:
df

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [79]:
df[1]

0    20
1    14
2     8
3    16
Name: 1, dtype: int64

In [80]:
df[[1]]

Unnamed: 0,1
0,20
1,14
2,8
3,16


In [81]:
df[[0, 2]]

Unnamed: 0,0,2
0,12,18
1,13,6
2,12,19
3,20,9


In [82]:
#---------------

In [83]:
df

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [84]:
df[1:2]

Unnamed: 0,0,1,2
1,13,14,6


In [85]:
df[:2]

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6


In [86]:
df[:]

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [87]:
#---------------

In [88]:
df[3] = np.nan
df

Unnamed: 0,0,1,2,3
0,12,20,18,
1,13,14,6,
2,12,8,19,
3,20,16,9,


In [89]:
df[[4]] = np.nan
df

Unnamed: 0,0,1,2,3,4
0,12,20,18,,
1,13,14,6,,
2,12,8,19,,
3,20,16,9,,


In [90]:
df[[5, 6]] = np.nan
df

Unnamed: 0,0,1,2,3,4,5,6
0,12,20,18,,,,
1,13,14,6,,,,
2,12,8,19,,,,
3,20,16,9,,,,


#### loc

satr:

In [91]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [92]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [93]:
# yek satr

In [94]:
df.loc['Sara']

Python    13
C++       14
Java       6
Name: Sara, dtype: int64

In [95]:
df.loc['Sara':'Sara']

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [96]:
df.loc[['Sara']]

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [97]:
# bakhshi az yek satr

In [98]:
df.loc['Sara', 'C++':]

C++     14
Java     6
Name: Sara, dtype: int64

In [99]:
df.loc['Sara':'Sara', 'C++':]

Unnamed: 0,C++,Java
Sara,14,6


In [100]:
df.loc[['Sara'], 'C++':]

Unnamed: 0,C++,Java
Sara,14,6


In [101]:
df.loc['Sara', ['Python', 'Java']]

Python    13
Java       6
Name: Sara, dtype: int64

In [102]:
df.loc['Sara':'Sara', ['Python', 'Java']]

Unnamed: 0,Python,Java
Sara,13,6


In [103]:
df.loc[['Sara'], ['Python', 'Java']]

Unnamed: 0,Python,Java
Sara,13,6


In [104]:
# chand satr

In [105]:
df.loc['Sara':'Mahsa']

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [106]:
df.loc[['Sara', 'Ali', 'Mahsa']]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Ali,12,20,18
Mahsa,20,16,9


In [107]:
df.index != 'Sara'

array([ True, False,  True,  True])

In [108]:
df.loc[df.index != 'Sara']  

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19
Mahsa,20,16,9


sotoon:

In [109]:
# yek sotoon

In [110]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [111]:
df.loc[:, 'Python']

Ali      12
Sara     13
Taha     12
Mahsa    20
Name: Python, dtype: int64

In [112]:
df.loc[:, 'Python':'Python']

Unnamed: 0,Python
Ali,12
Sara,13
Taha,12
Mahsa,20


In [113]:
df.loc[:, ['Python']]

Unnamed: 0,Python
Ali,12
Sara,13
Taha,12
Mahsa,20


In [114]:
# bakhshi az yek sotoon

In [115]:
df.loc['Taha':, 'Python']

Taha     12
Mahsa    20
Name: Python, dtype: int64

In [116]:
df.loc['Taha':, 'Python':'Python']

Unnamed: 0,Python
Taha,12
Mahsa,20


In [117]:
df.loc['Taha':, ['Python']]

Unnamed: 0,Python
Taha,12
Mahsa,20


In [118]:
df.loc[['Sara', 'Mahsa'], 'Python']

Sara     13
Mahsa    20
Name: Python, dtype: int64

In [119]:
df.loc[['Sara', 'Mahsa'], 'Python':'Python']

Unnamed: 0,Python
Sara,13
Mahsa,20


In [120]:
df.loc[['Sara', 'Mahsa'], ['Python']]

Unnamed: 0,Python
Sara,13
Mahsa,20


In [121]:
# chand sotoon

In [122]:
df.loc[:, 'Python':'C++']

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [123]:
df.loc[:, ['Python', 'Java']]

Unnamed: 0,Python,Java
Ali,12,18
Sara,13,6
Taha,12,19
Mahsa,20,9


In [124]:
df.columns != 'Java'

array([ True,  True, False])

In [125]:
df.loc[:, (df.columns != 'Java')]  

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


onsor:

In [126]:
# yek onsor

In [127]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [128]:
df.loc['Sara', 'Python']                   # type: int

13

In [129]:
df.loc['Sara']['Python']                   

13

In [130]:
df.loc['Sara':'Sara', 'Python']            # type: serie

Sara    13
Name: Python, dtype: int64

In [131]:
df.loc['Sara', 'Python':'Python']

Python    13
Name: Sara, dtype: int64

In [132]:
df.loc[['Sara'], 'Python']

Sara    13
Name: Python, dtype: int64

In [133]:
df.loc['Sara', ['Python']]

Python    13
Name: Sara, dtype: int64

In [134]:
df.loc['Sara':'Sara', ['Python']]          # type: dataframe

Unnamed: 0,Python
Sara,13


In [135]:
df.loc[['Sara'], 'Python':'Python'] 

Unnamed: 0,Python
Sara,13


In [136]:
df.loc['Sara':'Sara', 'Python':'Python']

Unnamed: 0,Python
Sara,13


In [137]:
df.loc[['Sara'], ['Python']]

Unnamed: 0,Python
Sara,13


bakhsh

In [138]:
# bakhshi az dataframe

In [139]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [140]:
df.loc['Ali':'Sara', 'Python':'C++']

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14


In [141]:
df.loc[['Ali', 'Mahsa'], ['Python', 'Java']]

Unnamed: 0,Python,Java
Ali,12,18
Mahsa,20,9


In [142]:
df.loc[['Ali', 'Mahsa'], ['Python', 'Java']]

Unnamed: 0,Python,Java
Ali,12,18
Mahsa,20,9


In [143]:
df.loc[df.index != 'Sara', df.columns != 'C++']

Unnamed: 0,Python,Java
Ali,12,18
Taha,12,19
Mahsa,20,9


boolean

In [144]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [145]:
df.loc[[True, False, True, True]]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19
Mahsa,20,16,9


In [146]:
df.loc[:, [False, True, True]]

Unnamed: 0,C++,Java
Ali,20,18
Sara,14,6
Taha,8,19
Mahsa,16,9


In [147]:
df.loc[[True, False, True, True], [True, False, True]]

Unnamed: 0,Python,Java
Ali,12,18
Taha,12,19
Mahsa,20,9


afzoodan:

In [148]:
# afzoodan be dataframe (faghat sotoon)

In [149]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [150]:
#df.loc[['Amin']] = np.nan       KeyError: "None of [Index(['Amin'], dtype='object')] are in the [index]"

In [151]:
df.loc[:, ['R']] = np.nan
df

Unnamed: 0,Python,C++,Java,R
Ali,12,20,18,
Sara,13,14,6,
Taha,12,8,19,
Mahsa,20,16,9,


In [152]:
df.loc[:, [2]] = 15
df

Unnamed: 0,Python,C++,Java,R,2
Ali,12,20,18,,15
Sara,13,14,6,,15
Taha,12,8,19,,15
Mahsa,20,16,9,,15


In [153]:
#---------------

In [154]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d)
df

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [155]:
df.loc[[3]]

Unnamed: 0,0,1,2
3,20,16,9


In [156]:
#df.loc[[4]] = np.nan        KeyError: "None of [Index([4], dtype='int32')] are in the [index]"

In [157]:
df.loc[:, [2]]

Unnamed: 0,2
0,18
1,6
2,19
3,9


In [158]:
df.loc[:, [3]] = np.nan
df

Unnamed: 0,0,1,2,3
0,12,20,18,
1,13,14,6,
2,12,8,19,
3,20,16,9,


In [159]:
df.loc[:, ['a']] = np.nan
df

Unnamed: 0,0,1,2,3,a
0,12,20,18,,
1,13,14,6,,
2,12,8,19,,
3,20,16,9,,


#### iloc

satr:

In [160]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [161]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [162]:
# yek satr

In [163]:
df.iloc[1]

Python    13
C++       14
Java       6
Name: Sara, dtype: int64

In [164]:
df.iloc[-1]

Python    20
C++       16
Java       9
Name: Mahsa, dtype: int64

In [165]:
df.iloc[1:2]

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [166]:
df.iloc[[1]]

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [167]:
df.iloc[[-1]]

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9


In [168]:
# bakhshi az yek satr

In [169]:
df.iloc[1, 1:]

C++     14
Java     6
Name: Sara, dtype: int64

In [170]:
df.iloc[1, [0, 2]]

Python    13
Java       6
Name: Sara, dtype: int64

In [171]:
df.iloc[1:2, 1:]

Unnamed: 0,C++,Java
Sara,14,6


In [172]:
df.iloc[[1], 1:]

Unnamed: 0,C++,Java
Sara,14,6


In [173]:
df.iloc[1:2, [0, 2]]

Unnamed: 0,Python,Java
Sara,13,6


In [174]:
df.iloc[[1], [0, 2]]

Unnamed: 0,Python,Java
Sara,13,6


In [175]:
# chand satr

In [176]:
df.iloc[1:3]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Taha,12,8,19


In [177]:
df.iloc[[0, 2, 1]]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19
Sara,13,14,6


sotoon

In [178]:
# yek sotoon

In [179]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [180]:
df.iloc[:, 0]

Ali      12
Sara     13
Taha     12
Mahsa    20
Name: Python, dtype: int64

In [181]:
df.iloc[:, 0:1]

Unnamed: 0,Python
Ali,12
Sara,13
Taha,12
Mahsa,20


In [182]:
df.iloc[:, [0]]

Unnamed: 0,Python
Ali,12
Sara,13
Taha,12
Mahsa,20


In [183]:
# bakhshi az yek sotoon

In [184]:
df.iloc[2:, 0]

Taha     12
Mahsa    20
Name: Python, dtype: int64

In [185]:
df.iloc[2:, 0:1]

Unnamed: 0,Python
Taha,12
Mahsa,20


In [186]:
df.iloc[2:, [0]]

Unnamed: 0,Python
Taha,12
Mahsa,20


In [187]:
df.iloc[[1, 3], 0]

Sara     13
Mahsa    20
Name: Python, dtype: int64

In [188]:
df.iloc[[1, 3], 0:1]

Unnamed: 0,Python
Sara,13
Mahsa,20


In [189]:
df.iloc[[1, 3], [0]]

Unnamed: 0,Python
Sara,13
Mahsa,20


In [190]:
# chand sotoon

In [191]:
df.iloc[:, 0:2]

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [192]:
df.iloc[:, [0, 2]]

Unnamed: 0,Python,Java
Ali,12,18
Sara,13,6
Taha,12,19
Mahsa,20,9


onsor:

In [193]:
# yek onsor

In [194]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [195]:
df.iloc[1]['Python']                    

13

In [196]:
df.iloc[1][0]                    

  df.iloc[1][0]


13

In [197]:
df.iloc[1, 0]                     

13

In [198]:
df.iloc[1:2, 0]     

Sara    13
Name: Python, dtype: int64

In [199]:
df.iloc[1, 0:1]

Python    13
Name: Sara, dtype: int64

In [200]:
df.iloc[[1], 0]

Sara    13
Name: Python, dtype: int64

In [201]:
df.iloc[1, [0]]

Python    13
Name: Sara, dtype: int64

In [202]:
df.iloc[1:2, [0]]     

Unnamed: 0,Python
Sara,13


In [203]:
df.iloc[[1], 0:1]

Unnamed: 0,Python
Sara,13


In [204]:
df.iloc[1:2, 0:1]

Unnamed: 0,Python
Sara,13


In [205]:
df.iloc[[1], [0]]

Unnamed: 0,Python
Sara,13


bakhsh:

In [206]:
# bakhshi az dataframe

In [207]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [208]:
df.iloc[0:2, 0:2]

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14


In [209]:
df.iloc[[0, 3], [0, 2]]

Unnamed: 0,Python,Java
Ali,12,18
Mahsa,20,9


boolean:

In [210]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [211]:
df.iloc[[True, False, True, True]]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19
Mahsa,20,16,9


In [212]:
df.iloc[:, [False, True, True]]

Unnamed: 0,C++,Java
Ali,20,18
Sara,14,6
Taha,8,19
Mahsa,16,9


In [213]:
df.iloc[[True, False, True, True], [True, False, True]]

Unnamed: 0,Python,Java
Ali,12,18
Taha,12,19
Mahsa,20,9


afzoodan:

In [214]:
# afzoodan be dataframe?

# ba iloc nemitavan satr ya sotooni ezafe kard.

#### Filtering

##### serie

voroodi serie baraye x dar df[x]

In [215]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [216]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [217]:
df.loc['Sara'] > 10

Python     True
C++        True
Java      False
Name: Sara, dtype: bool

In [218]:
df.where(df.loc['Sara'] > 10)

Unnamed: 0,Python,C++,Java
Ali,,,
Sara,,,
Taha,,,
Mahsa,,,


In [219]:
#df[df.loc['Sara'] > 10]   
#IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).

In [220]:
df.loc[:,df.loc['Sara'] > 10]

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [221]:
#---------------

In [222]:
df['C++'] > 10

Ali       True
Sara      True
Taha     False
Mahsa     True
Name: C++, dtype: bool

In [223]:
df.where(df['C++'] > 10)

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,18.0
Sara,13.0,14.0,6.0
Taha,,,
Mahsa,20.0,16.0,9.0


In [224]:
df[df['C++'] > 10]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [225]:
df[df['C++'] > 10]['C++']

Ali      20
Sara     14
Mahsa    16
Name: C++, dtype: int64

In [226]:
df['C++'][df['C++'] > 10]

Ali      20
Sara     14
Mahsa    16
Name: C++, dtype: int64

In [227]:
#---------------

In [228]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [229]:
cond1 = df['C++'] > 10
cond2 = df['C++'] < 18
display(cond1, cond2)

Ali       True
Sara      True
Taha     False
Mahsa     True
Name: C++, dtype: bool

Ali      False
Sara      True
Taha      True
Mahsa     True
Name: C++, dtype: bool

In [230]:
df[cond1]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [231]:
df[~cond1]

Unnamed: 0,Python,C++,Java
Taha,12,8,19


In [232]:
df[cond1 & cond2]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Mahsa,20,16,9


In [233]:
df[cond1 & cond2]['C++']

Sara     14
Mahsa    16
Name: C++, dtype: int64

In [234]:
#---------------

In [235]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [236]:
cond1 = df['C++'] > 10
cond2 = df['Java'] < 10
display(cond1, cond2)

Ali       True
Sara      True
Taha     False
Mahsa     True
Name: C++, dtype: bool

Ali      False
Sara      True
Taha     False
Mahsa     True
Name: Java, dtype: bool

In [237]:
df[cond1 | cond2]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [238]:
df[cond1 ^ cond2]

Unnamed: 0,Python,C++,Java
Ali,12,20,18


In [239]:
df[cond1 & cond2]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Mahsa,20,16,9


In [240]:
df[cond1 & cond2][['C++', 'Java']]

Unnamed: 0,C++,Java
Sara,14,6
Mahsa,16,9


..........

In [241]:
d = {'name':  ['ali', 'taha', 'omid', 'sara', 'negar'],
     'score': ['a', 'b', 'a', 'c', 'd']}

In [242]:
df = pd.DataFrame(d); df

Unnamed: 0,name,score
0,ali,a
1,taha,b
2,omid,a
3,sara,c
4,negar,d


In [243]:
df['score'] == 'a'

0     True
1    False
2     True
3    False
4    False
Name: score, dtype: bool

In [244]:
df[df['score'] == 'a']

Unnamed: 0,name,score
0,ali,a
2,omid,a


In [245]:
df[df['score'] == 'd']

Unnamed: 0,name,score
4,negar,d


In [246]:
df[(df['score'] == 'a') | (df['score'] == 'd')]

Unnamed: 0,name,score
0,ali,a
2,omid,a
4,negar,d


In [247]:
#---------------

In [248]:
df['score'].isin(['a'])

0     True
1    False
2     True
3    False
4    False
Name: score, dtype: bool

In [249]:
df[df['score'].isin(['a'])]

Unnamed: 0,name,score
0,ali,a
2,omid,a


In [250]:
df[df['score'].isin(['a', 'd'])]

Unnamed: 0,name,score
0,ali,a
2,omid,a
4,negar,d


In [251]:
#---------------

In [252]:
df.isin(['a', 'd'])

Unnamed: 0,name,score
0,False,True
1,False,False
2,False,True
3,False,False
4,False,True


In [253]:
df[df.isin(['a', 'd'])]

Unnamed: 0,name,score
0,,a
1,,
2,,a
3,,
4,,d


..........

In [254]:
d = {'name':  ['ali', 'taha', 'omid', 'sara', 'negar'],
     'score': [8, None, 5, 9, None]}

In [255]:
df = pd.DataFrame(d); df

Unnamed: 0,name,score
0,ali,8.0
1,taha,
2,omid,5.0
3,sara,9.0
4,negar,


In [256]:
df['score']

0    8.0
1    NaN
2    5.0
3    9.0
4    NaN
Name: score, dtype: float64

In [257]:
df['score'].isna()

0    False
1     True
2    False
3    False
4     True
Name: score, dtype: bool

In [258]:
df[df['score'].isna()]

Unnamed: 0,name,score
1,taha,
4,negar,


In [259]:
df[~df['score'].isna()]

Unnamed: 0,name,score
0,ali,8.0
2,omid,5.0
3,sara,9.0


In [260]:
df[df['score'].notna()]

Unnamed: 0,name,score
0,ali,8.0
2,omid,5.0
3,sara,9.0


##### dataframe

voroodi datafrmae baraye x dar df[x]

In [261]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [262]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [263]:
df > 10

Unnamed: 0,Python,C++,Java
Ali,True,True,True
Sara,True,True,False
Taha,True,False,True
Mahsa,True,True,False


In [264]:
df[df > 10]

Unnamed: 0,Python,C++,Java
Ali,12,20.0,18.0
Sara,13,14.0,
Taha,12,,19.0
Mahsa,20,16.0,


In [265]:
df.where(df > 10)

Unnamed: 0,Python,C++,Java
Ali,12,20.0,18.0
Sara,13,14.0,
Taha,12,,19.0
Mahsa,20,16.0,


In [266]:
df.gt(10)

Unnamed: 0,Python,C++,Java
Ali,True,True,True
Sara,True,True,False
Taha,True,False,True
Mahsa,True,True,False


In [267]:
df[df.gt(10)]

Unnamed: 0,Python,C++,Java
Ali,12,20.0,18.0
Sara,13,14.0,
Taha,12,,19.0
Mahsa,20,16.0,


In [268]:
df.where(df.gt(10))

Unnamed: 0,Python,C++,Java
Ali,12,20.0,18.0
Sara,13,14.0,
Taha,12,,19.0
Mahsa,20,16.0,


In [269]:
#---------------

In [270]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [271]:
df[['C++']] > 10

Unnamed: 0,C++
Ali,True
Sara,True
Taha,False
Mahsa,True


In [272]:
df[df[['C++']] > 10]

Unnamed: 0,Python,C++,Java
Ali,,20.0,
Sara,,14.0,
Taha,,,
Mahsa,,16.0,


In [273]:
df.where(df[['C++']] > 10)

Unnamed: 0,Python,C++,Java
Ali,,20.0,
Sara,,14.0,
Taha,,,
Mahsa,,16.0,


In [274]:
#---------------

In [275]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [276]:
df[1:2] > 10

Unnamed: 0,Python,C++,Java
Sara,True,True,False


In [277]:
df[df[1:2] > 10]

Unnamed: 0,Python,C++,Java
Ali,,,
Sara,13.0,14.0,
Taha,,,
Mahsa,,,


In [278]:
df.where(df[1:2] > 10)

Unnamed: 0,Python,C++,Java
Ali,,,
Sara,13.0,14.0,
Taha,,,
Mahsa,,,


##### boolean

In [279]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [280]:
[True, True, False, True]

[True, True, False, True]

In [281]:
df[[True, True, False, True]]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [282]:
df.loc[:, [True, True, False]]

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [283]:
#df.where([True, True, False, True])      ValueError: Array conditional must be same shape as self

In [284]:
#---------------

In [285]:
s = pd.Series([True, True, False, True], ['Ali', 'Sara', 'Taha', 'Mahsa']); s

Ali       True
Sara      True
Taha     False
Mahsa     True
dtype: bool

In [286]:
df[s]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [287]:
df.where(s)

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,18.0
Sara,13.0,14.0,6.0
Taha,,,
Mahsa,20.0,16.0,9.0


In [288]:
#---------------

In [289]:
df['C++'] > 10

Ali       True
Sara      True
Taha     False
Mahsa     True
Name: C++, dtype: bool

In [290]:
df[df['C++'] > 10]

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [291]:
df.where(df['C++'] > 10)

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,18.0
Sara,13.0,14.0,6.0
Taha,,,
Mahsa,20.0,16.0,9.0


# Functions

## manage axis

### set_...()

#### set_index (keys, inplace)

set kardane indexe jadid

In [292]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [293]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [294]:
#df.set_index(['Omid', 'Sara', 'Taha'])      KeyError: "None of ['Omid', 'Sara', 'Taha'] are in the columns"

In [295]:
df.set_index([['1', '2', '3']])      

Unnamed: 0,Python,C++,Java
1,12,20,18
2,13,14,6
3,12,8,19


In [296]:
df.set_index([['a', 'b', 'c']])      

Unnamed: 0,Python,C++,Java
a,12,20,18
b,13,14,6
c,12,8,19


In [297]:
df.set_index([['Omid', 'Sara', 'Taha']])      

Unnamed: 0,Python,C++,Java
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


In [298]:
s = pd.Series(['Omid', 'Sara', 'Taha'], name='name')
df.set_index(s)

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


In [299]:
#df.set_index([['Ali', 'Sara', 'Taha', 'Omid']])   ValueError: Length mismatch: Expected 3 rows, received array of length 4

In [300]:
# MultiIndex

In [301]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [302]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [303]:
i = [['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]
mi = pd.MultiIndex.from_arrays(i, names=['name', 'gen']); mi
df.set_index(mi)

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [304]:
#---------------

In [305]:
df.set_index([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']], inplace=True)
df

Unnamed: 0,Unnamed: 1,Python,C++,Java
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [306]:
df.index.names = ['name', 'gen']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [307]:
df.index

MultiIndex([('Omid',  'boy'),
            ('Sara', 'girl'),
            ('Taha',  'boy')],
           names=['name', 'gen'])

tabdile yek sotoon be index

In [308]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [309]:
df = pd.DataFrame(d); df

Unnamed: 0,name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [310]:
df['name'].values

array(['Ali', 'Sara', 'Taha'], dtype=object)

In [311]:
df.set_index(df['name'].values)

Unnamed: 0,name,Python,C++,Java
Ali,Ali,12,20,18
Sara,Sara,13,14,6
Taha,Taha,12,8,19


In [312]:
df['name']

0     Ali
1    Sara
2    Taha
Name: name, dtype: object

In [313]:
df.set_index(df['name'])

Unnamed: 0_level_0,name,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,Ali,12,20,18
Sara,Sara,13,14,6
Taha,Taha,12,8,19


In [314]:
df

Unnamed: 0,name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [315]:
df.set_index('name')

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [316]:
# MultiIndex

In [317]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'gen':    ['boy', 'girl', 'boy'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [318]:
df = pd.DataFrame(d); df

Unnamed: 0,name,gen,Python,C++,Java
0,Ali,boy,12,20,18
1,Sara,girl,13,14,6
2,Taha,boy,12,8,19


In [319]:
df.set_index([df['name']])

Unnamed: 0_level_0,name,gen,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ali,Ali,boy,12,20,18
Sara,Sara,girl,13,14,6
Taha,Taha,boy,12,8,19


In [320]:
df.set_index([df['name'], df['gen']])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,gen,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ali,boy,Ali,boy,12,20,18
Sara,girl,Sara,girl,13,14,6
Taha,boy,Taha,boy,12,8,19


In [321]:
df.set_index([df['name'], df['gen']]).drop(columns=['name', 'gen'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [322]:
df

Unnamed: 0,name,gen,Python,C++,Java
0,Ali,boy,12,20,18
1,Sara,girl,13,14,6
2,Taha,boy,12,8,19


In [323]:
df.set_index(['name'])

Unnamed: 0_level_0,gen,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [324]:
df.set_index(['name', 'gen'], inplace=True); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [325]:
df.index

MultiIndex([( 'Ali',  'boy'),
            ('Sara', 'girl'),
            ('Taha',  'boy')],
           names=['name', 'gen'])

In [326]:
#---------------

In [327]:
d = {'name': ['ali', 'reza', 'sara', 'taha', 'ali', 'reza', 'sara', 'taha'],
     'term': ['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'],
     'Java': [12, 16, 15, 17, 17, 13, 11, 19], 
     'C++':  [15, 14, 18, 16, 16, 17, 13, 20],}

In [328]:
df = pd.DataFrame(d); df

Unnamed: 0,name,term,Java,C++
0,ali,one,12,15
1,reza,one,16,14
2,sara,one,15,18
3,taha,one,17,16
4,ali,two,17,16
5,reza,two,13,17
6,sara,two,11,13
7,taha,two,19,20


In [329]:
df.set_index(['term', 'name'], inplace=True); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [330]:
df.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [331]:
df.sort_index(level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
two,ali,17,16
one,reza,16,14
two,reza,13,17
one,sara,15,18
two,sara,11,13
one,taha,17,16
two,taha,19,20


In [332]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [333]:
df.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
name,term,Unnamed: 2_level_1,Unnamed: 3_level_1
ali,one,12,15
reza,one,16,14
sara,one,15,18
taha,one,17,16
ali,two,17,16
reza,two,13,17
sara,two,11,13
taha,two,19,20


In [334]:
df.swaplevel('term', 'name')

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
name,term,Unnamed: 2_level_1,Unnamed: 3_level_1
ali,one,12,15
reza,one,16,14
sara,one,15,18
taha,one,17,16
ali,two,17,16
reza,two,13,17
sara,two,11,13
taha,two,19,20


In [335]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [336]:
df.T

term,one,one,one,one,two,two,two,two
name,ali,reza,sara,taha,ali,reza,sara,taha
Java,12,16,15,17,17,13,11,19
C++,15,14,18,16,16,17,13,20


In [337]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [338]:
df.unstack()

Unnamed: 0_level_0,Java,Java,Java,Java,C++,C++,C++,C++
name,ali,reza,sara,taha,ali,reza,sara,taha
term,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
one,12,16,15,17,15,14,18,16
two,17,13,11,19,16,17,13,20


groupby (by)

In [339]:
mydict = {'City': ['Hamedan', 'Hamedan', 'Hamedan', 'Tehran', 'Tehran', 'Tehran'],
          'Year': [1396, 1397, 1398, 1396, 1397, 1398],
          'Pop':  [9.3, 7, 8, 8, 8.5, 9]}

In [340]:
df = pd.DataFrame(mydict); df

Unnamed: 0,City,Year,Pop
0,Hamedan,1396,9.3
1,Hamedan,1397,7.0
2,Hamedan,1398,8.0
3,Tehran,1396,8.0
4,Tehran,1397,8.5
5,Tehran,1398,9.0


In [341]:
df.set_index(['City', 'Year'], inplace=True); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Pop
City,Year,Unnamed: 2_level_1
Hamedan,1396,9.3
Hamedan,1397,7.0
Hamedan,1398,8.0
Tehran,1396,8.0
Tehran,1397,8.5
Tehran,1398,9.0


In [342]:
df.groupby('City').max()

Unnamed: 0_level_0,Pop
City,Unnamed: 1_level_1
Hamedan,9.3
Tehran,9.0


In [343]:
df.groupby('Year').max()

Unnamed: 0_level_0,Pop
Year,Unnamed: 1_level_1
1396,9.3
1397,8.5
1398,9.0


In [344]:
#---------------

In [345]:
d = {'name': ['ali', 'reza', 'sara', 'taha', 'ali', 'reza', 'sara', 'taha'],
     'term': ['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'],
     'Java': [12, 16, 15, 17, 17, 13, 11, 19], 
     'C++':  [15, 14, 18, 16, 16, 17, 13, 20],}

In [346]:
df = pd.DataFrame(d); df

Unnamed: 0,name,term,Java,C++
0,ali,one,12,15
1,reza,one,16,14
2,sara,one,15,18
3,taha,one,17,16
4,ali,two,17,16
5,reza,two,13,17
6,sara,two,11,13
7,taha,two,19,20


In [347]:
df.set_index(['term', 'name'], inplace=True); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Java,C++
term,name,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ali,12,15
one,reza,16,14
one,sara,15,18
one,taha,17,16
two,ali,17,16
two,reza,13,17
two,sara,11,13
two,taha,19,20


In [348]:
df.groupby('term').max()

Unnamed: 0_level_0,Java,C++
term,Unnamed: 1_level_1,Unnamed: 2_level_1
one,17,18
two,19,20


In [349]:
df.groupby('name').max()

Unnamed: 0_level_0,Java,C++
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,17,16
reza,16,17
sara,15,18
taha,19,20


#### set_axis (labels, axis)

In [350]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [351]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [352]:
# index

In [353]:
df.set_axis(['x', 'y', 'z'])

Unnamed: 0,Python,C++,Java
x,12,20,18
y,13,14,6
z,12,8,19


In [354]:
#df.set_axis(['Ali', 'Sara', 'Taha', 'Omid'])  # ValueError: Length mismatch: Expected axis has 3 elements, new values have 4 elements

In [355]:
s = pd.Series(['Omid', 'Sara', 'Taha'], name='name')
df = df.set_axis(s); df

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


In [356]:
# columns

In [357]:
df

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


In [358]:
df.set_axis(['a', 'b', 'c'], axis=1)

Unnamed: 0_level_0,a,b,c
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


In [359]:
s = pd.Series(['a', 'b', 'c'], name='dars')
df.set_axis(s, axis=1)

dars,a,b,c
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Omid,12,20,18
Sara,13,14,6
Taha,12,8,19


MultiIndex

In [360]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [361]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [362]:
# index

In [363]:
i = [['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]
mi = pd.MultiIndex.from_arrays(i, names=['name', 'gen']); mi
df.set_axis(mi); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [364]:
df = df.set_axis([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]); df

Unnamed: 0,Unnamed: 1,Python,C++,Java
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [365]:
df.index.names = ['name', 'gen']; df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [366]:
df.index

MultiIndex([('Omid',  'boy'),
            ('Sara', 'girl'),
            ('Taha',  'boy')],
           names=['name', 'gen'])

In [367]:
# columns

In [368]:
i = [['R', 'C++', 'Java'], ['a', 'b', 'b']]
mi = pd.MultiIndex.from_arrays(i, names=['name', 'gen']); mi
df.set_axis(mi, axis=1); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
name,gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [369]:
df = df.set_axis([['R', 'C++', 'Java'], ['a', 'b', 'b']], axis=1); df

Unnamed: 0_level_0,Unnamed: 1_level_0,R,C++,Java
Unnamed: 0_level_1,Unnamed: 1_level_1,a,b,b
name,gen,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [370]:
df.columns.names = ['dars', 'noe']; df

Unnamed: 0_level_0,dars,R,C++,Java
Unnamed: 0_level_1,noe,a,b,b
name,gen,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [371]:
df.columns

MultiIndex([(   'R', 'a'),
            ( 'C++', 'b'),
            ('Java', 'b')],
           names=['dars', 'noe'])

In [372]:
df.axes

[MultiIndex([('Omid',  'boy'),
             ('Sara', 'girl'),
             ('Taha',  'boy')],
            names=['name', 'gen']),
 MultiIndex([(   'R', 'a'),
             ( 'C++', 'b'),
             ('Java', 'b')],
            names=['dars', 'noe'])]

In [373]:
df

Unnamed: 0_level_0,dars,R,C++,Java
Unnamed: 0_level_1,noe,a,b,b
name,gen,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [374]:
df.T

Unnamed: 0_level_0,name,Omid,Sara,Taha
Unnamed: 0_level_1,gen,boy,girl,boy
dars,noe,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
R,a,12,13,12
C++,b,20,14,8
Java,b,18,6,19


In [375]:
df

Unnamed: 0_level_0,dars,R,C++,Java
Unnamed: 0_level_1,noe,a,b,b
name,gen,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Omid,boy,12,20,18
Sara,girl,13,14,6
Taha,boy,12,8,19


In [376]:
df.unstack()

dars,R,R,C++,C++,Java,Java
noe,a,a,b,b,b,b
gen,boy,girl,boy,girl,boy,girl
name,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
Omid,12.0,,20.0,,18.0,
Sara,,13.0,,14.0,,6.0
Taha,12.0,,8.0,,19.0,


tabdile yek sotoon be index

In [377]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [378]:
df = pd.DataFrame(d); df

Unnamed: 0,name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [379]:
df.set_index('name')

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [380]:
df

Unnamed: 0,name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [381]:
df = df.set_axis(df['name']);df

Unnamed: 0_level_0,name,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,Ali,12,20,18
Sara,Sara,13,14,6
Taha,Taha,12,8,19


In [382]:
df.pop('name');df

Unnamed: 0_level_0,Python,C++,Java
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


tabdile yek satr be column

In [383]:
l = [['Python', 'C++', 'Java'],
     [12, 13, 12],
     [20, 14, 8],
     [18, 6, 19]]

In [384]:
df = pd.DataFrame(l); df

Unnamed: 0,0,1,2
0,Python,C++,Java
1,12,13,12
2,20,14,8
3,18,6,19


In [385]:
df = df.set_axis(df.iloc[0], axis=1); df

Unnamed: 0,Python,C++,Java
0,Python,C++,Java
1,12,13,12
2,20,14,8
3,18,6,19


In [386]:
df.drop(index=0)

Unnamed: 0,Python,C++,Java
1,12,13,12
2,20,14,8
3,18,6,19


..........

In [387]:
name = ['Ali', 'Sara', 'Taha']
Python = [12, 13, 12]
C = [20, 14, 8]
Java = [18, 6, 19]

In [388]:
pd.DataFrame([Python, C, Java], index=name)

Unnamed: 0,0,1,2
Ali,12,13,12
Sara,20,14,8
Taha,18,6,19


In [389]:
df = pd.DataFrame(data=[name, Python, C, Java]); df

Unnamed: 0,0,1,2
0,Ali,Sara,Taha
1,12,13,12
2,20,14,8
3,18,6,19


In [390]:
i = df.loc[0]; i

0     Ali
1    Sara
2    Taha
Name: 0, dtype: object

In [391]:
df.drop(0, inplace=True); df

Unnamed: 0,0,1,2
1,12,13,12
2,20,14,8
3,18,6,19


In [392]:
df = df.set_axis(i.values); df

Unnamed: 0,0,1,2
Ali,12,13,12
Sara,20,14,8
Taha,18,6,19


In [393]:
df = df.set_axis(['Python', 'C++', 'Java'], axis=1); df

Unnamed: 0,Python,C++,Java
Ali,12,13,12
Sara,20,14,8
Taha,18,6,19


###  reset_index()

#### serie (level, name)

In [394]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar']); s

ali      12
taha      8
sara     19
negar    17
dtype: int64

In [395]:
s.reset_index()

Unnamed: 0,index,0
0,ali,12
1,taha,8
2,sara,19
3,negar,17


In [396]:
s.reset_index(name='Python')

Unnamed: 0,index,Python
0,ali,12
1,taha,8
2,sara,19
3,negar,17


In [397]:
#---------------

In [398]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar'], name='Python'); s

ali      12
taha      8
sara     19
negar    17
Name: Python, dtype: int64

In [399]:
s.reset_index()

Unnamed: 0,index,Python
0,ali,12
1,taha,8
2,sara,19
3,negar,17


In [400]:
#---------------

In [401]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar'], name='Python')
s.index.name = 'Name'; s

Name
ali      12
taha      8
sara     19
negar    17
Name: Python, dtype: int64

In [402]:
s.reset_index()

Unnamed: 0,Name,Python
0,ali,12
1,taha,8
2,sara,19
3,negar,17


MultiIndex

In [403]:
s = pd.Series([12, 8, 19, 17], [['ali', 'taha', 'sara', 'negar'], ['b', 'b', 'g', 'g']], name='Python')
s.index.names = ['Name', 'Gen']; s

Name   Gen
ali    b      12
taha   b       8
sara   g      19
negar  g      17
Name: Python, dtype: int64

In [404]:
s.reset_index()

Unnamed: 0,Name,Gen,Python
0,ali,b,12
1,taha,b,8
2,sara,g,19
3,negar,g,17


In [405]:
s.reset_index(level=1)

Unnamed: 0_level_0,Gen,Python
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,b,12
taha,b,8
sara,g,19
negar,g,17


In [406]:
s.reset_index(level=0)

Unnamed: 0_level_0,Name,Python
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1
b,ali,12
b,taha,8
g,sara,19
g,negar,17


In [407]:
s.reset_index(level='Name')

Unnamed: 0_level_0,Name,Python
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1
b,ali,12
b,taha,8
g,sara,19
g,negar,17


#### dataframe (level, drop, name, inplace)

In [408]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [409]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [410]:
df.reset_index()

Unnamed: 0,index,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [411]:
df.reset_index(drop=True)

Unnamed: 0,Python,C++,Java
0,12,20,18
1,13,14,6
2,12,8,19


In [412]:
df.reset_index(names='Name')

Unnamed: 0,Name,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [413]:
#---------------

In [414]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [415]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [416]:
df.reset_index(names='Name', inplace=True)
df.set_index('Name', inplace=True)
df

Unnamed: 0_level_0,Python,C++,Java
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [417]:
df.reset_index(names='Esm', inplace=True)
df

Unnamed: 0,Esm,Python,C++,Java
0,Ali,12,20,18
1,Sara,13,14,6
2,Taha,12,8,19


In [418]:
df.reset_index(names='num')

Unnamed: 0,num,Esm,Python,C++,Java
0,0,Ali,12,20,18
1,1,Sara,13,14,6
2,2,Taha,12,8,19


MultiIndex

In [419]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
i = [['Ali', 'Sara', 'Taha'], ['b', 'g', 'b']]
c = ['Python', 'C++', 'Java']

In [420]:
df = pd.DataFrame(d, i, c); df

Unnamed: 0,Unnamed: 1,Python,C++,Java
Ali,b,12,20,18
Sara,g,13,14,6
Taha,b,12,8,19


In [421]:
df.reset_index()

Unnamed: 0,level_0,level_1,Python,C++,Java
0,Ali,b,12,20,18
1,Sara,g,13,14,6
2,Taha,b,12,8,19


In [422]:
df.reset_index(names=['Name', 'Gen'])

Unnamed: 0,Name,Gen,Python,C++,Java
0,Ali,b,12,20,18
1,Sara,g,13,14,6
2,Taha,b,12,8,19


In [423]:
# level

In [424]:
df.reset_index(level=0)

Unnamed: 0,level_0,Python,C++,Java
b,Ali,12,20,18
g,Sara,13,14,6
b,Taha,12,8,19


In [425]:
df.reset_index(level=0, names='Name')

Unnamed: 0,Name,Python,C++,Java
b,Ali,12,20,18
g,Sara,13,14,6
b,Taha,12,8,19


In [426]:
df.reset_index(level=1)

Unnamed: 0,level_1,Python,C++,Java
Ali,b,12,20,18
Sara,g,13,14,6
Taha,b,12,8,19


In [427]:
#df.reset_index(level=1, names='Gen')      IndexError: list index out of range ???

In [428]:
df.reset_index(level=[0, 1])

Unnamed: 0,level_0,level_1,Python,C++,Java
0,Ali,b,12,20,18
1,Sara,g,13,14,6
2,Taha,b,12,8,19


In [429]:
df.reset_index(level=[0, 1], names=['Name', 'Gen'])

Unnamed: 0,Name,Gen,Python,C++,Java
0,Ali,b,12,20,18
1,Sara,g,13,14,6
2,Taha,b,12,8,19


In [430]:
# drop

In [431]:
df

Unnamed: 0,Unnamed: 1,Python,C++,Java
Ali,b,12,20,18
Sara,g,13,14,6
Taha,b,12,8,19


In [432]:
df.reset_index(drop=True)

Unnamed: 0,Python,C++,Java
0,12,20,18
1,13,14,6
2,12,8,19


In [433]:
df.reset_index(level=0, drop=True)

Unnamed: 0,Python,C++,Java
b,12,20,18
g,13,14,6
b,12,8,19


In [434]:
df.reset_index(level=1, drop=True)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [435]:
#---------------

In [436]:
df = pd.DataFrame(d, index=pd.MultiIndex.from_arrays(i, names=['Name', 'Gen']), columns=c); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++,Java
Name,Gen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,b,12,20,18
Sara,g,13,14,6
Taha,b,12,8,19


In [437]:
df.reset_index()

Unnamed: 0,Name,Gen,Python,C++,Java
0,Ali,b,12,20,18
1,Sara,g,13,14,6
2,Taha,b,12,8,19


In [438]:
df.reset_index(level=0)

Unnamed: 0_level_0,Name,Python,C++,Java
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
b,Ali,12,20,18
g,Sara,13,14,6
b,Taha,12,8,19


In [439]:
df.reset_index(level=1)

Unnamed: 0_level_0,Gen,Python,C++,Java
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,b,12,20,18
Sara,g,13,14,6
Taha,b,12,8,19


### reindex(index, columns, labels, axis, method, level, fill_value, limit)

In [440]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [441]:
df = pd.DataFrame(d)

index & columns 

In [442]:
# index

In [443]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [444]:
df.reindex(index=['Sara'])

Unnamed: 0,Python,C++,Java
Sara,13,14,6


In [445]:
df.reindex(index=['Sara', 'Ali'])

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Ali,12,20,18


In [446]:
df.reindex(index=['Sara', 'Ali', 'Taha'])

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Ali,12,20,18
Taha,12,8,19


In [447]:
df.loc[['Sara', 'Ali', 'Taha'], :]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Ali,12,20,18
Taha,12,8,19


In [448]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [449]:
df.reindex(index=['Omid', 'Sara', 'Taha'])

Unnamed: 0,Python,C++,Java
Omid,,,
Sara,13.0,14.0,6.0
Taha,12.0,8.0,19.0


In [450]:
df.reindex(index=['Omid', 'Sara', 'Taha'], fill_value=0)

Unnamed: 0,Python,C++,Java
Omid,0,0,0
Sara,13,14,6
Taha,12,8,19


In [451]:
df.reindex(index=['Omid', 'Taha', 'Ali', 'Sara'], fill_value=0)

Unnamed: 0,Python,C++,Java
Omid,0,0,0
Taha,12,8,19
Ali,12,20,18
Sara,13,14,6


In [452]:
df.reindex(index=['Ali', 'Sara', 'Taha', 'Omid', 'Ali'], fill_value=0)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Omid,0,0,0
Ali,12,20,18


In [453]:
# columns

In [454]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [455]:
df.reindex(columns=['Java','Python'])

Unnamed: 0,Java,Python
Ali,18,12
Sara,6,13
Taha,19,12


In [456]:
df.reindex(columns=['Java', 'Python', 'C++', 'Python'])

Unnamed: 0,Java,Python,C++,Python.1
Ali,18,12,20,12
Sara,6,13,14,13
Taha,19,12,8,12


In [457]:
df.loc[:, ['Java', 'Python', 'C++', 'Python']]

Unnamed: 0,Java,Python,C++,Python.1
Ali,18,12,20,12
Sara,6,13,14,13
Taha,19,12,8,12


In [458]:
df[['Java', 'Python', 'C++', 'Python']]

Unnamed: 0,Java,Python,C++,Python.1
Ali,18,12,20,12
Sara,6,13,14,13
Taha,19,12,8,12


In [459]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [460]:
df.reindex(columns=['Python', 'R'], fill_value=0)

Unnamed: 0,Python,R
Ali,12,0
Sara,13,0
Taha,12,0


In [461]:
df.reindex(columns=['Python', 'R', 'C++', 'Java'], fill_value=0)

Unnamed: 0,Python,R,C++,Java
Ali,12,0,20,18
Sara,13,0,14,6
Taha,12,0,8,19


In [462]:
# tarkibi

In [463]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [464]:
df.reindex(index=['Taha', 'Ali', 'Sara'], columns=['C++', 'Java', 'Python'])

Unnamed: 0,C++,Java,Python
Taha,8,19,12
Ali,20,18,12
Sara,14,6,13


In [465]:
df.reindex(index=['Taha', 'Ali', 'Omid', 'Sara'], columns=['Python', 'C++', 'R', 'Java'], fill_value=0)

Unnamed: 0,Python,C++,R,Java
Taha,12,8,0,19
Ali,12,20,0,18
Omid,0,0,0,0
Sara,13,14,0,6


labels & axis 

In [466]:
# ('index', 'columns') or (0, 1)

In [467]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [468]:
df.reindex(labels=['Omid', 'Sara', 'Taha'], axis=0)

Unnamed: 0,Python,C++,Java
Omid,,,
Sara,13.0,14.0,6.0
Taha,12.0,8.0,19.0


In [469]:
df.reindex(labels=['Omid', 'Sara', 'Taha'], axis='index')

Unnamed: 0,Python,C++,Java
Omid,,,
Sara,13.0,14.0,6.0
Taha,12.0,8.0,19.0


In [470]:
df.reindex(labels=['Python', 'R', 'C++', 'Java'], axis=1)

Unnamed: 0,Python,R,C++,Java
Ali,12,,20,18
Sara,13,,14,6
Taha,12,,8,19


In [471]:
df.reindex(labels=['Python', 'R', 'C++', 'Java'], axis='columns', fill_value=0)

Unnamed: 0,Python,R,C++,Java
Ali,12,0,20,18
Sara,13,0,14,6
Taha,12,0,8,19


level

In [472]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [473]:
df.reindex(index=['Ali'])

Unnamed: 0,Python,C++,Java
Ali,12,20,18


In [474]:
df.reindex(index=[['Ali'], ['b']])

Unnamed: 0,Unnamed: 1,Python,C++,Java
Ali,b,,,


In [475]:
df.reindex(index=[['Ali'], ['b']], level=0)

Unnamed: 0,Unnamed: 1,Python,C++,Java
Ali,b,12,20,18


In [476]:
df.reindex(index=[['b'], ['Ali']], level=1)

Unnamed: 0,Unnamed: 1,Python,C++,Java
b,Ali,12,20,18


In [477]:
df.reindex(index=[['Sara', 'Ali', 'Taha'], ['g', 'b', 'b']], level=0)

Unnamed: 0,Unnamed: 1,Python,C++,Java
Sara,g,13,14,6
Ali,b,12,20,18
Taha,b,12,8,19


In [478]:
df.reindex(index=[['Sara', 'Ali', 'Omid'], ['g', 'b', 'b']], level=0)

Unnamed: 0,Unnamed: 1,Python,C++,Java
Sara,g,13.0,14.0,6.0
Ali,b,12.0,20.0,18.0
Omid,b,,,


In [479]:
#---------------

In [480]:
df1 = df.reindex(index=[['Ali', 'Sara', 'Taha'], ['A', 'S', 'T']], level=0)
df1 = df1.reindex(columns=[['p', 'c', 'j'], ['Python', 'C++', 'Java']], level=1)

In [481]:
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,p,c,j
Unnamed: 0_level_1,Unnamed: 1_level_1,Python,C++,Java
Ali,A,12,20,18
Sara,S,13,14,6
Taha,T,12,8,19


In [482]:
df1.reindex(index=['Sara', 'Ali', 'Taha'])

Unnamed: 0_level_0,p,c,j
Unnamed: 0_level_1,Python,C++,Java
Sara,,,
Ali,,,
Taha,,,


In [483]:
df1.reindex(index=['Sara', 'Ali', 'Taha'], level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,p,c,j
Unnamed: 0_level_1,Unnamed: 1_level_1,Python,C++,Java
Sara,S,13,14,6
Ali,A,12,20,18
Taha,T,12,8,19


In [484]:
df1.reindex(columns=['j', 'p', 'c'], level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,j,p,c
Unnamed: 0_level_1,Unnamed: 1_level_1,Java,Python,C++
Ali,A,18,12,20
Sara,S,6,13,14
Taha,T,19,12,8


method & limit

In [485]:
i1 = pd.date_range('1/1/2010', periods=8, freq='D')
i2 = pd.date_range('12/29/2009', periods=13, freq='D')
df2 = pd.DataFrame(data={"price": [100, 101, 95, 100, 89, NaN, 92, 88]}, index=i1)
display(i1, i2, df2)

DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04',
               '2010-01-05', '2010-01-06', '2010-01-07', '2010-01-08'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2009-12-29', '2009-12-30', '2009-12-31', '2010-01-01',
               '2010-01-02', '2010-01-03', '2010-01-04', '2010-01-05',
               '2010-01-06', '2010-01-07', '2010-01-08', '2010-01-09',
               '2010-01-10'],
              dtype='datetime64[ns]', freq='D')

Unnamed: 0,price
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0
2010-01-08,88.0


In [486]:
df2 = df2.drop('2010-01-04'); df2

Unnamed: 0,price
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0
2010-01-08,88.0


In [487]:
df2.reindex(index=i2)

Unnamed: 0,price
2009-12-29,
2009-12-30,
2009-12-31,
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [488]:
df2.reindex(index=i2, fill_value=0)

Unnamed: 0,price
2009-12-29,0.0
2009-12-30,0.0
2009-12-31,0.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,0.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [489]:
df2.reindex(index=i2, method='nearest')

Unnamed: 0,price
2009-12-29,100.0
2009-12-30,100.0
2009-12-31,100.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,89.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [490]:
df2.reindex(index=i2, method='ffill')

Unnamed: 0,price
2009-12-29,
2009-12-30,
2009-12-31,
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,95.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [491]:
df2.reindex(index=i2, method='bfill')

Unnamed: 0,price
2009-12-29,100.0
2009-12-30,100.0
2009-12-31,100.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,89.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [492]:
df2.reindex(index=i2, method='bfill', limit=1)

Unnamed: 0,price
2009-12-29,
2009-12-30,
2009-12-31,100.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,89.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


In [493]:
df2.reindex(index=i2, method='bfill', limit=2)

Unnamed: 0,price
2009-12-29,
2009-12-30,100.0
2009-12-31,100.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,95.0
2010-01-04,89.0
2010-01-05,89.0
2010-01-06,
2010-01-07,92.0


### take (indices, axis)

In [494]:
df = pd.DataFrame(np.arange(1, 31).reshape((6, 5))); df

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15
3,16,17,18,19,20
4,21,22,23,24,25
5,26,27,28,29,30


In [495]:
df.reindex([5, 4, 3, 2, 1, 0])

Unnamed: 0,0,1,2,3,4
5,26,27,28,29,30
4,21,22,23,24,25
3,16,17,18,19,20
2,11,12,13,14,15
1,6,7,8,9,10
0,1,2,3,4,5


In [496]:
df.reindex([4, 3, 2, 1, 0], axis=1)

Unnamed: 0,4,3,2,1,0
0,5,4,3,2,1
1,10,9,8,7,6
2,15,14,13,12,11
3,20,19,18,17,16
4,25,24,23,22,21
5,30,29,28,27,26


In [497]:
#---------------

In [498]:
df.take([5, 4, 3, 2, 1, 0])

Unnamed: 0,0,1,2,3,4
5,26,27,28,29,30
4,21,22,23,24,25
3,16,17,18,19,20
2,11,12,13,14,15
1,6,7,8,9,10
0,1,2,3,4,5


In [499]:
df.take([4, 3, 2, 1, 0], axis=1)

Unnamed: 0,4,3,2,1,0
0,5,4,3,2,1
1,10,9,8,7,6
2,15,14,13,12,11
3,20,19,18,17,16
4,25,24,23,22,21
5,30,29,28,27,26


In [500]:
df.take(np.random.permutation(6))

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,5
2,11,12,13,14,15
5,26,27,28,29,30
3,16,17,18,19,20
1,6,7,8,9,10
4,21,22,23,24,25


In [501]:
df.take(np.random.permutation(5), axis=1)

Unnamed: 0,1,3,4,2,0
0,2,4,5,3,1
1,7,9,10,8,6
2,12,14,15,13,11
3,17,19,20,18,16
4,22,24,25,23,21
5,27,29,30,28,26


In [502]:
#---------------

In [503]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [504]:
#df.take(['Sara', 'Ali', 'Taha'])       ValueError: invalid literal for int() with base 10: 'Sara'

In [505]:
df.take([2, 1, 0])

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Sara,13,14,6
Ali,12,20,18


In [506]:
df.take([2, 1, 0], axis=1)

Unnamed: 0,Java,C++,Python
Ali,18,20,12
Sara,6,14,13
Taha,19,8,12


### rename(index, columns, mapper, axis, inplace, level)

In [507]:
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

In [508]:
# index

In [509]:
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [510]:
df.rename(index={0: -1})

Unnamed: 0,A,B
-1,1,4
1,2,5
2,3,6


In [511]:
df.rename(index={0: "x"})

Unnamed: 0,A,B
x,1,4
1,2,5
2,3,6


In [512]:
df.rename(index={0: "x", 1: "y", 2: "z"})

Unnamed: 0,A,B
x,1,4
y,2,5
z,3,6


In [513]:
df.rename(index=np.square)

Unnamed: 0,A,B
0,1,4
1,2,5
4,3,6


In [514]:
df.rename(index=float)

Unnamed: 0,A,B
0.0,1,4
1.0,2,5
2.0,3,6


In [515]:
l = lambda x: x+10
df.rename(index=l)

Unnamed: 0,A,B
10,1,4
11,2,5
12,3,6


In [516]:
# columns

In [517]:
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [518]:
df.rename(columns={"B": 2})

Unnamed: 0,A,2
0,1,4
1,2,5
2,3,6


In [519]:
df.rename(columns={"B": 'E'})

Unnamed: 0,A,E
0,1,4
1,2,5
2,3,6


In [520]:
df.rename(columns={"A": "a", "B": "c"})

Unnamed: 0,a,c
0,1,4
1,2,5
2,3,6


In [521]:
df.rename(columns=str.lower)

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [522]:
l = lambda x: '|' + x + '|'
df.rename(columns=l)

Unnamed: 0,|A|,|B|
0,1,4
1,2,5
2,3,6


In [523]:
# tarkibi

In [524]:
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [525]:
df.rename(index={0: "x"}, columns={"B": 2})

Unnamed: 0,A,2
x,1,4
1,2,5
2,3,6


In [526]:
df.rename(index={0: "x", 1: "y", 2: "z"}, columns=str.lower)

Unnamed: 0,a,b
x,1,4
y,2,5
z,3,6


MultiIndex

In [527]:
d = [[1, 2], [3, 4], [5, 6]]
i = [[0, 1, 2], ['x', 'y', 'z']]
c = [['A', 'B'], ['p', 'q']]

In [528]:
df = pd.DataFrame(d, i, c)

In [529]:
# index

In [530]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [531]:
df.rename(index={0: -1})

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
-1,x,1,2
1,y,3,4
2,z,5,6


In [532]:
df.rename(index={'y': 'o'})

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,o,3,4
2,z,5,6


In [533]:
df.rename(index={0: -1, 'y': 'o'})

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
-1,x,1,2
1,o,3,4
2,z,5,6


In [534]:
#df.rename(index=np.square)     TypeError

In [535]:
df.rename(index=np.square, level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
4,z,5,6


In [536]:
# columns

In [537]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [538]:
df.rename(columns={"B": 2})

Unnamed: 0_level_0,Unnamed: 1_level_0,A,2
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [539]:
df.rename(columns={"A": "a", "B": "c"})

Unnamed: 0_level_0,Unnamed: 1_level_0,a,c
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [540]:
l = lambda x: '|' + x + '|'
df.rename(columns=l)

Unnamed: 0_level_0,Unnamed: 1_level_0,|A|,|B|
Unnamed: 0_level_1,Unnamed: 1_level_1,|p|,|q|
0,x,1,2
1,y,3,4
2,z,5,6


In [541]:
l = lambda x: '|' + x + '|'
df.rename(columns=l, level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,|A|,|B|
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [542]:
l = lambda x: '|' + x + '|'
df.rename(columns=l, level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,|p|,|q|
0,x,1,2
1,y,3,4
2,z,5,6


### rename_axis (index, columns, mapper, axis, inplace)

In [543]:
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}); df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [544]:
df.rename_axis(index='num')

Unnamed: 0_level_0,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [545]:
df.rename_axis(columns='name')

name,A,B
0,1,4
1,2,5
2,3,6


In [546]:
df.rename_axis(index='num', columns='name')

name,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [547]:
#---------------

In [548]:
df.rename_axis(index='num', columns='name', inplace=True); df

name,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [549]:
df.rename_axis(index='num.')

name,A,B
num.,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [550]:
df.rename_axis(index=str.upper)

name,A,B
NUM,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [551]:
df.rename_axis(columns='name.')

name.,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [552]:
df.rename_axis(columns=str.upper)

NAME,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [553]:
df.rename_axis(index=str.upper, columns='_name_')

_name_,A,B
NUM,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


MultiIndex

In [554]:
d = [[1, 2], [3, 4], [5, 6]]
i = [[0, 1, 2], ['x', 'y', 'z']]
c = [['A', 'B'], ['p', 'q']]

In [555]:
df = pd.DataFrame(d, i, c); df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [556]:
df.rename_axis(index=['num', 'harf'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,p,q
num,harf,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


In [557]:
df.rename_axis(columns=['name', 'mod'])

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mod,p,q
0,x,1,2
1,y,3,4
2,z,5,6


In [558]:
df.rename_axis(index=['num', 'harf'], columns=['name', 'mod'])

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mod,p,q
num,harf,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


In [559]:
#---------------

In [560]:
df.rename_axis(index=['num', 'harf'], columns=['name', 'mod'], inplace=True); df

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mod,p,q
num,harf,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


In [561]:
df.rename_axis(index={'harf': 'no'})

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mod,p,q
num,no,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


In [562]:
df.rename_axis(columns={'mod': 'mo'})

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mo,p,q
num,harf,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


In [563]:
df.rename_axis(index=str.upper, columns={'mod': 'mo'})

Unnamed: 0_level_0,name,A,B
Unnamed: 0_level_1,mo,p,q
NUM,HARF,Unnamed: 2_level_2,Unnamed: 3_level_2
0,x,1,2
1,y,3,4
2,z,5,6


### sort_index (axis, level, ascending, inplace, na_position, sort_remaining, ignore_index, key)

In [564]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [565]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [566]:
df.sort_index()

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Mahsa,20,16,9
Sara,13,14,6
Taha,12,8,19


In [567]:
df.sort_index(axis=1)

Unnamed: 0,C++,Java,Python
Ali,20,18,12
Sara,14,6,13
Taha,8,19,12
Mahsa,16,9,20


### add_...fix (...fix, axis)

In [568]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d); df

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [569]:
df.add_prefix('item_')

Unnamed: 0,item_0,item_1,item_2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [570]:
df.add_suffix('_item')

Unnamed: 0,0_item,1_item,2_item
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [571]:
df.add_prefix('item_', axis=0)

Unnamed: 0,0,1,2
item_0,12,20,18
item_1,13,14,6
item_2,12,8,19
item_3,20,16,9


In [572]:
df.add_suffix('_item', axis=0)

Unnamed: 0,0,1,2
0_item,12,20,18
1_item,13,14,6
2_item,12,8,19
3_item,20,16,9


### delete

drop (labels, axis, index, columns, inplace)

In [573]:
# with labels & axis

In [574]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [575]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [576]:
df.drop(['Ali'])

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [577]:
df.drop(['Ali', 'Sara'])

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Mahsa,20,16,9


In [578]:
df.drop(['Ali', 'Sara'], axis=0)

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Mahsa,20,16,9


In [579]:
df.drop(['Java'], axis=1)

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [580]:
df.drop(['Java', 'Python'], axis=1)

Unnamed: 0,C++
Ali,20
Sara,14
Taha,8
Mahsa,16


In [581]:
# with index & columns

In [582]:
df.drop(index=['Ali'])

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [583]:
df.drop(index=['Ali', 'Sara'])

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Mahsa,20,16,9


In [584]:
df.drop(columns=['Python'])

Unnamed: 0,C++,Java
Ali,20,18
Sara,14,6
Taha,8,19
Mahsa,16,9


In [585]:
df.drop(columns=['Java', 'Python'])

Unnamed: 0,C++
Ali,20
Sara,14
Taha,8
Mahsa,16


In [586]:
df.drop(index=['Ali'], columns=['Python'])

Unnamed: 0,C++,Java
Sara,14,6
Taha,8,19
Mahsa,16,9


In [587]:
df.drop(index=['Ali', 'Taha'], columns=['C++'])

Unnamed: 0,Python,Java
Sara,13,6
Mahsa,20,9


In [588]:
#---------------

In [589]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [590]:
df['Java'] < 10

Ali      False
Sara      True
Taha     False
Mahsa     True
Name: Java, dtype: bool

In [591]:
df[df['Java'] < 10]

Unnamed: 0,Python,C++,Java
Sara,13,14,6
Mahsa,20,16,9


In [592]:
df[df['Java'] < 10].index

Index(['Sara', 'Mahsa'], dtype='object')

In [593]:
df.drop(df[df['Java'] < 10].index)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19


In [594]:
for i in df.index:
    if df.loc[i, 'Java'] < 10:
        df.drop(i,inplace=True)
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19


pop (item)

In [595]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [596]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [597]:
df.pop('Java')

Ali      18
Sara      6
Taha     19
Mahsa     9
Name: Java, dtype: int64

In [598]:
df

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [599]:
# pop columns

In [600]:
# df.pop('Ali')      KeyError: 'Ali'

df.T.pop('Ali')

Python    12
C++       20
Name: Ali, dtype: int64

In [601]:
df

Unnamed: 0,Python,C++
Ali,12,20
Sara,13,14
Taha,12,8
Mahsa,20,16


In [602]:
df = df.T
df

Unnamed: 0,Ali,Sara,Taha,Mahsa
Python,12,13,12,20
C++,20,14,8,16


In [603]:
df.pop('Ali')

Python    12
C++       20
Name: Ali, dtype: int64

In [604]:
df

Unnamed: 0,Sara,Taha,Mahsa
Python,13,12,20
C++,14,8,16


In [605]:
df = df.T
df

Unnamed: 0,Python,C++
Sara,13,14
Taha,12,8
Mahsa,20,16


## manage value

### NaN

#### is & not ()

In [606]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [607]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [608]:
df.isna()

Unnamed: 0,Python,C++,Java,R
Ali,False,True,True,False
Sara,True,False,True,False
Taha,False,False,True,False
Mahsa,True,True,True,False
Negar,False,False,True,False


In [609]:
df.isnull()

Unnamed: 0,Python,C++,Java,R
Ali,False,True,True,False
Sara,True,False,True,False
Taha,False,False,True,False
Mahsa,True,True,True,False
Negar,False,False,True,False


In [610]:
df.notna()

Unnamed: 0,Python,C++,Java,R
Ali,True,False,False,True
Sara,False,True,False,True
Taha,True,True,False,True
Mahsa,False,False,False,True
Negar,True,True,False,True


In [611]:
df.notnull()

Unnamed: 0,Python,C++,Java,R
Ali,True,False,False,True
Sara,False,True,False,True
Taha,True,True,False,True
Mahsa,False,False,False,True
Negar,True,True,False,True


In [612]:
#---------------

In [613]:
df.notnull().sum(axis=0)

Python    3
C++       3
Java      0
R         5
dtype: int64

In [614]:
df.notnull().sum(axis=1)

Ali      2
Sara     2
Taha     3
Mahsa    1
Negar    3
dtype: int64

#### dropna (axis, how, thresh, subset, inplace, ignore_index)

In [615]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [616]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [617]:
df.dropna()

Unnamed: 0,Python,C++,Java,R


In [618]:
df.dropna(axis=1)

Unnamed: 0,R
Ali,18
Sara,6
Taha,19
Mahsa,15
Negar,18


In [619]:
# how

In [620]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [621]:
df.dropna(how='any')

Unnamed: 0,Python,C++,Java,R


In [622]:
df.dropna(how='all')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [623]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [624]:
df.dropna(how='any', axis=1)

Unnamed: 0,R
Ali,18
Sara,6
Taha,19
Mahsa,15
Negar,18


In [625]:
df.dropna(how='all', axis=1)

Unnamed: 0,Python,C++,R
Ali,12.0,,18
Sara,,19.0,6
Taha,12.0,8.0,19
Mahsa,,,15
Negar,11.0,15.0,18


In [626]:
# thresh

In [627]:
# agar adade haghighi mojood dar yek satr ya sotoon kamtar az in meghdar bood, satr ya sotoon hazf mishavad.

In [628]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [629]:
df.dropna(thresh=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [630]:
df.dropna(thresh=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [631]:
df.dropna(thresh=3) 

Unnamed: 0,Python,C++,Java,R
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [632]:
df.dropna(thresh=4) 

Unnamed: 0,Python,C++,Java,R


In [633]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [634]:
df.dropna(thresh=1, axis=1)

Unnamed: 0,Python,C++,R
Ali,12.0,,18
Sara,,19.0,6
Taha,12.0,8.0,19
Mahsa,,,15
Negar,11.0,15.0,18


In [635]:
df.dropna(thresh=4, axis=1)

Unnamed: 0,R
Ali,18
Sara,6
Taha,19
Mahsa,15
Negar,18


In [636]:
df.dropna(thresh=6, axis=1)

Ali
Sara
Taha
Mahsa
Negar


In [637]:
# subset

In [638]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [639]:
df.dropna(subset='C++')

Unnamed: 0,Python,C++,Java,R
Sara,,19.0,,6
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [640]:
df.dropna(subset='Java')

Unnamed: 0,Python,C++,Java,R


In [641]:
df.dropna(subset='R')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [642]:
df.dropna(subset=['Python', 'C++'])

Unnamed: 0,Python,C++,Java,R
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [643]:
df.dropna(subset=['Python', 'C++'], how='all')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [644]:
df.dropna(subset=['Python', 'Java'], how='all')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Taha,12.0,8.0,,19
Negar,11.0,15.0,,18


In [645]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [646]:
df.dropna(subset='Ali', axis=1)

Unnamed: 0,Python,R
Ali,12.0,18
Sara,,6
Taha,12.0,19
Mahsa,,15
Negar,11.0,18


In [647]:
df.dropna(subset='Mahsa', axis=1)

Unnamed: 0,R
Ali,18
Sara,6
Taha,19
Mahsa,15
Negar,18


In [648]:
df.dropna(subset=['Ali', 'Sara'], axis=1)

Unnamed: 0,R
Ali,18
Sara,6
Taha,19
Mahsa,15
Negar,18


In [649]:
df.dropna(subset=['Ali', 'Sara'], axis=1, how='all')

Unnamed: 0,Python,C++,R
Ali,12.0,,18
Sara,,19.0,6
Taha,12.0,8.0,19
Mahsa,,,15
Negar,11.0,15.0,18


..........

In [650]:
df.loc['Taha', 'Java'] = 12
df.loc['Mahsa', 'R'] = NaN

In [651]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [652]:
df.dropna()

Unnamed: 0,Python,C++,Java,R
Taha,12.0,8.0,12.0,19.0


In [653]:
df.dropna(axis=1)

Ali
Sara
Taha
Mahsa
Negar


In [654]:
# how

In [655]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [656]:
df.dropna(how='any')

Unnamed: 0,Python,C++,Java,R
Taha,12.0,8.0,12.0,19.0


In [657]:
df.dropna(how='any', axis=1)

Ali
Sara
Taha
Mahsa
Negar


In [658]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [659]:
df.dropna(how='all')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Negar,11.0,15.0,,18.0


In [660]:
df.dropna(how='all', axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [661]:
# subset

In [662]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [663]:
df.dropna(subset='Java')

Unnamed: 0,Python,C++,Java,R
Taha,12.0,8.0,12.0,19.0


In [664]:
df.dropna(subset='R')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Negar,11.0,15.0,,18.0


In [665]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [666]:
df.dropna(subset='Ali', axis=1)

Unnamed: 0,Python,R
Ali,12.0,18.0
Sara,,6.0
Taha,12.0,19.0
Mahsa,,
Negar,11.0,18.0


In [667]:
df.dropna(subset='Taha', axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,12.0,19.0
Mahsa,,,,
Negar,11.0,15.0,,18.0


In [668]:
df.dropna(subset='Mahsa', axis=1)

Ali
Sara
Taha
Mahsa
Negar


#### fillna (value, axis, inplace, limit)

In [669]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [670]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [671]:
df.fillna(-1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,-1.0,-1.0,18
Sara,-1.0,19.0,-1.0,6
Taha,12.0,8.0,-1.0,19
Mahsa,-1.0,-1.0,-1.0,15
Negar,11.0,15.0,-1.0,18


In [672]:
df.fillna('o')

Unnamed: 0,Python,C++,Java,R
Ali,12.0,o,o,18
Sara,o,19.0,o,6
Taha,12.0,8.0,o,19
Mahsa,o,o,o,15
Negar,11.0,15.0,o,18


In [673]:
df.fillna({'C++': -1, 'Java': 'o'})

Unnamed: 0,Python,C++,Java,R
Ali,12.0,-1.0,o,18
Sara,,19.0,o,6
Taha,12.0,8.0,o,19
Mahsa,,-1.0,o,15
Negar,11.0,15.0,o,18


In [674]:
df.mean()

Python    11.666667
C++       14.000000
Java            NaN
R         15.200000
dtype: float64

In [675]:
df.fillna(df.mean())

Unnamed: 0,Python,C++,Java,R
Ali,12.0,14.0,,18
Sara,11.666667,19.0,,6
Taha,12.0,8.0,,19
Mahsa,11.666667,14.0,,15
Negar,11.0,15.0,,18


In [676]:
df.mean()['Python']

11.666666666666666

In [677]:
df.fillna(df.mean()['Python'])

Unnamed: 0,Python,C++,Java,R
Ali,12.0,11.666667,11.666667,18
Sara,11.666667,19.0,11.666667,6
Taha,12.0,8.0,11.666667,19
Mahsa,11.666667,11.666667,11.666667,15
Negar,11.0,15.0,11.666667,18


In [678]:
df.mean()[['Python']]

Python    11.666667
dtype: float64

In [679]:
df.fillna(df.mean()[['Python']])

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,11.666667,19.0,,6
Taha,12.0,8.0,,19
Mahsa,11.666667,,,15
Negar,11.0,15.0,,18


limit & axis

In [680]:
# limit: maximam tedade maghadire motavalie NaN baraye por kardan be jelo/aghab.

In [681]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [682]:
df.fillna('o', limit=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,o,o,18
Sara,o,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [683]:
df.fillna('o', limit=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,o,o,18
Sara,o,19.0,o,6
Taha,12.0,8.0,,19
Mahsa,o,o,,15
Negar,11.0,15.0,,18


In [684]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18
Sara,,19.0,,6
Taha,12.0,8.0,,19
Mahsa,,,,15
Negar,11.0,15.0,,18


In [685]:
df.fillna('o', limit=1, axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,o,,18.0
Sara,o,19.0,,6.0
Taha,12.0,8.0,o,19.0
Mahsa,o,,,15.0
Negar,11.0,15.0,o,18.0


In [686]:
df.fillna('o', limit=2, axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,o,o,18.0
Sara,o,19.0,o,6.0
Taha,12.0,8.0,o,19.0
Mahsa,o,o,,15.0
Negar,11.0,15.0,o,18.0


#### combine_first (other)

In [687]:
df1 = pd.DataFrame({'C++': [None, 12], 'Python': [None, 14]}, index=['Ali', 'Taha'])
df2 = pd.DataFrame({'C++': [None, 15], 'Python': [13, None]}, index=['Ali', 'Taha'])

In [688]:
display(df1, df2)

Unnamed: 0,C++,Python
Ali,,
Taha,12.0,14.0


Unnamed: 0,C++,Python
Ali,,13.0
Taha,15.0,


In [689]:
df1.combine_first(df2)

Unnamed: 0,C++,Python
Ali,,13.0
Taha,12.0,14.0


In [690]:
df2.combine_first(df1)

Unnamed: 0,C++,Python
Ali,,13.0
Taha,15.0,14.0


In [691]:
#---------------

In [692]:
df1 = pd.DataFrame({'C++': [None, 12], 'Python': [14, None]}, index=['Ali', 'Taha'])
df2 = pd.DataFrame({'Python': [13, 20, None], 'Java': [None, 11, 17]}, index=['Ali', 'Taha', 'Mahsa'])

In [693]:
display(df1, df2)

Unnamed: 0,C++,Python
Ali,,14.0
Taha,12.0,


Unnamed: 0,Python,Java
Ali,13.0,
Taha,20.0,11.0
Mahsa,,17.0


In [694]:
df1.combine_first(df2)

Unnamed: 0,C++,Java,Python
Ali,,,14.0
Mahsa,,17.0,
Taha,12.0,11.0,20.0


In [695]:
df2.combine_first(df1)

Unnamed: 0,C++,Java,Python
Ali,,,13.0
Mahsa,,17.0,
Taha,12.0,11.0,20.0


### ffill & bfill (axis, inplace, limit)

In [696]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': 11,  'Negar': NaN},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': 15,  'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': NaN, 'Negar': 18}}

In [697]:
df = pd.DataFrame(d); df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


ffill

In [698]:
df.ffill()

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,12.0,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,8.0,15.0,19.0
Negar,11.0,15.0,15.0,18.0


In [699]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [700]:
df.ffill(axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,12.0,12.0,18.0
Sara,,19.0,19.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,11.0,11.0,11.0
Negar,,15.0,15.0,18.0


In [701]:
# limit

In [702]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [703]:
df.ffill()

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,12.0,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,8.0,15.0,19.0
Negar,11.0,15.0,15.0,18.0


In [704]:
df.ffill(limit=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,12.0,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,8.0,15.0,19.0
Negar,11.0,15.0,,18.0


In [705]:
df.ffill(limit=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,12.0,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,8.0,15.0,19.0
Negar,11.0,15.0,15.0,18.0


In [706]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [707]:
df.ffill(axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,12.0,12.0,18.0
Sara,,19.0,19.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,11.0,11.0,11.0
Negar,,15.0,15.0,18.0


In [708]:
df.ffill(axis=1, limit=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,12.0,,18.0
Sara,,19.0,19.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,11.0,,
Negar,,15.0,15.0,18.0


In [709]:
df.ffill(axis=1, limit=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,12.0,12.0,18.0
Sara,,19.0,19.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,11.0,11.0,
Negar,,15.0,15.0,18.0


bfill

In [710]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [711]:
df.bfill()

Unnamed: 0,Python,C++,Java,R
Ali,12.0,19.0,15.0,18.0
Sara,12.0,19.0,15.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,15.0,,18.0
Negar,,15.0,,18.0


In [712]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [713]:
df.bfill(axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,18.0,18.0,18.0
Sara,19.0,19.0,6.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,15.0,15.0,18.0,18.0


In [714]:
# limit

In [715]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [716]:
df.bfill()

Unnamed: 0,Python,C++,Java,R
Ali,12.0,19.0,15.0,18.0
Sara,12.0,19.0,15.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,15.0,,18.0
Negar,,15.0,,18.0


In [717]:
df.bfill(limit=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,19.0,,18.0
Sara,12.0,19.0,15.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,15.0,,18.0
Negar,,15.0,,18.0


In [718]:
df.bfill(limit=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,19.0,15.0,18.0
Sara,12.0,19.0,15.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,15.0,,18.0
Negar,,15.0,,18.0


In [719]:
df

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,,18.0
Sara,,19.0,,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,,15.0,,18.0


In [720]:
df.bfill(axis=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,18.0,18.0,18.0
Sara,19.0,19.0,6.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,15.0,15.0,18.0,18.0


In [721]:
df.bfill(axis=1, limit=1)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,,18.0,18.0
Sara,19.0,19.0,6.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,15.0,15.0,18.0,18.0


In [722]:
df.bfill(axis=1, limit=2)

Unnamed: 0,Python,C++,Java,R
Ali,12.0,18.0,18.0,18.0
Sara,19.0,19.0,6.0,6.0
Taha,12.0,8.0,15.0,19.0
Mahsa,11.0,,,
Negar,15.0,15.0,18.0,18.0


### replace (to_replace, value, inplace)

In [723]:
d = [[12, NaN, 18], [NaN, 14, 6], [12, 9, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [724]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12.0,,18
Sara,,14.0,6
Taha,12.0,9.0,19


In [725]:
df.replace(NaN, 0)

Unnamed: 0,Python,C++,Java
Ali,12.0,0.0,18
Sara,0.0,14.0,6
Taha,12.0,9.0,19


In [726]:
df.replace({9: 10})

Unnamed: 0,Python,C++,Java
Ali,12.0,,18
Sara,,14.0,6
Taha,12.0,10.0,19


In [727]:
df.replace({9: 10, NaN: 0})

Unnamed: 0,Python,C++,Java
Ali,12.0,0.0,18
Sara,0.0,14.0,6
Taha,12.0,10.0,19


In [728]:
df.replace([9, NaN], [10, 0])

Unnamed: 0,Python,C++,Java
Ali,12.0,0.0,18
Sara,0.0,14.0,6
Taha,12.0,10.0,19


In [729]:
#---------------

In [730]:
df['C++'].replace({NaN: 0, 9: 10})

Ali      0.0
Sara    14.0
Taha    10.0
Name: C++, dtype: float64

In [731]:
df[['C++']].replace({NaN: 0, 9: 10})

Unnamed: 0,C++
Ali,0.0
Sara,14.0
Taha,10.0


In [732]:
df.replace({'C++': {NaN: 0, 9: 10}})

Unnamed: 0,Python,C++,Java
Ali,12.0,0.0,18
Sara,,14.0,6
Taha,12.0,10.0,19


In [733]:
#---------------

In [734]:
df.replace({'Python': NaN, 'Java': 6}, 'o')

Unnamed: 0,Python,C++,Java
Ali,12.0,,18
Sara,o,14.0,o
Taha,12.0,9.0,19


..........

In [735]:
d = {'Age': {0: 22, 1: 38,2: 26,3: 35,4: 35,5: 34,6: 54,7: 2,8: 27,9: 14},
     'Sex': {0: 'male',1: 'female',2: 'female',3: 'female',4: 'male',5: 'male',6: 'male',7: 'male',8: 'female',9: 'female'}}

In [736]:
df = pd.DataFrame(d); df

Unnamed: 0,Age,Sex
0,22,male
1,38,female
2,26,female
3,35,female
4,35,male
5,34,male
6,54,male
7,2,male
8,27,female
9,14,female


In [737]:
df.replace({'Sex': {'female': 0, 'male': 1}})

Unnamed: 0,Age,Sex
0,22,1
1,38,0
2,26,0
3,35,0
4,35,1
5,34,1
6,54,1
7,2,1
8,27,0
9,14,0


In [738]:
# or:

In [739]:
s = sorted(df['Sex'].unique())
z = zip(s, range(0, len(s) + 1))
dz = dict(z); dz

{'female': 0, 'male': 1}

In [740]:
df['Sex'].replace(dz)

0    1
1    0
2    0
3    0
4    1
5    1
6    1
7    1
8    0
9    0
Name: Sex, dtype: int64

In [741]:
df['Sex'].map(dz)

0    1
1    0
2    0
3    0
4    1
5    1
6    1
7    1
8    0
9    0
Name: Sex, dtype: int64

In [742]:
# or:

In [743]:
l = sorted(df['Sex'].unique()); l

['female', 'male']

In [744]:
df['Sex'].apply(l.index)

0    1
1    0
2    0
3    0
4    1
5    1
6    1
7    1
8    0
9    0
Name: Sex, dtype: int64

### duplicate

#### duplicated (subset, keep)

In [745]:
d = {'col1': ['a', 'b', 'a', 'b', 'b', 'a', 'b'],
     'col2': [10,  10,  20,  40,  30,  30,  40],
     'col3': ['a', 'c', 'e', 'f', 'c', 'e', 'f']}

In [746]:
frame = pd.DataFrame(d); frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [747]:
frame.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

In [748]:
~frame.duplicated()

0     True
1     True
2     True
3     True
4     True
5     True
6    False
dtype: bool

In [749]:
frame.duplicated(keep='last')

0    False
1    False
2    False
3     True
4    False
5    False
6    False
dtype: bool

In [750]:
# where

In [751]:
frame.where(frame.duplicated())

Unnamed: 0,col1,col2,col3
0,,,
1,,,
2,,,
3,,,
4,,,
5,,,
6,b,40.0,f


In [752]:
frame.where(frame.duplicated(keep='last'))

Unnamed: 0,col1,col2,col3
0,,,
1,,,
2,,,
3,b,40.0,f
4,,,
5,,,
6,,,


subset

In [753]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [754]:
frame['col1'].duplicated()

0    False
1    False
2     True
3     True
4     True
5     True
6     True
Name: col1, dtype: bool

In [755]:
frame.duplicated(subset='col1')

0    False
1    False
2     True
3     True
4     True
5     True
6     True
dtype: bool

In [756]:
frame.duplicated(subset='col1', keep='last')

0     True
1     True
2     True
3     True
4     True
5    False
6    False
dtype: bool

In [757]:
frame.duplicated(subset='col1', keep=False)

0    True
1    True
2    True
3    True
4    True
5    True
6    True
dtype: bool

In [758]:
frame.duplicated(subset='col3', keep=False)

0    False
1     True
2     True
3     True
4     True
5     True
6     True
dtype: bool

In [759]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [760]:
frame.duplicated(subset='col1')                    

0    False
1    False
2     True
3     True
4     True
5     True
6     True
dtype: bool

In [761]:
~frame.duplicated(subset='col1')                     # azaye uniq: avvalin ozv az har nemoone.

0     True
1     True
2    False
3    False
4    False
5    False
6    False
dtype: bool

In [762]:
frame[~frame.duplicated(subset='col1')]

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c


In [763]:
~frame.duplicated(subset='col1', keep='last')        # azaye uniq: akharin ozv az har nemoone.

0    False
1    False
2    False
3    False
4    False
5     True
6     True
dtype: bool

In [764]:
frame[~frame.duplicated(subset='col1', keep='last')]  

Unnamed: 0,col1,col2,col3
5,a,30,e
6,b,40,f


In [765]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [766]:
frame.duplicated(subset=['col1', 'col3'])               # harkat az bala be paiin 

0    False
1    False
2    False
3    False
4     True
5     True
6     True
dtype: bool

In [767]:
frame.duplicated(subset=['col1', 'col3'], keep='last')  # harkat az paiin be bala 

0    False
1     True
2     True
3     True
4    False
5    False
6    False
dtype: bool

In [768]:
# where

In [769]:
frame.where(frame.duplicated(subset=['col1', 'col3']))

Unnamed: 0,col1,col2,col3
0,,,
1,,,
2,,,
3,,,
4,b,30.0,c
5,a,30.0,e
6,b,40.0,f


In [770]:
frame.where(frame.duplicated(subset=['col1', 'col3'], keep='last'))

Unnamed: 0,col1,col2,col3
0,,,
1,b,10.0,c
2,a,20.0,e
3,b,40.0,f
4,,,
5,,,
6,,,


#### drop_duplicates (subset, keep, inplace, ignore_index)

In [771]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [772]:
frame.drop_duplicates()

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e


In [773]:
frame.drop_duplicates(keep='last')

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
4,b,30,c
5,a,30,e
6,b,40,f


In [774]:
frame.drop_duplicates(keep='last', ignore_index=True)

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,30,c
4,a,30,e
5,b,40,f


In [775]:
frame.drop_duplicates(keep=False)    # hazfe kamele nemoone haye moshabeh

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
4,b,30,c
5,a,30,e


subset

In [776]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [777]:
frame.drop_duplicates(subset='col1')

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c


In [778]:
frame.drop_duplicates(subset='col1', keep='last')

Unnamed: 0,col1,col2,col3
5,a,30,e
6,b,40,f


In [779]:
frame.drop_duplicates(subset='col1', keep=False)

Unnamed: 0,col1,col2,col3


In [780]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [781]:
frame.drop_duplicates(subset='col2')

Unnamed: 0,col1,col2,col3
0,a,10,a
2,a,20,e
3,b,40,f
4,b,30,c


In [782]:
frame.drop_duplicates(subset='col2', keep='last')

Unnamed: 0,col1,col2,col3
1,b,10,c
2,a,20,e
5,a,30,e
6,b,40,f


In [783]:
frame.drop_duplicates(subset='col2', keep=False)

Unnamed: 0,col1,col2,col3
2,a,20,e


In [784]:
frame

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f
4,b,30,c
5,a,30,e
6,b,40,f


In [785]:
frame.drop_duplicates(subset=['col1', 'col3'])

Unnamed: 0,col1,col2,col3
0,a,10,a
1,b,10,c
2,a,20,e
3,b,40,f


In [786]:
frame.drop_duplicates(subset=['col1', 'col3'], keep='last')

Unnamed: 0,col1,col2,col3
0,a,10,a
4,b,30,c
5,a,30,e
6,b,40,f


In [787]:
frame.drop_duplicates(subset=['col1', 'col3'], keep=False)

Unnamed: 0,col1,col2,col3
0,a,10,a


### value_counts(subset, normalize, sort, ascending, dropna)

In [788]:
d = {'num_legs':  {'falcon': 2, 'dog': 4, 'cat': 4, 'ant': 6, 'cr': None},
     'num_wings': {'falcon': 2, 'dog': 0, 'cat': 0, 'ant': 0, 'cr': 2}}

In [789]:
df = pd.DataFrame(d); df

Unnamed: 0,num_legs,num_wings
falcon,2.0,2
dog,4.0,0
cat,4.0,0
ant,6.0,0
cr,,2


In [790]:
df.value_counts()

num_legs  num_wings
4.0       0            2
2.0       2            1
6.0       0            1
Name: count, dtype: int64

In [791]:
df.value_counts(sort=False)

num_legs  num_wings
2.0       2            1
4.0       0            2
6.0       0            1
Name: count, dtype: int64

In [792]:
df.value_counts(ascending=True)

num_legs  num_wings
2.0       2            1
6.0       0            1
4.0       0            2
Name: count, dtype: int64

In [793]:
df.value_counts(normalize=True)

num_legs  num_wings
4.0       0            0.50
2.0       2            0.25
6.0       0            0.25
Name: proportion, dtype: float64

In [794]:
df.value_counts(dropna=False)

num_legs  num_wings
4.0       0            2
2.0       2            1
6.0       0            1
NaN       2            1
Name: count, dtype: int64

In [795]:
df.value_counts(subset='num_wings')

num_wings
0    3
2    2
Name: count, dtype: int64

In [796]:
df.value_counts(subset='num_legs')

num_legs
4.0    2
2.0    1
6.0    1
Name: count, dtype: int64

In [797]:
df.value_counts(subset='num_legs', dropna=False)

num_legs
4.0    2
2.0    1
6.0    1
NaN    1
Name: count, dtype: int64

In [798]:
df['num_legs'].value_counts(dropna=False)

num_legs
4.0    2
2.0    1
6.0    1
NaN    1
Name: count, dtype: int64

### sort_values (by, axis, ascending, inplace, na_position, ignore_index, key)

In [799]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [800]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [801]:
df.sort_values(by='Python')

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19
Sara,13,14,6
Mahsa,20,16,9


In [802]:
df.sort_values(by=['Python', 'C++'])

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Ali,12,20,18
Sara,13,14,6
Mahsa,20,16,9


In [803]:
df.sort_values(by=['Python', 'C++'], ascending=False)

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9
Sara,13,14,6
Ali,12,20,18
Taha,12,8,19


In [804]:
df.sort_values(by=['Python', 'C++'], ascending=[False, True])

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9
Sara,13,14,6
Taha,12,8,19
Ali,12,20,18


In [805]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [806]:
df.sort_values(by='Mahsa', axis=1)

Unnamed: 0,Java,C++,Python
Ali,18,20,12
Sara,6,14,13
Taha,19,8,12
Mahsa,9,16,20


In [807]:
#---------------

In [808]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d); df

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19
3,20,16,9


In [809]:
df.sort_values(by=0)

Unnamed: 0,0,1,2
0,12,20,18
2,12,8,19
1,13,14,6
3,20,16,9


In [810]:
df.sort_values(by=[0, 1])

Unnamed: 0,0,1,2
2,12,8,19
0,12,20,18
1,13,14,6
3,20,16,9


### nlargest (n, columns, keep)

In [811]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [812]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [813]:
df.nlargest(1, columns='Python')

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9


In [814]:
df.nlargest(3, columns='Python')

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9
Sara,13,14,6
Ali,12,20,18


In [815]:
df.nlargest(3, columns='Python', keep='last')

Unnamed: 0,Python,C++,Java
Mahsa,20,16,9
Sara,13,14,6
Taha,12,8,19


In [816]:
#---------------

In [817]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [818]:
df.nsmallest(2, columns='Python')

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Taha,12,8,19


In [819]:
df.nsmallest(2, columns=['Python', 'C++'])

Unnamed: 0,Python,C++,Java
Taha,12,8,19
Ali,12,20,18


### is & not

In [820]:
d = [[12, 20, np.nan], [13, 14, 6], [13, np.nan, np.nan], [20, np.nan, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [821]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20.0,
Sara,13,14.0,6.0
Taha,13,,
Mahsa,20,,9.0


In [822]:
df.isin([12])

Unnamed: 0,Python,C++,Java
Ali,True,False,False
Sara,False,False,False
Taha,False,False,False
Mahsa,False,False,False


In [823]:
df.isin([12, 20])

Unnamed: 0,Python,C++,Java
Ali,True,True,False
Sara,False,False,False
Taha,False,False,False
Mahsa,True,False,False


In [824]:
df[df.isin([12, 20])]

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,
Sara,,,
Taha,,,
Mahsa,20.0,,


In [825]:
df.where(df.isin([12, 20]))

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,
Sara,,,
Taha,,,
Mahsa,20.0,,


### select_dtypes (include, exclude)

In [826]:
df = pd.DataFrame({'a': [1, 2]*2, 'b': [True, False]*2, 'c': [1.0, 2.0]*2, 'e': ['p', 'q']*2}); df

Unnamed: 0,a,b,c,e
0,1,True,1.0,p
1,2,False,2.0,q
2,1,True,1.0,p
3,2,False,2.0,q


In [827]:
df.dtypes

a      int64
b       bool
c    float64
e     object
dtype: object

In [828]:
# include

In [829]:
df.select_dtypes(include='bool')

Unnamed: 0,b
0,True
1,False
2,True
3,False


In [830]:
df.select_dtypes(include=['object', 'int64'])

Unnamed: 0,a,e
0,1,p
1,2,q
2,1,p
3,2,q


In [831]:
# exclude

In [832]:
df.select_dtypes(exclude='bool')

Unnamed: 0,a,c,e
0,1,1.0,p
1,2,2.0,q
2,1,1.0,p
3,2,2.0,q


In [833]:
df.select_dtypes(exclude=['object', 'int64'])

Unnamed: 0,b,c
0,True,1.0
1,False,2.0
2,True,1.0
3,False,2.0


### astype (dtype)

In [834]:
d = {'name':  {0: 'ali', 1: 'taha', 2: 'omid', 3: 'sara'},
     'score': {0: 1, 1: 0, 2: 0, 3: 1}}

In [835]:
df = pd.DataFrame(d); df

Unnamed: 0,name,score
0,ali,1
1,taha,0
2,omid,0
3,sara,1


In [836]:
df['score']                    # dtype: int64

0    1
1    0
2    0
3    1
Name: score, dtype: int64

In [837]:
df['score'].astype('bool')     # dtype: bool

0     True
1    False
2    False
3     True
Name: score, dtype: bool

In [838]:
df

Unnamed: 0,name,score
0,ali,1
1,taha,0
2,omid,0
3,sara,1


In [839]:
df[['score']].astype('bool')    

Unnamed: 0,score
0,True
1,False
2,False
3,True


In [840]:
df

Unnamed: 0,name,score
0,ali,1
1,taha,0
2,omid,0
3,sara,1


In [841]:
df.astype({'score': 'bool'})   

Unnamed: 0,name,score
0,ali,True
1,taha,False
2,omid,False
3,sara,True


..........

In [842]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [843]:
df = pd.DataFrame({'name': n, 'score': s}); df

Unnamed: 0,name,score
0,ali,11
1,ali,20
2,ali,13
3,ali,14
4,sara,15
5,sara,6
6,sara,12
7,taha,18
8,taha,19


In [844]:
df['score']                 # dtype: int64

0    11
1    20
2    13
3    14
4    15
5     6
6    12
7    18
8    19
Name: score, dtype: int64

In [845]:
df['score'].astype('str')   # dtype: object

0    11
1    20
2    13
3    14
4    15
5     6
6    12
7    18
8    19
Name: score, dtype: object

In [846]:
#---------------

In [847]:
df['name']

0     ali
1     ali
2     ali
3     ali
4    sara
5    sara
6    sara
7    taha
8    taha
Name: name, dtype: object

In [848]:
c = df['name'].astype('category'); c

0     ali
1     ali
2     ali
3     ali
4    sara
5    sara
6    sara
7    taha
8    taha
Name: name, dtype: category
Categories (3, object): ['ali', 'sara', 'taha']

In [849]:
c.values.categories

Index(['ali', 'sara', 'taha'], dtype='object')

In [850]:
c.value_counts()

name
ali     4
sara    3
taha    2
Name: count, dtype: int64

In [851]:
c.values.codes

array([0, 0, 0, 0, 1, 1, 1, 2, 2], dtype=int8)

In [852]:
c.isin(['sara'])

0    False
1    False
2    False
3    False
4     True
5     True
6     True
7    False
8    False
Name: name, dtype: bool

In [853]:
c[c.isin(['sara'])]

4    sara
5    sara
6    sara
Name: name, dtype: category
Categories (3, object): ['ali', 'sara', 'taha']

In [854]:
c[c.isin(['sara'])].cat.remove_unused_categories()

4    sara
5    sara
6    sara
Name: name, dtype: category
Categories (1, object): ['sara']

..........

In [855]:
import seaborn as sns
df = sns.load_dataset('iris'); df     

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [856]:
df['species'].nunique()

3

In [857]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [858]:
c = df['species'].astype('category'); c

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: category
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [859]:
c.values.categories

Index(['setosa', 'versicolor', 'virginica'], dtype='object')

In [860]:
c.value_counts()

species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

In [861]:
c.values.codes

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int8)

In [862]:
c.isin(['setosa'])

0       True
1       True
2       True
3       True
4       True
       ...  
145    False
146    False
147    False
148    False
149    False
Name: species, Length: 150, dtype: bool

In [863]:
c[c.isin(['setosa'])]

0     setosa
1     setosa
2     setosa
3     setosa
4     setosa
5     setosa
6     setosa
7     setosa
8     setosa
9     setosa
10    setosa
11    setosa
12    setosa
13    setosa
14    setosa
15    setosa
16    setosa
17    setosa
18    setosa
19    setosa
20    setosa
21    setosa
22    setosa
23    setosa
24    setosa
25    setosa
26    setosa
27    setosa
28    setosa
29    setosa
30    setosa
31    setosa
32    setosa
33    setosa
34    setosa
35    setosa
36    setosa
37    setosa
38    setosa
39    setosa
40    setosa
41    setosa
42    setosa
43    setosa
44    setosa
45    setosa
46    setosa
47    setosa
48    setosa
49    setosa
Name: species, dtype: category
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [864]:
c[c.isin(['setosa'])].cat.remove_unused_categories()

0     setosa
1     setosa
2     setosa
3     setosa
4     setosa
5     setosa
6     setosa
7     setosa
8     setosa
9     setosa
10    setosa
11    setosa
12    setosa
13    setosa
14    setosa
15    setosa
16    setosa
17    setosa
18    setosa
19    setosa
20    setosa
21    setosa
22    setosa
23    setosa
24    setosa
25    setosa
26    setosa
27    setosa
28    setosa
29    setosa
30    setosa
31    setosa
32    setosa
33    setosa
34    setosa
35    setosa
36    setosa
37    setosa
38    setosa
39    setosa
40    setosa
41    setosa
42    setosa
43    setosa
44    setosa
45    setosa
46    setosa
47    setosa
48    setosa
49    setosa
Name: species, dtype: category
Categories (1, object): ['setosa']

kam kardane hafeze eshghali ba astype()

In [865]:
df.groupby('species').apply(lambda x: x[:3])

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,0,5.1,3.5,1.4,0.2,setosa
setosa,1,4.9,3.0,1.4,0.2,setosa
setosa,2,4.7,3.2,1.3,0.2,setosa
versicolor,50,7.0,3.2,4.7,1.4,versicolor
versicolor,51,6.4,3.2,4.5,1.5,versicolor
versicolor,52,6.9,3.1,4.9,1.5,versicolor
virginica,100,6.3,3.3,6.0,2.5,virginica
virginica,101,5.8,2.7,5.1,1.9,virginica
virginica,102,7.1,3.0,5.9,2.1,virginica


In [866]:
df['species']                  # dtype: object

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object

In [867]:
df.info()                      # memory usage: 6.0+ KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [868]:
df['species'] = df['species'].astype('category')

In [869]:
df['species']                  # dtype: category

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: category
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [870]:
df['species'].values.codes     # aknoon in satr ba in maghadire 0, 1, 2 dar hafeze zakhire mishavad.

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int8)

In [871]:
df.info()                      # memory usage: 5.1 KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   sepal_length  150 non-null    float64 
 1   sepal_width   150 non-null    float64 
 2   petal_length  150 non-null    float64 
 3   petal_width   150 non-null    float64 
 4   species       150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


## miscellaneous

### squeeze (axis)

In [872]:
d = {'C++': {'Ali': None, 'Sara': 14.0, 'Taha': 9.0}}
df = pd.DataFrame(d); df

Unnamed: 0,C++
Ali,
Sara,14.0
Taha,9.0


In [873]:
df.squeeze()

Ali      NaN
Sara    14.0
Taha     9.0
Name: C++, dtype: float64

In [874]:
df.index.name = 'Name'
df

Unnamed: 0_level_0,C++
Name,Unnamed: 1_level_1
Ali,
Sara,14.0
Taha,9.0


In [875]:
df.squeeze()

Name
Ali      NaN
Sara    14.0
Taha     9.0
Name: C++, dtype: float64

In [876]:
#---------------

In [877]:
d = {'Python': {'Ali': 12.0, 'Sara': None, 'Taha': 12.0},
     'C++':    {'Ali': None, 'Sara': 14.0, 'Taha': 9.0},
     'Java':   {'Ali': 18, 'Sara': 6, 'Taha': 19}}

In [878]:
df = pd.DataFrame(d)
df.index.name = 'Name'
df

Unnamed: 0_level_0,Python,C++,Java
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12.0,,18
Sara,,14.0,6
Taha,12.0,9.0,19


In [879]:
df['C++']

Name
Ali      NaN
Sara    14.0
Taha     9.0
Name: C++, dtype: float64

In [880]:
df[['C++']]

Unnamed: 0_level_0,C++
Name,Unnamed: 1_level_1
Ali,
Sara,14.0
Taha,9.0


In [881]:
df[['C++']].squeeze()

Name
Ali      NaN
Sara    14.0
Taha     9.0
Name: C++, dtype: float64

In [882]:
df.loc['Ali']

Python    12.0
C++        NaN
Java      18.0
Name: Ali, dtype: float64

In [883]:
df.loc[['Ali']]

Unnamed: 0_level_0,Python,C++,Java
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,12.0,,18


In [884]:
df.loc[['Ali']].squeeze()

Python    12.0
C++        NaN
Java      18.0
Name: Ali, dtype: float64

### head & tail (n)

In [885]:
df = pd.DataFrame(np.arange(1, 41).reshape((10, 4))); df

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16
4,17,18,19,20
5,21,22,23,24
6,25,26,27,28
7,29,30,31,32
8,33,34,35,36
9,37,38,39,40


In [886]:
# head: Return the first n rows.

In [887]:
df.head()

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16
4,17,18,19,20


In [888]:
df.head(3)

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [889]:
df.head(7)

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16
4,17,18,19,20
5,21,22,23,24
6,25,26,27,28


In [890]:
# tail: Return the last n rows.

In [891]:
df.tail()

Unnamed: 0,0,1,2,3
5,21,22,23,24
6,25,26,27,28
7,29,30,31,32
8,33,34,35,36
9,37,38,39,40


In [892]:
df.tail(2)

Unnamed: 0,0,1,2,3
8,33,34,35,36
9,37,38,39,40


In [893]:
df.tail(6)

Unnamed: 0,0,1,2,3
4,17,18,19,20
5,21,22,23,24
6,25,26,27,28
7,29,30,31,32
8,33,34,35,36
9,37,38,39,40


### sample (n, frac, replace, weights, random_state, axis, ignore_index)

In [894]:
df = pd.DataFrame(np.arange(1, 49).reshape((8, 6))); df

Unnamed: 0,0,1,2,3,4,5
0,1,2,3,4,5,6
1,7,8,9,10,11,12
2,13,14,15,16,17,18
3,19,20,21,22,23,24
4,25,26,27,28,29,30
5,31,32,33,34,35,36
6,37,38,39,40,41,42
7,43,44,45,46,47,48


In [895]:
# n

In [896]:
df.sample()

Unnamed: 0,0,1,2,3,4,5
5,31,32,33,34,35,36


In [897]:
df.sample(n=3)

Unnamed: 0,0,1,2,3,4,5
1,7,8,9,10,11,12
7,43,44,45,46,47,48
5,31,32,33,34,35,36


In [898]:
df.sample(n=3, axis=1)

Unnamed: 0,4,3,2
0,5,4,3
1,11,10,9
2,17,16,15
3,23,22,21
4,29,28,27
5,35,34,33
6,41,40,39
7,47,46,45


In [899]:
# frac

In [900]:
df.sample(frac=0.5)

Unnamed: 0,0,1,2,3,4,5
1,7,8,9,10,11,12
4,25,26,27,28,29,30
6,37,38,39,40,41,42
5,31,32,33,34,35,36


In [901]:
df.sample(frac=0.5, axis=1)

Unnamed: 0,0,1,4
0,1,2,5
1,7,8,11
2,13,14,17
3,19,20,23
4,25,26,29
5,31,32,35
6,37,38,41
7,43,44,47


In [902]:
# replace

In [903]:
#df.sample(10)          ValueError: Cannot take a larger sample than population when 'replace=False'

In [904]:
df.sample(10, replace=True)   

Unnamed: 0,0,1,2,3,4,5
4,25,26,27,28,29,30
4,25,26,27,28,29,30
7,43,44,45,46,47,48
4,25,26,27,28,29,30
1,7,8,9,10,11,12
7,43,44,45,46,47,48
3,19,20,21,22,23,24
7,43,44,45,46,47,48
3,19,20,21,22,23,24
4,25,26,27,28,29,30


In [905]:
df.sample(10, replace=True, axis=1)   

Unnamed: 0,4,2,1,4.1,5,0,4.2,0.1,2.1,5.1
0,5,3,2,5,6,1,5,1,3,6
1,11,9,8,11,12,7,11,7,9,12
2,17,15,14,17,18,13,17,13,15,18
3,23,21,20,23,24,19,23,19,21,24
4,29,27,26,29,30,25,29,25,27,30
5,35,33,32,35,36,31,35,31,33,36
6,41,39,38,41,42,37,41,37,39,42
7,47,45,44,47,48,43,47,43,45,48


In [906]:
df.sample(5, replace=True)

Unnamed: 0,0,1,2,3,4,5
7,43,44,45,46,47,48
0,1,2,3,4,5,6
0,1,2,3,4,5,6
1,7,8,9,10,11,12
0,1,2,3,4,5,6


In [907]:
df.sample(5, replace=True, axis=1)   

Unnamed: 0,5,1,3,0,1.1
0,6,2,4,1,2
1,12,8,10,7,8
2,18,14,16,13,14
3,24,20,22,19,20
4,30,26,28,25,26
5,36,32,34,31,32
6,42,38,40,37,38
7,48,44,46,43,44


In [908]:
# random_state

In [909]:
# dar chand bar run kardane selloole zir, har bar natayeje motafaveti migirim:

In [910]:
df.sample(n=3)   

Unnamed: 0,0,1,2,3,4,5
7,43,44,45,46,47,48
0,1,2,3,4,5,6
4,25,26,27,28,29,30


In [911]:
# ba dastoore random_state, ba har bar run kardane sellool, haman natije bare avval hasel mishavad:

In [912]:
df.sample(n=3, random_state=1)   

Unnamed: 0,0,1,2,3,4,5
7,43,44,45,46,47,48
2,13,14,15,16,17,18
1,7,8,9,10,11,12


..........

In [913]:
d = {'F1': {'s1': 7, 's2': 4, 's3': 6, 's4': 0, 's5': 3, 's6': 7},
     'F2': {'s1': 2, 's2': 1, 's3': 3, 's4': 0, 's5': 6, 's6': 2},
     'F3': {'s1': 5, 's2': 4, 's3': 1, 's4': 7, 's5': 2, 's6': 9},
     'F4': {'s1': 9, 's2': 4, 's3': 1, 's4': 7, 's5': 8, 's6': 1}}

In [914]:
df = pd.DataFrame(d); df

Unnamed: 0,F1,F2,F3,F4
s1,7,2,5,9
s2,4,1,4,4
s3,6,3,1,1
s4,0,0,7,7
s5,3,6,2,8
s6,7,2,9,1


In [915]:
# n

In [916]:
df.sample(n=2)

Unnamed: 0,F1,F2,F3,F4
s4,0,0,7,7
s6,7,2,9,1


In [917]:
df.sample(n=2, axis=1)

Unnamed: 0,F2,F3
s1,2,5
s2,1,4
s3,3,1
s4,0,7
s5,6,2
s6,2,9


In [918]:
s = df['F2'].sample(n=4); s

s3    3
s2    1
s6    2
s4    0
Name: F2, dtype: int64

In [919]:
# frac

In [920]:
df.sample(frac=0.2)

Unnamed: 0,F1,F2,F3,F4
s5,3,6,2,8


In [921]:
df.sample(frac=0.2, axis=1)

Unnamed: 0,F4
s1,9
s2,4
s3,1
s4,7
s5,8
s6,1


In [922]:
# replace

In [923]:
df.sample(10, replace=True)   

Unnamed: 0,F1,F2,F3,F4
s5,3,6,2,8
s1,7,2,5,9
s1,7,2,5,9
s4,0,0,7,7
s3,6,3,1,1
s3,6,3,1,1
s1,7,2,5,9
s2,4,1,4,4
s2,4,1,4,4
s2,4,1,4,4


In [924]:
df.sample(8, replace=True, axis=1)   

Unnamed: 0,F3,F1,F1.1,F1.2,F3.1,F1.3,F3.2,F3.3
s1,5,7,7,7,5,7,5,5
s2,4,4,4,4,4,4,4,4
s3,1,6,6,6,1,6,1,1
s4,7,0,0,0,7,0,7,7
s5,2,3,3,3,2,3,2,2
s6,9,7,7,7,9,7,9,9


In [925]:
# weights

In [926]:
df

Unnamed: 0,F1,F2,F3,F4
s1,7,2,5,9
s2,4,1,4,4
s3,6,3,1,1
s4,0,0,7,7
s5,3,6,2,8
s6,7,2,9,1


In [927]:
df.sample(3, weights='F3')  

Unnamed: 0,F1,F2,F3,F4
s4,0,0,7,7
s2,4,1,4,4
s5,3,6,2,8


### to_...()

to_dict

In [928]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [929]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [930]:
df.to_dict()

{'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
 'C++': {'Ali': 20, 'Sara': 14, 'Taha': 8},
 'Java': {'Ali': 18, 'Sara': 6, 'Taha': 19}}

In [931]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}
pd.DataFrame(d)

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [932]:
#---------------

In [933]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
df1 = pd.DataFrame(d); df1

Unnamed: 0,0,1,2
0,12,20,18
1,13,14,6
2,12,8,19


In [934]:
df1.to_dict()

{0: {0: 12, 1: 13, 2: 12}, 1: {0: 20, 1: 14, 2: 8}, 2: {0: 18, 1: 6, 2: 19}}

to_numpy

In [935]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [936]:
df.to_numpy()

array([[12, 20, 18],
       [13, 14,  6],
       [12,  8, 19]], dtype=int64)

In [937]:
df.values

array([[12, 20, 18],
       [13, 14,  6],
       [12,  8, 19]], dtype=int64)

## arithmetic operation

### math

In [938]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [939]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [940]:
df + 5

Unnamed: 0,Python,C++,Java
Ali,17,25,23
Sara,18,19,11
Taha,17,13,24
Mahsa,25,21,14


In [941]:
df.add(5)

Unnamed: 0,Python,C++,Java
Ali,17,25,23
Sara,18,19,11
Taha,17,13,24
Mahsa,25,21,14


In [942]:
df.sub(3)

Unnamed: 0,Python,C++,Java
Ali,9,17,15
Sara,10,11,3
Taha,9,5,16
Mahsa,17,13,6


In [943]:
df.multiply(4)

Unnamed: 0,Python,C++,Java
Ali,48,80,72
Sara,52,56,24
Taha,48,32,76
Mahsa,80,64,36


In [944]:
df.divide(2)

Unnamed: 0,Python,C++,Java
Ali,6.0,10.0,9.0
Sara,6.5,7.0,3.0
Taha,6.0,4.0,9.5
Mahsa,10.0,8.0,4.5


In [945]:
df.pow(3)

Unnamed: 0,Python,C++,Java
Ali,1728,8000,5832
Sara,2197,2744,216
Taha,1728,512,6859
Mahsa,8000,4096,729


In [946]:
df.mod(5)

Unnamed: 0,Python,C++,Java
Ali,2,0,3
Sara,3,4,1
Taha,2,3,4
Mahsa,0,1,4


add seri to dataframe

In [947]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [948]:
myser = df.loc['Ali']
myser

Python    12
C++       20
Java      18
Name: Ali, dtype: int64

In [949]:
df.add(myser)         # broadcasting

Unnamed: 0,Python,C++,Java
Ali,24,40,36
Sara,25,34,24
Taha,24,28,37
Mahsa,32,36,27


In [950]:
#---------------

In [951]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [952]:
myser = df['Python']
myser

Ali      12
Sara     13
Taha     12
Mahsa    20
Name: Python, dtype: int64

In [953]:
df.add(myser, axis=0)

Unnamed: 0,Python,C++,Java
Ali,24,32,30
Sara,26,27,19
Taha,24,20,31
Mahsa,40,36,29


2 DataFrames

In [954]:
arr1 = np.arange(12).reshape((4, 3)); arr1

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [955]:
df1 = pd.DataFrame(data=arr1, columns=list('abc')); df1

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [956]:
arr2 = np.arange(10).reshape((5, 2))
df2 = pd.DataFrame(data=arr2, columns=list('ab')); df2

Unnamed: 0,a,b
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9


In [957]:
df2.loc[1, 'b']

3

In [958]:
df2.loc[[1], ['b']]

Unnamed: 0,b
1,3


In [959]:
df2.loc[1, 'b'] = np.nan

In [960]:
df1

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [961]:
df2

Unnamed: 0,a,b
0,0,1.0
1,2,
2,4,5.0
3,6,7.0
4,8,9.0


In [962]:
df1 + df2

Unnamed: 0,a,b,c
0,0.0,2.0,
1,5.0,,
2,10.0,12.0,
3,15.0,17.0,
4,,,


In [963]:
df1.add(df2)

Unnamed: 0,a,b,c
0,0.0,2.0,
1,5.0,,
2,10.0,12.0,
3,15.0,17.0,
4,,,


In [964]:
df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c
0,0.0,2.0,2.0
1,5.0,4.0,5.0
2,10.0,12.0,8.0
3,15.0,17.0,11.0
4,8.0,9.0,


In [965]:
df1.sub(df2, fill_value=0)

Unnamed: 0,a,b,c
0,0.0,0.0,2.0
1,1.0,4.0,5.0
2,2.0,2.0,8.0
3,3.0,3.0,11.0
4,-8.0,-9.0,


### describe (axis)

In [966]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [967]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [968]:
df.describe()

Unnamed: 0,Python,C++,Java
count,4.0,4.0,4.0
mean,14.25,14.5,13.0
std,3.86221,5.0,6.480741
min,12.0,8.0,6.0
25%,12.0,12.5,8.25
50%,12.5,15.0,13.5
75%,14.75,17.0,18.25
max,20.0,20.0,19.0


In [969]:
df.count()

Python    4
C++       4
Java      4
dtype: int64

In [970]:
df.count(axis=1)

Ali      3
Sara     3
Taha     3
Mahsa    3
dtype: int64

In [971]:
df.sum()

Python    57
C++       58
Java      52
dtype: int64

In [972]:
df.sum(axis=1)

Ali      50
Sara     33
Taha     39
Mahsa    45
dtype: int64

In [973]:
df.mean()

Python    14.25
C++       14.50
Java      13.00
dtype: float64

In [974]:
df.mean(axis=1)

Ali      16.666667
Sara     11.000000
Taha     13.000000
Mahsa    15.000000
dtype: float64

In [975]:
df.mean(axis=1, skipna=False)

Ali      16.666667
Sara     11.000000
Taha     13.000000
Mahsa    15.000000
dtype: float64

..........

In [976]:
df = pd.DataFrame(np.random.randn(1000, 3))

In [977]:
df.head()

Unnamed: 0,0,1,2
0,-0.057172,-1.726407,0.35357
1,-0.448725,-1.564963,2.743957
2,0.508077,0.532047,-2.281545
3,-1.715186,-0.078274,-1.263073
4,-0.235862,-2.013336,-1.756256


In [978]:
df.describe()

Unnamed: 0,0,1,2
count,1000.0,1000.0,1000.0
mean,0.001443,-0.035066,0.013178
std,0.976564,0.969129,1.036264
min,-3.343962,-3.629512,-3.250518
25%,-0.633505,-0.685884,-0.677319
50%,-0.017013,-0.042167,-0.01564
75%,0.613067,0.614032,0.737829
max,3.365354,2.74815,3.316298


In [979]:
df.min()

0   -3.343962
1   -3.629512
2   -3.250518
dtype: float64

In [980]:
df.max()

0    3.365354
1    2.748150
2    3.316298
dtype: float64

In [981]:
# tabdile dade haye bozorgtar az 3 be 3 va koochektar az -3 be -3

In [982]:
df[np.abs(df) > 3] = np.sign(df) * 3

In [983]:
df.head()

Unnamed: 0,0,1,2
0,-0.057172,-1.726407,0.35357
1,-0.448725,-1.564963,2.743957
2,0.508077,0.532047,-2.281545
3,-1.715186,-0.078274,-1.263073
4,-0.235862,-2.013336,-1.756256


In [984]:
df.describe()

Unnamed: 0,0,1,2
count,1000.0,1000.0,1000.0
mean,0.001679,-0.034185,0.01308
std,0.973422,0.96619,1.034448
min,-3.0,-3.0,-3.0
25%,-0.633505,-0.685884,-0.677319
50%,-0.017013,-0.042167,-0.01564
75%,0.613067,0.614032,0.737829
max,3.0,2.74815,3.0


In [985]:
df.min()

0   -3.0
1   -3.0
2   -3.0
dtype: float64

In [986]:
df.max()

0    3.00000
1    2.74815
2    3.00000
dtype: float64

### eq , ne , gt , ge , lt , le

In [987]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [988]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [989]:
df == 12

Unnamed: 0,Python,C++,Java
Ali,True,False,False
Sara,False,False,False
Taha,True,False,False


In [990]:
df.eq(12)

Unnamed: 0,Python,C++,Java
Ali,True,False,False
Sara,False,False,False
Taha,True,False,False


In [991]:
df != 20

Unnamed: 0,Python,C++,Java
Ali,True,False,True
Sara,True,True,True
Taha,True,True,True


In [992]:
df.ne(20)

Unnamed: 0,Python,C++,Java
Ali,True,False,True
Sara,True,True,True
Taha,True,True,True


In [993]:
df.gt(12)

Unnamed: 0,Python,C++,Java
Ali,False,True,True
Sara,True,True,False
Taha,False,False,True


In [994]:
df.ge(12)

Unnamed: 0,Python,C++,Java
Ali,True,True,True
Sara,True,True,False
Taha,True,False,True


In [995]:
df.lt(14)

Unnamed: 0,Python,C++,Java
Ali,True,False,False
Sara,True,False,True
Taha,True,True,False


In [996]:
df.le(14)

Unnamed: 0,Python,C++,Java
Ali,True,False,False
Sara,True,True,True
Taha,True,True,False


### max,idxmax & min,idxmin (axis)

In [997]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [998]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [999]:
df.max()

Python    13
C++       20
Java      19
dtype: int64

In [1000]:
df.idxmax()

Python    Sara
C++        Ali
Java      Taha
dtype: object

In [1001]:
df.max(axis=1)

Ali     20
Sara    14
Taha    19
dtype: int64

In [1002]:
df.idxmax(axis=1)

Ali      C++
Sara     C++
Taha    Java
dtype: object

In [1003]:
#---------------

In [1004]:
df.min()

Python    12
C++        8
Java       6
dtype: int64

In [1005]:
df.idxmin()

Python     Ali
C++       Taha
Java      Sara
dtype: object

In [1006]:
df.min(axis=1)

Ali     12
Sara     6
Taha     8
dtype: int64

In [1007]:
df.idxmin(axis=1)

Ali     Python
Sara      Java
Taha       C++
dtype: object

### cumsum , cumprod , cummax , cummin (axis, skipna)

In [1008]:
d = [[12, 20, 18], [13, 14, 6], [np.nan, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [1009]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,13.0,14,6
Taha,,8,19
Mahsa,20.0,16,9


In [1010]:
df.cumsum()

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,25.0,34,24
Taha,,42,43
Mahsa,45.0,58,52


In [1011]:
df.cumsum(skipna=False)

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,25.0,34,24
Taha,,42,43
Mahsa,,58,52


In [1012]:
df.cumsum(axis=1)

Unnamed: 0,Python,C++,Java
Ali,12.0,32.0,50.0
Sara,13.0,27.0,33.0
Taha,,8.0,27.0
Mahsa,20.0,36.0,45.0


In [1013]:
#---------------

In [1014]:
df.cumprod()

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,156.0,280,108
Taha,,2240,2052
Mahsa,3120.0,35840,18468


In [1015]:
df.cumprod(axis=1)

Unnamed: 0,Python,C++,Java
Ali,12.0,240.0,4320.0
Sara,13.0,182.0,1092.0
Taha,,8.0,152.0
Mahsa,20.0,320.0,2880.0


In [1016]:
#---------------

In [1017]:
df.cummax()

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,13.0,20,18
Taha,,20,19
Mahsa,20.0,20,19


In [1018]:
df.cummax(axis=1)

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,20.0
Sara,13.0,14.0,14.0
Taha,,8.0,19.0
Mahsa,20.0,20.0,20.0


In [1019]:
#---------------

In [1020]:
df.cummin()

Unnamed: 0,Python,C++,Java
Ali,12.0,20,18
Sara,12.0,14,6
Taha,,8,6
Mahsa,12.0,8,6


In [1021]:
df.cummin(axis=1)

Unnamed: 0,Python,C++,Java
Ali,12.0,12.0,12.0
Sara,13.0,13.0,6.0
Taha,,8.0,8.0
Mahsa,20.0,16.0,9.0


## change structure

### Transpose & ustack

In [1022]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [1023]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [1024]:
df.T

Unnamed: 0,Ali,Sara,Taha,Mahsa
Python,12,13,12,20
C++,20,14,8,16
Java,18,6,19,9


In [1025]:
df.transpose()

Unnamed: 0,Ali,Sara,Taha,Mahsa
Python,12,13,12,20
C++,20,14,8,16
Java,18,6,19,9


In [1026]:
#---------------

In [1027]:
pd.DataFrame([['Omid', 'boy'], ['Sara', 'girl'], ['Taha', 'boy']])

Unnamed: 0,0,1
0,Omid,boy
1,Sara,girl
2,Taha,boy


In [1028]:
pd.DataFrame([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]).T

Unnamed: 0,0,1
0,Omid,boy
1,Sara,girl
2,Taha,boy


unstack()

In [1029]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [1030]:
df.stack()

Ali    Python    12
       C++       20
       Java      18
Sara   Python    13
       C++       14
       Java       6
Taha   Python    12
       C++        8
       Java      19
Mahsa  Python    20
       C++       16
       Java       9
dtype: int64

In [1031]:
s = df.stack()
display(type(s), s.index)

pandas.core.series.Series

MultiIndex([(  'Ali', 'Python'),
            (  'Ali',    'C++'),
            (  'Ali',   'Java'),
            ( 'Sara', 'Python'),
            ( 'Sara',    'C++'),
            ( 'Sara',   'Java'),
            ( 'Taha', 'Python'),
            ( 'Taha',    'C++'),
            ( 'Taha',   'Java'),
            ('Mahsa', 'Python'),
            ('Mahsa',    'C++'),
            ('Mahsa',   'Java')],
           )

In [1032]:
s

Ali    Python    12
       C++       20
       Java      18
Sara   Python    13
       C++       14
       Java       6
Taha   Python    12
       C++        8
       Java      19
Mahsa  Python    20
       C++       16
       Java       9
dtype: int64

In [1033]:
s.unstack()

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19
Mahsa,20,16,9


In [1034]:
s.swaplevel()

Python  Ali      12
C++     Ali      20
Java    Ali      18
Python  Sara     13
C++     Sara     14
Java    Sara      6
Python  Taha     12
C++     Taha      8
Java    Taha     19
Python  Mahsa    20
C++     Mahsa    16
Java    Mahsa     9
dtype: int64

In [1035]:
s.swaplevel().unstack()

Unnamed: 0,Ali,Sara,Taha,Mahsa
Python,12,13,12,20
C++,20,14,8,16
Java,18,6,19,9


### pivot (columns, index, values)

In [1036]:
d = {'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
     'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
     'baz': [1, 2, 3, 4, 5, 6],
     'zoo': ['x', 'y', 'z', 'q', 'w', 't']}

In [1037]:
df = pd.DataFrame(d); df

Unnamed: 0,foo,bar,baz,zoo
0,one,A,1,x
1,one,B,2,y
2,one,C,3,z
3,two,A,4,q
4,two,B,5,w
5,two,C,6,t


In [1038]:
df.pivot(index='foo', columns='bar', values='baz')

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [1039]:
df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])

Unnamed: 0_level_0,baz,baz,baz,zoo,zoo,zoo
bar,A,B,C,A,B,C
foo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,2,3,x,y,z
two,4,5,6,q,w,t


In [1040]:
#---------------

In [1041]:
df = pd.DataFrame(d).set_index('foo'); df

Unnamed: 0_level_0,bar,baz,zoo
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,1,x
one,B,2,y
one,C,3,z
two,A,4,q
two,B,5,w
two,C,6,t


In [1042]:
df.pivot(columns='bar', values='baz')

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [1043]:
df.pivot(columns='bar', values=['baz', 'zoo'])

Unnamed: 0_level_0,baz,baz,baz,zoo,zoo,zoo
bar,A,B,C,A,B,C
foo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,2,3,x,y,z
two,4,5,6,q,w,t


..........

In [1044]:
d = {"lev1":   [1, 1, 1, 2, 2, 2],
     "lev2":   [1, 1, 2, 1, 1, 2],
     "lev3":   [1, 2, 1, 2, 1, 2],
     "lev4":   [1, 2, 3, 4, 5, 6],
     "values": [0, 1, 2, 3, 4, 5]}

In [1045]:
df = pd.DataFrame(d); df

Unnamed: 0,lev1,lev2,lev3,lev4,values
0,1,1,1,1,0
1,1,1,2,2,1
2,1,2,1,3,2
3,2,1,2,4,3
4,2,1,1,5,4
5,2,2,2,6,5


In [1046]:
df.pivot(index="lev1", columns=["lev2", "lev3"], values="values")

lev2,1,1,2,2
lev3,1,2,1,2
lev1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,0.0,1.0,2.0,
2,4.0,3.0,,5.0


In [1047]:
df.pivot(index=["lev1", "lev2"], columns="lev3", values="values")

Unnamed: 0_level_0,lev3,1,2
lev1,lev2,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0.0,1.0
1,2,2.0,
2,1,4.0,3.0
2,2,,5.0


..........

In [1048]:
d = {"A": ['one', 'one', 'one', 'two', 'two', 'two'],
     "B": [1, 1, 2, 1, 1, 2],
     "C": [1, 2, 1, 2, 1, 2],
     "V": [0, 1, 2, 3, 4, 5]}

In [1049]:
df = pd.DataFrame(d); df

Unnamed: 0,A,B,C,V
0,one,1,1,0
1,one,1,2,1
2,one,2,1,2
3,two,1,2,3
4,two,1,1,4
5,two,2,2,5


In [1050]:
df.pivot(index="A", columns=["B", "C"], values="V")

B,1,1,2,2
C,1,2,1,2
A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0.0,1.0,2.0,
two,4.0,3.0,,5.0


In [1051]:
df.pivot(index=["A", "B"], columns="C", values="V")

Unnamed: 0_level_0,C,1,2
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,0.0,1.0
one,2,2.0,
two,1,4.0,3.0
two,2,,5.0


### melt (id_vars, value_vars, var_name, value_name, col_level, ignore_index)

In [1052]:
d = {'Name': ['Ali', 'Sara', 'Mahsa'], 'C++': [12, 13, 20], 'Python': [14, 16, 8]}
df = pd.DataFrame(d); df

Unnamed: 0,Name,C++,Python
0,Ali,12,14
1,Sara,13,16
2,Mahsa,20,8


In [1053]:
df.melt()                    

Unnamed: 0,variable,value
0,Name,Ali
1,Name,Sara
2,Name,Mahsa
3,C++,12
4,C++,13
5,C++,20
6,Python,14
7,Python,16
8,Python,8


In [1054]:
df.melt(id_vars=['Name'])                    

Unnamed: 0,Name,variable,value
0,Ali,C++,12
1,Sara,C++,13
2,Mahsa,C++,20
3,Ali,Python,14
4,Sara,Python,16
5,Mahsa,Python,8


In [1055]:
df.melt(id_vars=['Name'], value_vars=['C++'])

Unnamed: 0,Name,variable,value
0,Ali,C++,12
1,Sara,C++,13
2,Mahsa,C++,20


In [1056]:
df.melt(id_vars=['Name'], value_vars=['C++'], var_name='Dars', value_name='Score')

Unnamed: 0,Name,Dars,Score
0,Ali,C++,12
1,Sara,C++,13
2,Mahsa,C++,20


In [1057]:
df.melt(id_vars=['Name'], value_vars=['C++', 'Python'], var_name='Dars', value_name='Score')

Unnamed: 0,Name,Dars,Score
0,Ali,C++,12
1,Sara,C++,13
2,Mahsa,C++,20
3,Ali,Python,14
4,Sara,Python,16
5,Mahsa,Python,8


In [1058]:
#---------------

In [1059]:
m = pd.melt(df, id_vars=['Name']); m

Unnamed: 0,Name,variable,value
0,Ali,C++,12
1,Sara,C++,13
2,Mahsa,C++,20
3,Ali,Python,14
4,Sara,Python,16
5,Mahsa,Python,8


In [1060]:
p = m.pivot(index='Name', columns='variable', values='value'); p

variable,C++,Python
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,12,14
Mahsa,20,8
Sara,13,16


In [1061]:
p.reset_index()

variable,Name,C++,Python
0,Ali,12,14
1,Mahsa,20,8
2,Sara,13,16


In [1062]:
df

Unnamed: 0,Name,C++,Python
0,Ali,12,14
1,Sara,13,16
2,Mahsa,20,8


MultiColumns

In [1063]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
i = ['Ali', 'Sara', 'Taha']
c = [['a', 'b', 'c'], ['Python', 'C++', 'Java']]

In [1064]:
df = pd.DataFrame(d, i, c); df

Unnamed: 0_level_0,a,b,c
Unnamed: 0_level_1,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1065]:
df.melt()  

Unnamed: 0,variable_0,variable_1,value
0,a,Python,12
1,a,Python,13
2,a,Python,12
3,b,C++,20
4,b,C++,14
5,b,C++,8
6,c,Java,18
7,c,Java,6
8,c,Java,19


In [1066]:
df.melt(var_name=['sath', 'Dars'], ignore_index=False)  

Unnamed: 0,sath,Dars,value
Ali,a,Python,12
Sara,a,Python,13
Taha,a,Python,12
Ali,b,C++,20
Sara,b,C++,14
Taha,b,C++,8
Ali,c,Java,18
Sara,c,Java,6
Taha,c,Java,19


In [1067]:
df.melt(col_level=0)  

Unnamed: 0,variable,value
0,a,12
1,a,13
2,a,12
3,b,20
4,b,14
5,b,8
6,c,18
7,c,6
8,c,19


In [1068]:
df.melt(col_level=1)  

Unnamed: 0,variable,value
0,Python,12
1,Python,13
2,Python,12
3,C++,20
4,C++,14
5,C++,8
6,Java,18
7,Java,6
8,Java,19


In [1069]:
df.melt(col_level=1, value_vars=['C++', 'Java'], var_name='Dars', value_name='Score', ignore_index=False)  

Unnamed: 0,Dars,Score
Ali,C++,20
Sara,C++,14
Taha,C++,8
Ali,Java,18
Sara,Java,6
Taha,Java,19


MultiIndex

In [1070]:
d = [[12, 20], [13, 14], [12, 8]]
i = [['Ali', 'Sara', 'Taha'], ['b', 'g', 'b']]
c = ['Python', 'C++']

In [1071]:
df = pd.DataFrame(d, i, c); df

Unnamed: 0,Unnamed: 1,Python,C++
Ali,b,12,20
Sara,g,13,14
Taha,b,12,8


In [1072]:
df.melt()  

Unnamed: 0,variable,value
0,Python,12
1,Python,13
2,Python,12
3,C++,20
4,C++,14
5,C++,8


In [1073]:
df.melt(ignore_index=False)  

Unnamed: 0,Unnamed: 1,variable,value
Ali,b,Python,12
Sara,g,Python,13
Taha,b,Python,12
Ali,b,C++,20
Sara,g,C++,14
Taha,b,C++,8


In [1074]:
#---------------

In [1075]:
df.index.names = ['Name', 'Gen']; df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,C++
Name,Gen,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,b,12,20
Sara,g,13,14
Taha,b,12,8


In [1076]:
df.melt(ignore_index=False)  

Unnamed: 0_level_0,Unnamed: 1_level_0,variable,value
Name,Gen,Unnamed: 2_level_1,Unnamed: 3_level_1
Ali,b,Python,12
Sara,g,Python,13
Taha,b,Python,12
Ali,b,C++,20
Sara,g,C++,14
Taha,b,C++,8


In [1077]:
#df.melt(id_vars='Name')        KeyError: 'Name'

In [1078]:
df.reset_index(level=0, inplace=True); df

Unnamed: 0_level_0,Name,Python,C++
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
b,Ali,12,20
g,Sara,13,14
b,Taha,12,8


In [1079]:
df.melt(id_vars='Name', ignore_index=False)     

Unnamed: 0_level_0,Name,variable,value
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
b,Ali,Python,12
g,Sara,Python,13
b,Taha,Python,12
b,Ali,C++,20
g,Sara,C++,14
b,Taha,C++,8


In [1080]:
df.melt(id_vars='Name', value_vars=['C++'], var_name='Dars', value_name='Score', ignore_index=False)     

Unnamed: 0_level_0,Name,Dars,Score
Gen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
b,Ali,C++,20
g,Sara,C++,14
b,Taha,C++,8


In [1081]:
#---------------

In [1082]:
df.reset_index(inplace=True); df

Unnamed: 0,Gen,Name,Python,C++
0,b,Ali,12,20
1,g,Sara,13,14
2,b,Taha,12,8


In [1083]:
df.melt()  

Unnamed: 0,variable,value
0,Gen,b
1,Gen,g
2,Gen,b
3,Name,Ali
4,Name,Sara
5,Name,Taha
6,Python,12
7,Python,13
8,Python,12
9,C++,20


In [1084]:
df.melt(id_vars=['Name'])                    

Unnamed: 0,Name,variable,value
0,Ali,Gen,b
1,Sara,Gen,g
2,Taha,Gen,b
3,Ali,Python,12
4,Sara,Python,13
5,Taha,Python,12
6,Ali,C++,20
7,Sara,C++,14
8,Taha,C++,8


In [1085]:
df.melt(id_vars=['Name', 'Gen'])                    

Unnamed: 0,Name,Gen,variable,value
0,Ali,b,Python,12
1,Sara,g,Python,13
2,Taha,b,Python,12
3,Ali,b,C++,20
4,Sara,g,C++,14
5,Taha,b,C++,8


In [1086]:
df.melt(id_vars=['Name', 'Gen', 'Python'])                    

Unnamed: 0,Name,Gen,Python,variable,value
0,Ali,b,12,C++,20
1,Sara,g,13,C++,14
2,Taha,b,12,C++,8


In [1087]:
df.melt(id_vars=['Name', 'Gen', 'Python', 'C++'])                    

Unnamed: 0,Name,Gen,Python,C++,variable,value


## applying function

### apply (func, axis, args, kwargs)

Apply a function along an axis of the DataFrame.

In [1088]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [1089]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1090]:
df.apply(min)

Python    12
C++        8
Java       6
dtype: int64

In [1091]:
df.apply(min, axis=1)

Ali     12
Sara     6
Taha     8
dtype: int64

In [1092]:
#---------------

In [1093]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1094]:
df.apply(sum)

Python    37
C++       42
Java      43
dtype: int64

In [1095]:
df.apply(sum, axis=1)

Ali     50
Sara    33
Taha    39
dtype: int64

In [1096]:
#---------------

In [1097]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1098]:
f = lambda x: x.min()

In [1099]:
df.apply(f)

Python    12
C++        8
Java       6
dtype: int64

In [1100]:
df.apply(f, axis=1)

Ali     12
Sara     6
Taha     8
dtype: int64

In [1101]:
#---------------

In [1102]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1103]:
f = lambda x: x-2
df.apply(f)

Unnamed: 0,Python,C++,Java
Ali,10,18,16
Sara,11,12,4
Taha,10,6,17


In [1104]:
f = lambda x, y: x-y
df.apply(f, args=(2,))

Unnamed: 0,Python,C++,Java
Ali,10,18,16
Sara,11,12,4
Taha,10,6,17


In [1105]:
#---------------

In [1106]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1107]:
f = lambda x: pd.Series([x.min(), x.max()], index=['min', 'max'])

In [1108]:
df.apply(f)

Unnamed: 0,Python,C++,Java
min,12,8,6
max,13,20,19


In [1109]:
df.apply(f, axis=1)

Unnamed: 0,min,max
Ali,12,20
Sara,6,14
Taha,8,19


taghir dadane yek satr ya sotoon az dataframe

In [1110]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1111]:
df['Python']

Ali     12
Sara    13
Taha    12
Name: Python, dtype: int64

In [1112]:
df['Python'].apply(lambda x: x-2)

Ali     10
Sara    11
Taha    10
Name: Python, dtype: int64

In [1113]:
#---------------

In [1114]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1115]:
def f(x):
    if x >15:
        return 'A'
    elif x >= 10:
        return 'B'
    else:
        return 'C'        

In [1116]:
# tabe f besoorate onsor be onsor emal mishavad, pas ba map kar mikonad na apply

#df.apply(f)       ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [1117]:
df['C++'].apply(f)

Ali     A
Sara    B
Taha    C
Name: C++, dtype: object

In [1118]:
df['Java'].apply(f)

Ali     A
Sara    C
Taha    A
Name: Java, dtype: object

In [1119]:
df['Java'].apply(f).value_counts()

Java
A    2
C    1
Name: count, dtype: int64

In [1120]:
#---------------

In [1121]:
df.loc['Ali']

Python    12
C++       20
Java      18
Name: Ali, dtype: int64

In [1122]:
df.loc['Ali'].apply(lambda x: x - 2)

Python    10
C++       18
Java      16
Name: Ali, dtype: int64

In [1123]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1124]:
df.loc['Ali'] = df.loc['Ali'].apply(lambda x: x - 2); df

Unnamed: 0,Python,C++,Java
Ali,10,18,16
Sara,13,14,6
Taha,12,8,19


In [1125]:
#---------------

In [1126]:
d = {'color':   {1: 'G', 2: 'F', 3: 'E', 4: 'I', 5: 'J', 6: 'J', 7: 'I', 8: 'H', 9: 'E', 10: 'H'},
     'clarity': {1: 1, 2: 2, 3: 4, 4: 3, 5: 1, 6: 5, 7: 6, 8: 2, 9: 3, 10: 4}}

In [1127]:
df = pd.DataFrame(d); df

Unnamed: 0,color,clarity
1,G,1
2,F,2
3,E,4
4,I,3
5,J,1
6,J,5
7,I,6
8,H,2
9,E,3
10,H,4


In [1128]:
df['color'] = df['color'].apply(list('JIHGFED').index); df

Unnamed: 0,color,clarity
1,3,1
2,4,2
3,5,4
4,1,3
5,0,1
6,0,5
7,1,6
8,2,2
9,5,3
10,2,4


### map (func, kwargs)

Apply a function to a Dataframe elementwise.

In [1129]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [1130]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1131]:
# df.map(min)        Error

In [1132]:
# df.map(sum)        Error

In [1133]:
df.map(lambda x: x-5)

Unnamed: 0,Python,C++,Java
Ali,7,15,13
Sara,8,9,1
Taha,7,3,14


In [1134]:
df.map(lambda x: '%.2f'%x)

Unnamed: 0,Python,C++,Java
Ali,12.0,20.0,18.0
Sara,13.0,14.0,6.0
Taha,12.0,8.0,19.0


In [1135]:
#df.apply(lambda x: '%.2f'%x)     TypeError: cannot convert the series to <class 'float'>

In [1136]:
#---------------

In [1137]:
df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1138]:
def f(x):
    if x >15:
        return 'A'
    elif x >= 10:
        return 'B'
    else:
        return 'C'        

In [1139]:
df.map(f)

Unnamed: 0,Python,C++,Java
Ali,B,A,A
Sara,B,B,C
Taha,B,C,A


In [1140]:
# apply dar serie ha besoorate onsor be onsor kar mikonad va moshkeli nadarad.

df['Python'].apply(f)

Ali     B
Sara    B
Taha    B
Name: Python, dtype: object

In [1141]:
# tabe f besoorate onsor be onsor emal mishavad, pas ba map kar mikonad na apply
#df.apply(f)      ValueError

In [1142]:
#---------------

In [1143]:
df['Java'].map({6: 0})

Ali     NaN
Sara    0.0
Taha    NaN
Name: Java, dtype: float64

### transform (func, axis, args, kwargs)

Call func on self producing a DataFrame with the same axis shape as self.

Only perform transforming type operations.

In [1144]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [1145]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1146]:
df.transform(np.sqrt)

Unnamed: 0,Python,C++,Java
Ali,3.464102,4.472136,4.242641
Sara,3.605551,3.741657,2.44949
Taha,3.464102,2.828427,4.358899


In [1147]:
df.transform([np.sqrt])

Unnamed: 0_level_0,Python,C++,Java
Unnamed: 0_level_1,sqrt,sqrt,sqrt
Ali,3.464102,4.472136,4.242641
Sara,3.605551,3.741657,2.44949
Taha,3.464102,2.828427,4.358899


In [1148]:
df.transform([np.sqrt, np.square])

Unnamed: 0_level_0,Python,Python,C++,C++,Java,Java
Unnamed: 0_level_1,sqrt,square,sqrt,square,sqrt,square
Ali,3.464102,144,4.472136,400,4.242641,324
Sara,3.605551,169,3.741657,196,2.44949,36
Taha,3.464102,144,2.828427,64,4.358899,361


In [1149]:
df.transform([np.square, lambda x: x-5])

Unnamed: 0_level_0,Python,Python,C++,C++,Java,Java
Unnamed: 0_level_1,square,<lambda>,square,<lambda>,square,<lambda>
Ali,144,7,400,15,324,13
Sara,169,8,196,9,36,1
Taha,144,7,64,3,361,14


In [1150]:
df.transform([np.square, lambda x: x-5], axis=1)

Unnamed: 0,Unnamed: 1,Python,C++,Java
Ali,square,144,400,324
Ali,<lambda>,7,15,13
Sara,square,169,196,36
Sara,<lambda>,8,9,1
Taha,square,144,64,361
Taha,<lambda>,7,3,14


### agg (func, axis, args, kwargs)

Aggregate using one or more operations over the specified axis.

Only perform aggregating type operations.

In [1151]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [1152]:
df = pd.DataFrame(d, name, dars); df

Unnamed: 0,Python,C++,Java
Ali,12,20,18
Sara,13,14,6
Taha,12,8,19


In [1153]:
df.agg('min')

Python    12
C++        8
Java       6
dtype: int64

In [1154]:
df.T.agg('min')

Ali     12
Sara     6
Taha     8
dtype: int64

In [1155]:
df.agg('min', axis=1)

Ali     12
Sara     6
Taha     8
dtype: int64

In [1156]:
df.agg(['min'])

Unnamed: 0,Python,C++,Java
min,12,8,6


In [1157]:
df.agg(['min'], axis=1)

Unnamed: 0,min
Ali,12
Sara,6
Taha,8


In [1158]:
df.agg(['min', 'max', 'sum', 'average'])

Unnamed: 0,Python,C++,Java
min,12.0,8.0,6.0
max,13.0,20.0,19.0
sum,37.0,42.0,43.0
average,12.333333,14.0,14.333333


In [1159]:
df.agg(['min', 'max', 'sum', 'average'], axis=1)

Unnamed: 0,min,max,sum,average
Ali,12.0,20.0,50.0,16.666667
Sara,6.0,14.0,33.0,11.0
Taha,8.0,19.0,39.0,13.0


### combine (other, func, fill_value)

Perform column-wise combine with another DataFrame.

In [1160]:
f = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 6]})
df2 = pd.DataFrame({'A': [1, 1], 'B': [4, 5]})

In [1161]:
display(df1, df2)

Unnamed: 0,A,B
0,5,2
1,0,6


Unnamed: 0,A,B
0,1,4
1,1,5


In [1162]:
df1.combine(df2, f)

Unnamed: 0,A,B
0,1,2
1,1,6


In [1163]:
df1.combine(df2, np.minimum)

Unnamed: 0,A,B
0,1,2
1,0,5


In [1164]:
#---------------

In [1165]:
f = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [1166]:
display(df1, df2)

Unnamed: 0,A,B
0,0,
1,0,4.0


Unnamed: 0,A,B
0,1,3
1,1,3


In [1167]:
df1.combine(df2, f, fill_value=1)

Unnamed: 0,A,B
0,0,1.0
1,0,4.0


In [1168]:
df1.combine(df2, f, fill_value=3)

Unnamed: 0,A,B
0,0,3.0
1,0,3.0


### groupby (by, axis, level, group_keys, dropna)

In [1169]:
df = pd.DataFrame({'Brand': ['BMW', 'BMW', 'Benz', 'Benz'], 'Max Speed': [220, 180, 230, 200]}); df

Unnamed: 0,Brand,Max Speed
0,BMW,220
1,BMW,180
2,Benz,230
3,Benz,200


In [1170]:
df.groupby('Brand').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Brand,Max Speed
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BMW,0,BMW,220
BMW,1,BMW,180
Benz,2,Benz,230
Benz,3,Benz,200


In [1171]:
df.groupby('Brand').describe()

Unnamed: 0_level_0,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Brand,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
BMW,2.0,200.0,28.284271,180.0,190.0,200.0,210.0,220.0
Benz,2.0,215.0,21.213203,200.0,207.5,215.0,222.5,230.0


In [1172]:
df.groupby(['Brand']).max()

Unnamed: 0_level_0,Max Speed
Brand,Unnamed: 1_level_1
BMW,220
Benz,230


In [1173]:
df.set_index('Brand', inplace=True); df

Unnamed: 0_level_0,Max Speed
Brand,Unnamed: 1_level_1
BMW,220
BMW,180
Benz,230
Benz,200


In [1174]:
df.groupby(['Brand']).max()

Unnamed: 0_level_0,Max Speed
Brand,Unnamed: 1_level_1
BMW,220
Benz,230


In [1175]:
df.groupby(level=0).max()

Unnamed: 0_level_0,Max Speed
Brand,Unnamed: 1_level_1
BMW,220
Benz,230


..........

In [1176]:
l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
df = pd.DataFrame(l, columns=["a", "b", "c"]); df

Unnamed: 0,a,b,c
0,1,2.0,3
1,1,,4
2,2,1.0,3
3,1,2.0,2


In [1177]:
df.groupby('b').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,2,2,1.0,3
2.0,0,1,2.0,3
2.0,3,1,2.0,2


In [1178]:
df.groupby('b', dropna=False).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,2,2,1.0,3
2.0,0,1,2.0,3
2.0,3,1,2.0,2
,1,1,,4


In [1179]:
df.groupby("b").sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5


In [1180]:
df.groupby(by="b", dropna=False).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5
,1,4


In [1181]:
#---------------

In [1182]:
l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
df = pd.DataFrame(l, columns=["a", "b", "c"]); df

Unnamed: 0,a,b,c
0,a,12.0,12.0
1,,12.3,33.0
2,b,12.3,123.0
3,a,1.0,1.0


In [1183]:
df.groupby("a").sum()

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
a,13.0,13.0
b,12.3,123.0


In [1184]:
df.groupby("a", dropna=False).sum()

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
a,13.0,13.0
b,12.3,123.0
,12.3,33.0


..........

In [1185]:
d = {'key1' : ['ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'sara'],
     'key2' : ['one', 'one', 'two', 'one', 'one', 'two', 'two'],
     'data' : [12, 16, 13, 20, 8, 17, 10]}

In [1186]:
df = pd.DataFrame(d); df

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13
3,sara,one,20
4,sara,one,8
5,sara,two,17
6,sara,two,10


In [1187]:
df.groupby('key1').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,key1,key2,data
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ali,0,ali,one,12
ali,1,ali,one,16
ali,2,ali,two,13
sara,3,sara,one,20
sara,4,sara,one,8
sara,5,sara,two,17
sara,6,sara,two,10


In [1188]:
df.groupby('key1').describe()

Unnamed: 0_level_0,data,data,data,data,data,data,data,data
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
ali,3.0,13.666667,2.081666,12.0,12.5,13.0,14.5,16.0
sara,4.0,13.75,5.678908,8.0,9.5,13.5,17.75,20.0


In [1189]:
df.groupby('key1').max()

Unnamed: 0_level_0,key2,data
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,two,16
sara,two,20


In [1190]:
df.groupby('key1').max()[['data']]

Unnamed: 0_level_0,data
key1,Unnamed: 1_level_1
ali,16
sara,20


In [1191]:
df.groupby(['key1', 'key2']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,key1,key2,data
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ali,one,0,ali,one,12
ali,one,1,ali,one,16
ali,two,2,ali,two,13
sara,one,3,sara,one,20
sara,one,4,sara,one,8
sara,two,5,sara,two,17
sara,two,6,sara,two,10


In [1192]:
df.groupby(['key1', 'key2']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,data
key1,key2,Unnamed: 2_level_1
ali,one,16
ali,two,13
sara,one,20
sara,two,17


In [1193]:
#---------------

In [1194]:
df

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13
3,sara,one,20
4,sara,one,8
5,sara,two,17
6,sara,two,10


In [1195]:
df['key1']

0     ali
1     ali
2     ali
3    sara
4    sara
5    sara
6    sara
Name: key1, dtype: object

In [1196]:
df['data']

0    12
1    16
2    13
3    20
4     8
5    17
6    10
Name: data, dtype: int64

In [1197]:
df['data'].groupby(df['key1']).apply(lambda x: x)

key1   
ali   0    12
      1    16
      2    13
sara  3    20
      4     8
      5    17
      6    10
Name: data, dtype: int64

In [1198]:
df.groupby(df['key1']).apply(lambda x: x)['data']

key1   
ali   0    12
      1    16
      2    13
sara  3    20
      4     8
      5    17
      6    10
Name: data, dtype: int64

In [1199]:
df[['data']]

Unnamed: 0,data
0,12
1,16
2,13
3,20
4,8
5,17
6,10


In [1200]:
df[['data']].groupby(df['key1']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,data
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,12
ali,1,16
ali,2,13
sara,3,20
sara,4,8
sara,5,17
sara,6,10


In [1201]:
df.groupby(df['key1']).apply(lambda x: x)[['data']]

Unnamed: 0_level_0,Unnamed: 1_level_0,data
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,12
ali,1,16
ali,2,13
sara,3,20
sara,4,8
sara,5,17
sara,6,10


In [1202]:
df['data'].groupby(df['key1']).max()

key1
ali     16
sara    20
Name: data, dtype: int64

In [1203]:
df[['data']].groupby(df['key1']).max()

Unnamed: 0_level_0,data
key1,Unnamed: 1_level_1
ali,16
sara,20


In [1204]:
df.groupby(df['key1']).max()[['data']]

Unnamed: 0_level_0,data
key1,Unnamed: 1_level_1
ali,16
sara,20


In [1205]:
#---------------

In [1206]:
df

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13
3,sara,one,20
4,sara,one,8
5,sara,two,17
6,sara,two,10


In [1207]:
df[df['key1']=='ali']

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13


In [1208]:
l = list(df.groupby('key1'))
display(l, l[0][1])

[('ali',
    key1 key2  data
  0  ali  one    12
  1  ali  one    16
  2  ali  two    13),
 ('sara',
     key1 key2  data
  3  sara  one    20
  4  sara  one     8
  5  sara  two    17
  6  sara  two    10)]

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13


In [1209]:
d = dict(list(df.groupby('key1')))
display(d, d['ali'])

{'ali':   key1 key2  data
 0  ali  one    12
 1  ali  one    16
 2  ali  two    13,
 'sara':    key1 key2  data
 3  sara  one    20
 4  sara  one     8
 5  sara  two    17
 6  sara  two    10}

Unnamed: 0,key1,key2,data
0,ali,one,12
1,ali,one,16
2,ali,two,13


..........

In [1210]:
a = [5, 6, 7, 8, 9, 10, 11, 12, 13]
b = [15, 16, 17, 18, 19, 20, 21, 22, 23]

In [1211]:
df = pd.DataFrame({'col1': a, 'col2': b}); df

Unnamed: 0,col1,col2
0,5,15
1,6,16
2,7,17
3,8,18
4,9,19
5,10,20
6,11,21
7,12,22
8,13,23


In [1212]:
q = pd.cut(df['col1'], 4); q

0    (4.992, 7.0]
1    (4.992, 7.0]
2    (4.992, 7.0]
3      (7.0, 9.0]
4      (7.0, 9.0]
5     (9.0, 11.0]
6     (9.0, 11.0]
7    (11.0, 13.0]
8    (11.0, 13.0]
Name: col1, dtype: category
Categories (4, interval[float64, right]): [(4.992, 7.0] < (7.0, 9.0] < (9.0, 11.0] < (11.0, 13.0]]

In [1213]:
df.groupby(q, observed=True).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col2
col1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(4.992, 7.0]",0,5,15
"(4.992, 7.0]",1,6,16
"(4.992, 7.0]",2,7,17
"(7.0, 9.0]",3,8,18
"(7.0, 9.0]",4,9,19
"(9.0, 11.0]",5,10,20
"(9.0, 11.0]",6,11,21
"(11.0, 13.0]",7,12,22
"(11.0, 13.0]",8,13,23


In [1214]:
df['col1'].groupby(q, observed=True).apply(lambda x: x)

col1           
(4.992, 7.0]  0     5
              1     6
              2     7
(7.0, 9.0]    3     8
              4     9
(9.0, 11.0]   5    10
              6    11
(11.0, 13.0]  7    12
              8    13
Name: col1, dtype: int64

In [1215]:
df['col2'].groupby(q, observed=True).apply(lambda x: x)

col1           
(4.992, 7.0]  0    15
              1    16
              2    17
(7.0, 9.0]    3    18
              4    19
(9.0, 11.0]   5    20
              6    21
(11.0, 13.0]  7    22
              8    23
Name: col2, dtype: int64

In [1216]:
df[['col2']].groupby(q, observed=True).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,col2
col1,Unnamed: 1_level_1,Unnamed: 2_level_1
"(4.992, 7.0]",0,15
"(4.992, 7.0]",1,16
"(4.992, 7.0]",2,17
"(7.0, 9.0]",3,18
"(7.0, 9.0]",4,19
"(9.0, 11.0]",5,20
"(9.0, 11.0]",6,21
"(11.0, 13.0]",7,22
"(11.0, 13.0]",8,23


In [1217]:
#---------------

In [1218]:
f = lambda g: [g.max(), g.count()]

In [1219]:
df['col2'].groupby(q, observed=True).apply(f) 

col1
(4.992, 7.0]    [17, 3]
(7.0, 9.0]      [19, 2]
(9.0, 11.0]     [21, 2]
(11.0, 13.0]    [23, 2]
Name: col2, dtype: object

In [1220]:
#---------------

In [1221]:
myfunc = lambda g: {'max': g.max(), 'count': g.count()}

In [1222]:
df['col2'].groupby(q, observed=True).apply(myfunc) 

col1               
(4.992, 7.0]  max      17
              count     3
(7.0, 9.0]    max      19
              count     2
(9.0, 11.0]   max      21
              count     2
(11.0, 13.0]  max      23
              count     2
Name: col2, dtype: int64

In [1223]:
df['col2'].groupby(q, observed=True).apply(myfunc).unstack()

Unnamed: 0_level_0,max,count
col1,Unnamed: 1_level_1,Unnamed: 2_level_1
"(4.992, 7.0]",17,3
"(7.0, 9.0]",19,2
"(9.0, 11.0]",21,2
"(11.0, 13.0]",23,2


In [1224]:
df['col2'].groupby(q, observed=True).agg(['max', 'count'])

Unnamed: 0_level_0,max,count
col1,Unnamed: 1_level_1,Unnamed: 2_level_1
"(4.992, 7.0]",17,3
"(7.0, 9.0]",19,2
"(9.0, 11.0]",21,2
"(11.0, 13.0]",23,2


..........

In [1225]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [1226]:
df = pd.DataFrame({'name': n, 'score': s}); df

Unnamed: 0,name,score
0,ali,11
1,ali,20
2,ali,13
3,ali,14
4,sara,15
5,sara,6
6,sara,12
7,taha,18
8,taha,19


In [1227]:
df.groupby('name').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ali,0,ali,11
ali,1,ali,20
ali,2,ali,13
ali,3,ali,14
sara,4,sara,15
sara,5,sara,6
sara,6,sara,12
taha,7,taha,18
taha,8,taha,19


In [1228]:
df.groupby('name').score.apply(lambda x: x)

name   
ali   0    11
      1    20
      2    13
      3    14
sara  4    15
      5     6
      6    12
taha  7    18
      8    19
Name: score, dtype: int64

In [1229]:
df.groupby('name')['score'].apply(lambda x: x)

name   
ali   0    11
      1    20
      2    13
      3    14
sara  4    15
      5     6
      6    12
taha  7    18
      8    19
Name: score, dtype: int64

In [1230]:
df.groupby('name')[['score']].apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,11
ali,1,20
ali,2,13
ali,3,14
sara,4,15
sara,5,6
sara,6,12
taha,7,18
taha,8,19


In [1231]:
df.groupby('name').apply(lambda x: x)[['score']]

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,11
ali,1,20
ali,2,13
ali,3,14
sara,4,15
sara,5,6
sara,6,12
taha,7,18
taha,8,19


In [1232]:
#---------------

In [1233]:
df.groupby('name').max()

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
ali,20
sara,15
taha,19


In [1234]:
df.groupby('name').score.max()

name
ali     20
sara    15
taha    19
Name: score, dtype: int64

In [1235]:
df.groupby('name')['score'].max()

name
ali     20
sara    15
taha    19
Name: score, dtype: int64

In [1236]:
df.groupby('name')[['score']].max()

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
ali,20
sara,15
taha,19


transform

In [1237]:
df

Unnamed: 0,name,score
0,ali,11
1,ali,20
2,ali,13
3,ali,14
4,sara,15
5,sara,6
6,sara,12
7,taha,18
8,taha,19


In [1238]:
df.groupby('name').count()

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
ali,4
sara,3
taha,2


In [1239]:
df.groupby('name').apply('count')

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
ali,4
sara,3
taha,2


In [1240]:
df.groupby('name').transform('count')

Unnamed: 0,score
0,4
1,4
2,4
3,4
4,3
5,3
6,3
7,2
8,2


In [1241]:
#---------------

In [1242]:
df.groupby('name').transform('max')

Unnamed: 0,score
0,20
1,20
2,20
3,20
4,15
5,15
6,15
7,19
8,19


In [1243]:
df.groupby('name')[['score']].transform('max')

Unnamed: 0,score
0,20
1,20
2,20
3,20
4,15
5,15
6,15
7,19
8,19


In [1244]:
df.groupby('name')['score'].transform('max')

0    20
1    20
2    20
3    20
4    15
5    15
6    15
7    19
8    19
Name: score, dtype: int64

In [1245]:
#---------------

In [1246]:
df.groupby('name').transform(lambda x: x.max())

Unnamed: 0,score
0,20
1,20
2,20
3,20
4,15
5,15
6,15
7,19
8,19


In [1247]:
df.groupby('name').transform(lambda x: x)

Unnamed: 0,score
0,11
1,20
2,13
3,14
4,15
5,6
6,12
7,18
8,19


In [1248]:
df.groupby('name').transform(lambda x: x - 1)

Unnamed: 0,score
0,10
1,19
2,12
3,13
4,14
5,5
6,11
7,17
8,18


In [1249]:
#---------------

In [1250]:
g = df.groupby('name')['score']
(df['score'] - g.transform('mean')) / g.transform('std')

0   -0.903696
1    1.420094
2   -0.387298
3   -0.129099
4    0.872872
5   -1.091089
6    0.218218
7   -0.707107
8    0.707107
Name: score, dtype: float64

In [1251]:
g = df.groupby('name')['score']
(g.transform(lambda x: x) - g.transform('mean')) / g.transform('std')

0   -0.903696
1    1.420094
2   -0.387298
3   -0.129099
4    0.872872
5   -1.091089
6    0.218218
7   -0.707107
8    0.707107
Name: score, dtype: float64

...........

In [1252]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
r = [7, 4, 9, 2, 4, 5, 1, 8, 9]
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [1253]:
df = pd.DataFrame({'name': n, 'rank': r, 'score': s}); df

Unnamed: 0,name,rank,score
0,ali,7,11
1,ali,4,20
2,ali,9,13
3,ali,2,14
4,sara,4,15
5,sara,5,6
6,sara,1,12
7,taha,8,18
8,taha,9,19


In [1254]:
df.groupby('name').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,rank,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ali,0,ali,7,11
ali,1,ali,4,20
ali,2,ali,9,13
ali,3,ali,2,14
sara,4,sara,4,15
sara,5,sara,5,6
sara,6,sara,1,12
taha,7,taha,8,18
taha,8,taha,9,19


In [1255]:
df.groupby('name').score.apply(lambda x: x)

name   
ali   0    11
      1    20
      2    13
      3    14
sara  4    15
      5     6
      6    12
taha  7    18
      8    19
Name: score, dtype: int64

In [1256]:
df.groupby('name')['score'].apply(lambda x: x)

name   
ali   0    11
      1    20
      2    13
      3    14
sara  4    15
      5     6
      6    12
taha  7    18
      8    19
Name: score, dtype: int64

In [1257]:
df.groupby('name')[['score']].apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,11
ali,1,20
ali,2,13
ali,3,14
sara,4,15
sara,5,6
sara,6,12
taha,7,18
taha,8,19


In [1258]:
df.groupby('name')[['score']].sum()

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
ali,58
sara,33
taha,37


In [1259]:
df.groupby('name')['rank'].apply(lambda x: x)

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


name   
ali   0    7
      1    4
      2    9
      3    2
sara  4    4
      5    5
      6    1
taha  7    8
      8    9
Name: rank, dtype: int64

In [1260]:
df.groupby('name')[['rank']].apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,0,7
ali,1,4
ali,2,9
ali,3,2
sara,4,4
sara,5,5
sara,6,1
taha,7,8
taha,8,9


In [1261]:
df.groupby('name')[['rank', 'score']].apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ali,0,7,11
ali,1,4,20
ali,2,9,13
ali,3,2,14
sara,4,4,15
sara,5,5,6
sara,6,1,12
taha,7,8,18
taha,8,9,19


In [1262]:
#---------------

In [1263]:
df.groupby('name').transform('max')

Unnamed: 0,rank,score
0,9,20
1,9,20
2,9,20
3,9,20
4,5,15
5,5,15
6,5,15
7,9,19
8,9,19


In [1264]:
df.groupby('name')['rank'].transform('max')

0    9
1    9
2    9
3    9
4    5
5    5
6    5
7    9
8    9
Name: rank, dtype: int64

In [1265]:
df.groupby('name')[['rank']].transform('max')

Unnamed: 0,rank
0,9
1,9
2,9
3,9
4,5
5,5
6,5
7,9
8,9


In [1266]:
df.groupby('name')[['rank']].transform('mean')

Unnamed: 0,rank
0,5.5
1,5.5
2,5.5
3,5.5
4,3.333333
5,3.333333
6,3.333333
7,8.5
8,8.5


In [1267]:
df

Unnamed: 0,name,rank,score
0,ali,7,11
1,ali,4,20
2,ali,9,13
3,ali,2,14
4,sara,4,15
5,sara,5,6
6,sara,1,12
7,taha,8,18
8,taha,9,19


In [1268]:
df.groupby('name')[['rank']].transform(lambda x: x + 10)

Unnamed: 0,rank
0,17
1,14
2,19
3,12
4,14
5,15
6,11
7,18
8,19


In [1269]:
df[['rank']] = df.groupby('name')[['rank']].transform(lambda x: x + 10); df

Unnamed: 0,name,rank,score
0,ali,17,11
1,ali,14,20
2,ali,19,13
3,ali,12,14
4,sara,14,15
5,sara,15,6
6,sara,11,12
7,taha,18,18
8,taha,19,19


In [1270]:
#---------------

In [1271]:
df

Unnamed: 0,name,rank,score
0,ali,17,11
1,ali,14,20
2,ali,19,13
3,ali,12,14
4,sara,14,15
5,sara,15,6
6,sara,11,12
7,taha,18,18
8,taha,19,19


In [1272]:
df.groupby('name').min()

Unnamed: 0_level_0,rank,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,12,11
sara,11,6
taha,18,18


In [1273]:
df.groupby('name').agg('min')

Unnamed: 0_level_0,rank,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,12,11
sara,11,6
taha,18,18


In [1274]:
df.groupby('name').apply(lambda x: x.min())

Unnamed: 0_level_0,name,rank,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ali,ali,12,11
sara,sara,11,6
taha,taha,18,18


In [1275]:
df.groupby('name').transform('min')

Unnamed: 0,rank,score
0,12,11
1,12,11
2,12,11
3,12,11
4,11,6
5,11,6
6,11,6
7,18,18
8,18,18


...........

In [1276]:
df = pd.DataFrame({'Bird' : ['A', 'A', 'B', 'B', 'B'],'Speed' : [380, 370, 24, 26,np.nan]}); df

Unnamed: 0,Bird,Speed
0,A,380.0
1,A,370.0
2,B,24.0
3,B,26.0
4,B,


In [1277]:
df.groupby('Bird').mean()

Unnamed: 0_level_0,Speed
Bird,Unnamed: 1_level_1
A,375.0
B,25.0


In [1278]:
df.groupby('Bird')['Speed'].apply(lambda x: x.fillna(x.mean()))

Bird   
A     0    380.0
      1    370.0
B     2     24.0
      3     26.0
      4     25.0
Name: Speed, dtype: float64

In [1279]:
df.groupby('Bird')['Speed'].transform(lambda x: x.fillna(x.mean()))

0    380.0
1    370.0
2     24.0
3     26.0
4     25.0
Name: Speed, dtype: float64

In [1280]:
df['Speed'] = df.groupby('Bird')['Speed'].transform(lambda x: x.fillna(x.mean())); df

Unnamed: 0,Bird,Speed
0,A,380.0
1,A,370.0
2,B,24.0
3,B,26.0
4,B,25.0


...........

In [1281]:
import seaborn as sns
df = sns.load_dataset('iris'); df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [1282]:
df.groupby(['species']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,0,5.1,3.5,1.4,0.2,setosa
setosa,1,4.9,3.0,1.4,0.2,setosa
setosa,2,4.7,3.2,1.3,0.2,setosa
setosa,3,4.6,3.1,1.5,0.2,setosa
setosa,4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...,...
virginica,145,6.7,3.0,5.2,2.3,virginica
virginica,146,6.3,2.5,5.0,1.9,virginica
virginica,147,6.5,3.0,5.2,2.0,virginica
virginica,148,6.2,3.4,5.4,2.3,virginica


In [1283]:
df.groupby(['species']).apply(lambda x: x[:3])

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,0,5.1,3.5,1.4,0.2,setosa
setosa,1,4.9,3.0,1.4,0.2,setosa
setosa,2,4.7,3.2,1.3,0.2,setosa
versicolor,50,7.0,3.2,4.7,1.4,versicolor
versicolor,51,6.4,3.2,4.5,1.5,versicolor
versicolor,52,6.9,3.1,4.9,1.5,versicolor
virginica,100,6.3,3.3,6.0,2.5,virginica
virginica,101,5.8,2.7,5.1,1.9,virginica
virginica,102,7.1,3.0,5.9,2.1,virginica


In [1284]:
#---------------

In [1285]:
f = lambda x, n=3: x[:n]

In [1286]:
df.groupby(['species']).apply(f)

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,0,5.1,3.5,1.4,0.2,setosa
setosa,1,4.9,3.0,1.4,0.2,setosa
setosa,2,4.7,3.2,1.3,0.2,setosa
versicolor,50,7.0,3.2,4.7,1.4,versicolor
versicolor,51,6.4,3.2,4.5,1.5,versicolor
versicolor,52,6.9,3.1,4.9,1.5,versicolor
virginica,100,6.3,3.3,6.0,2.5,virginica
virginica,101,5.8,2.7,5.1,1.9,virginica
virginica,102,7.1,3.0,5.9,2.1,virginica


In [1287]:
df.groupby(['species']).apply(f, 2)

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,0,5.1,3.5,1.4,0.2,setosa
setosa,1,4.9,3.0,1.4,0.2,setosa
versicolor,50,7.0,3.2,4.7,1.4,versicolor
versicolor,51,6.4,3.2,4.5,1.5,versicolor
virginica,100,6.3,3.3,6.0,2.5,virginica
virginica,101,5.8,2.7,5.1,1.9,virginica


In [1288]:
#---------------

In [1289]:
df.groupby(['species']).min()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,4.3,2.3,1.0,0.1
versicolor,4.9,2.0,3.0,1.0
virginica,4.9,2.2,4.5,1.4


In [1290]:
df.groupby(['species']).agg('min')

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,4.3,2.3,1.0,0.1
versicolor,4.9,2.0,3.0,1.0
virginica,4.9,2.2,4.5,1.4


In [1291]:
df.groupby(['species']).apply(lambda x: x.min())

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
setosa,4.3,2.3,1.0,0.1,setosa
versicolor,4.9,2.0,3.0,1.0,versicolor
virginica,4.9,2.2,4.5,1.4,virginica


In [1292]:
df.groupby(['species']).transform('min')

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.3,2.3,1.0,0.1
1,4.3,2.3,1.0,0.1
2,4.3,2.3,1.0,0.1
3,4.3,2.3,1.0,0.1
4,4.3,2.3,1.0,0.1
...,...,...,...,...
145,4.9,2.2,4.5,1.4
146,4.9,2.2,4.5,1.4
147,4.9,2.2,4.5,1.4
148,4.9,2.2,4.5,1.4


In [1293]:
#---------------

In [1294]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [1295]:
df.sort_values(by='sepal_length') 

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
...,...,...,...,...,...
122,7.7,2.8,6.7,2.0,virginica
118,7.7,2.6,6.9,2.3,virginica
117,7.7,3.8,6.7,2.2,virginica
135,7.7,3.0,6.1,2.3,virginica


In [1296]:
df.sort_values(by='sepal_length')[:6] 

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
22,4.6,3.6,1.0,0.2,setosa


In [1297]:
f = lambda frame, n=3: frame.sort_values(by='sepal_length')[:n]     

In [1298]:
f(df, 8)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
22,4.6,3.6,1.0,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
6,4.6,3.4,1.4,0.3,setosa


In [1299]:
df.groupby(['species']).apply(f)

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,13,4.3,3.0,1.1,0.1,setosa
setosa,8,4.4,2.9,1.4,0.2,setosa
setosa,38,4.4,3.0,1.3,0.2,setosa
versicolor,57,4.9,2.4,3.3,1.0,versicolor
versicolor,60,5.0,2.0,3.5,1.0,versicolor
versicolor,93,5.0,2.3,3.3,1.0,versicolor
virginica,106,4.9,2.5,4.5,1.7,virginica
virginica,121,5.6,2.8,4.9,2.0,virginica
virginica,113,5.7,2.5,5.0,2.0,virginica


In [1300]:
df.groupby(['species']).apply(f, 5)

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width,species
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,13,4.3,3.0,1.1,0.1,setosa
setosa,8,4.4,2.9,1.4,0.2,setosa
setosa,38,4.4,3.0,1.3,0.2,setosa
setosa,42,4.4,3.2,1.3,0.2,setosa
setosa,41,4.5,2.3,1.3,0.3,setosa
versicolor,57,4.9,2.4,3.3,1.0,versicolor
versicolor,60,5.0,2.0,3.5,1.0,versicolor
versicolor,93,5.0,2.3,3.3,1.0,versicolor
versicolor,98,5.1,2.5,3.0,1.1,versicolor
versicolor,59,5.2,2.7,3.9,1.4,versicolor


MultiIndex

In [1301]:
l = [['BMW','BMW','Benz','Benz'], ['A','B','A','B']]
mi = pd.MultiIndex.from_arrays(l, names=('Brand', 'Class'))
df = pd.DataFrame({'Max Speed': [220, 180, 230, 200]}, index=mi); df

Unnamed: 0_level_0,Unnamed: 1_level_0,Max Speed
Brand,Class,Unnamed: 2_level_1
BMW,A,220
BMW,B,180
Benz,A,230
Benz,B,200


In [1302]:
df.groupby(level=0).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Max Speed
Brand,Brand,Class,Unnamed: 3_level_1
BMW,BMW,A,220
BMW,BMW,B,180
Benz,Benz,A,230
Benz,Benz,B,200


In [1303]:
df.groupby(level=1).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Max Speed
Class,Brand,Class,Unnamed: 3_level_1
A,BMW,A,220
A,Benz,A,230
B,BMW,B,180
B,Benz,B,200


In [1304]:
df.groupby(level=0).describe()

Unnamed: 0_level_0,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Brand,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
BMW,2.0,200.0,28.284271,180.0,190.0,200.0,210.0,220.0
Benz,2.0,215.0,21.213203,200.0,207.5,215.0,222.5,230.0


In [1305]:
df.groupby(level=0).mean()

Unnamed: 0_level_0,Max Speed
Brand,Unnamed: 1_level_1
BMW,200.0
Benz,215.0


In [1306]:
#---------------

In [1307]:
df.reset_index(('Brand', 'Class'), inplace=True); df

Unnamed: 0,Brand,Class,Max Speed
0,BMW,A,220
1,BMW,B,180
2,Benz,A,230
3,Benz,B,200


In [1308]:
df.groupby(['Brand', 'Class']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Brand,Class,Max Speed
Brand,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BMW,A,0,BMW,A,220
BMW,B,1,BMW,B,180
Benz,A,2,Benz,A,230
Benz,B,3,Benz,B,200


In [1309]:
df.groupby('Brand').apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Brand,Class,Max Speed
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BMW,0,BMW,A,220
BMW,1,BMW,B,180
Benz,2,Benz,A,230
Benz,3,Benz,B,200


In [1310]:
df.groupby("Class").describe()

Unnamed: 0_level_0,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed,Max Speed
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
A,2.0,225.0,7.071068,220.0,222.5,225.0,227.5,230.0
B,2.0,190.0,14.142136,180.0,185.0,190.0,195.0,200.0


In [1311]:
df.groupby("Class").max()

Unnamed: 0_level_0,Brand,Max Speed
Class,Unnamed: 1_level_1,Unnamed: 2_level_1
A,Benz,230
B,Benz,200


In [1312]:
# MultiColumns

In [1313]:
arr = np.array([[11, 12, 16, 4, 15],[17, 2, 18, 19, 10],[7, 15, 13, 14, 11],[8, 17, 13, 20, 12]])
ci = pd.MultiIndex.from_arrays([[1, 2, 3, 1, 2],['Ali', 'Ali', 'Ali', 'Sara', 'Sara']],names=['X', 'Y'])

In [1314]:
df = pd.DataFrame(arr, columns=ci); df

X,1,2,3,1,2
Y,Ali,Ali,Ali,Sara,Sara
0,11,12,16,4,15
1,17,2,18,19,10
2,7,15,13,14,11
3,8,17,13,20,12


In [1315]:
df.groupby(level='Y', axis=1).max()

  df.groupby(level='Y', axis=1).max()


Y,Ali,Sara
0,16,15
1,18,19
2,15,14
3,17,20


In [1316]:
df.T

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3
X,Y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Ali,11,17,7,8
2,Ali,12,2,15,17
3,Ali,16,18,13,13
1,Sara,4,19,14,20
2,Sara,15,10,11,12


In [1317]:
df.T.groupby(level='Y').max()

Unnamed: 0_level_0,0,1,2,3
Y,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ali,16,18,15,17
Sara,15,19,14,20


In [1318]:
df.T.groupby(level='Y').max().T

Y,Ali,Sara
0,16,15
1,18,19
2,15,14
3,17,20
