#### [pandas.pydata.org](https://pandas.pydata.org/pandas-docs/stable/index.html)

In [None]:
import pandas as pd
import numpy as np
from numpy import nan as NaN 

# Intro

### DataFrame(data, index, columns)

In [None]:
score = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
pd.DataFrame(data=score)

In [None]:
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
pd.DataFrame(data=score, index=name)

In [None]:
dars = ['Python', 'C++', 'Java']
pd.DataFrame(data=score, columns=dars)

In [None]:
pd.DataFrame(data=score, index=name, columns=dars)

In [None]:
df = pd.DataFrame(score, name, dars); df

In [None]:
len(df)

In [None]:
df.shape

In [None]:
df.shape[0]

In [None]:
df.shape[1]

In [None]:
df.size

In [None]:
df.dtypes

In [None]:
df.values

In [None]:
df.index

In [None]:
df.columns

In [None]:
df.axes

In [None]:
df.index.name = 'name'
df

In [None]:
df.columns.name = 'dars'
df

In [None]:
df.T

In [None]:
df

In [None]:
df.unstack()

In [None]:
df.unstack().unstack()

In [None]:
df.describe()

In [None]:
df.info()

### MultiIndex

In [None]:
d = np.arange(12).reshape((4, 3))
i = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
c = [['Ohio', 'Colorado', 'Ohio'], ['Green', 'Red', 'Green']]

In [None]:
frame = pd.DataFrame(data=d, index=i, columns=c)
frame

In [None]:
frame.reset_index()

In [None]:
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

In [None]:
frame.reset_index()

In [None]:
frame.index

In [None]:
frame.columns

In [None]:
frame.axes

In [None]:
frame

In [None]:
frame.swaplevel()

In [None]:
frame.T

In [None]:
frame.unstack()

pd.MultiIndex.from_arrays()

In [None]:
d = np.arange(12).reshape((4, 3))
i = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
c = [['Ohio', 'Colorado', 'Ohio'], ['Green', 'Red', 'Green']]

In [None]:
frame = pd.DataFrame(data=d, index=i, columns=c)
frame

In [None]:
mi = pd.MultiIndex.from_arrays(i, names=['key1', 'key2'])
mc = pd.MultiIndex.from_arrays(c, names=['state', 'color'])

In [None]:
frame = pd.DataFrame(data=d, index=mi, columns=mc)
frame

### data with dict {} 

In [None]:
# column-wise:

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
pd.DataFrame(d)

In [None]:
#---------------

In [None]:
d = {'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}
name = ['Ali', 'Sara', 'Taha']

In [None]:
pd.DataFrame(d)

In [None]:
pd.DataFrame(data=d, index=name)

In [None]:
#---------------

In [None]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [None]:
pd.DataFrame(d)

In [None]:
pd.DataFrame(d).set_index('name')

..........

In [None]:
d = {"Name":  ["Python", "C++"],
     "Score": [18, 17]}
pd.DataFrame(d)

In [None]:
d = {"Name":  {"0":"Python", "1":"C++"},
     "Score": {"0":18, "1":17}}
pd.DataFrame(d)

In [None]:
# row-wise:

In [None]:
l = [['Python', 18], ['C++', 17]]
pd.DataFrame(l)

In [None]:
l = [{'Name':'Python', 'Score':18}, {'Name':'C++', 'Score':17}]
pd.DataFrame(l)

..........

In [None]:
d = [3, 4, 2, 7]
i = ['a', 'b', 'c', 'd']

In [None]:
pd.DataFrame(index=i, data=d)

In [None]:
pd.DataFrame(index=i, data={'score': [3, 4, 2, 7]})

In [None]:
pd.DataFrame(index=i, data={'score': 5})

### Indexing & Filtering

#### intro

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [None]:
df

In [None]:
# column access (Series and DataFrame)

In [None]:
df.Java                 # type: Series

In [None]:
df['Java']              # type: Series

In [None]:
df[['Java']]            # type: DataFrame

In [None]:
df[['Python', 'Java']]

In [None]:
# row access (DataFrame)

In [None]:
df

In [None]:
df[1:2]

In [None]:
df[:2]

In [None]:
df[:]

In [None]:
# adding to a DataFrame (columns only)

In [None]:
df['R'] = np.nan 
df

In [None]:
df[['C']] = np.nan 
df

In [None]:
df[['a', 'b']] = np.nan 
df

..........

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d)

In [None]:
df

In [None]:
df[1]

In [None]:
df[[1]]

In [None]:
df[[0, 2]]

In [None]:
#---------------

In [None]:
df

In [None]:
df[1:2]

In [None]:
df[:2]

In [None]:
df[:]

In [None]:
#---------------

In [None]:
df[3] = np.nan
df

In [None]:
df[[4]] = np.nan
df

In [None]:
df[[5, 6]] = np.nan
df

#### loc

satr:

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
# a single row

In [None]:
df.loc['Sara']

In [None]:
df.loc['Sara':'Sara']

In [None]:
df.loc[['Sara']]

In [None]:
# part of a single row

In [None]:
df.loc['Sara', 'C++':]

In [None]:
df.loc['Sara':'Sara', 'C++':]

In [None]:
df.loc[['Sara'], 'C++':]

In [None]:
df.loc['Sara', ['Python', 'Java']]

In [None]:
df.loc['Sara':'Sara', ['Python', 'Java']]

In [None]:
df.loc[['Sara'], ['Python', 'Java']]

In [None]:
# several rows

In [None]:
df.loc['Sara':'Mahsa']

In [None]:
df.loc[['Sara', 'Ali', 'Mahsa']]

In [None]:
df.index != 'Sara'

In [None]:
df.loc[df.index != 'Sara']  

sotoon:

In [None]:
# a single column

In [None]:
df

In [None]:
df.loc[:, 'Python']

In [None]:
df.loc[:, 'Python':'Python']

In [None]:
df.loc[:, ['Python']]

In [None]:
# part of a single column

In [None]:
df.loc['Taha':, 'Python']

In [None]:
df.loc['Taha':, 'Python':'Python']

In [None]:
df.loc['Taha':, ['Python']]

In [None]:
df.loc[['Sara', 'Mahsa'], 'Python']

In [None]:
df.loc[['Sara', 'Mahsa'], 'Python':'Python']

In [None]:
df.loc[['Sara', 'Mahsa'], ['Python']]

In [None]:
# several columns

In [None]:
df.loc[:, 'Python':'C++']

In [None]:
df.loc[:, ['Python', 'Java']]

In [None]:
df.columns != 'Java'

In [None]:
df.loc[:, (df.columns != 'Java')]  

onsor:

In [None]:
# a single element

In [None]:
df

In [None]:
df.loc['Sara', 'Python']                   # type: scalar (NumPy integer)

In [None]:
df.loc['Sara']['Python']                   

In [None]:
df.loc['Sara':'Sara', 'Python']            # type: Series

In [None]:
df.loc['Sara', 'Python':'Python']

In [None]:
df.loc[['Sara'], 'Python']

In [None]:
df.loc['Sara', ['Python']]

In [None]:
df.loc['Sara':'Sara', ['Python']]          # type: DataFrame

In [None]:
df.loc[['Sara'], 'Python':'Python'] 

In [None]:
df.loc['Sara':'Sara', 'Python':'Python']

In [None]:
df.loc[['Sara'], ['Python']]

bakhsh

In [None]:
# part of a DataFrame

In [None]:
df

In [None]:
df.loc['Ali':'Sara', 'Python':'C++']

In [None]:
df.loc[['Ali', 'Mahsa'], ['Python', 'Java']]

In [None]:
df.loc[['Ali', 'Mahsa'], ['Python', 'Java']]

In [None]:
df.loc[df.index != 'Sara', df.columns != 'C++']

boolean

In [None]:
df

In [None]:
df.loc[[True, False, True, True]]

In [None]:
df.loc[:, [False, True, True]]

In [None]:
df.loc[[True, False, True, True], [True, False, True]]

afzoodan:

In [None]:
# adding to a DataFrame (columns only)

In [None]:
df

In [None]:
#df.loc[['Amin']] = np.nan       KeyError: "None of [Index(['Amin'], dtype='object')] are in the [index]"

In [None]:
df.loc[:, ['R']] = np.nan
df

In [None]:
df.loc[:, [2]] = 15
df

In [None]:
#---------------

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d)
df

In [None]:
df.loc[[3]]

In [None]:
#df.loc[[4]] = np.nan        KeyError: "None of [Index([4], dtype='int32')] are in the [index]"

In [None]:
df.loc[:, [2]]

In [None]:
df.loc[:, [3]] = np.nan
df

In [None]:
df.loc[:, ['a']] = np.nan
df

#### iloc

satr:

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [None]:
df

In [None]:
# a single row

In [None]:
df.iloc[1]

In [None]:
df.iloc[-1]

In [None]:
df.iloc[1:2]

In [None]:
df.iloc[[1]]

In [None]:
df.iloc[[-1]]

In [None]:
# part of a single row

In [None]:
df.iloc[1, 1:]

In [None]:
df.iloc[1, [0, 2]]

In [None]:
df.iloc[1:2, 1:]

In [None]:
df.iloc[[1], 1:]

In [None]:
df.iloc[1:2, [0, 2]]

In [None]:
df.iloc[[1], [0, 2]]

In [None]:
# several rows

In [None]:
df.iloc[1:3]

In [None]:
df.iloc[[0, 2, 1]]

sotoon

In [None]:
# a single column

In [None]:
df

In [None]:
df.iloc[:, 0]

In [None]:
df.iloc[:, 0:1]

In [None]:
df.iloc[:, [0]]

In [None]:
# part of a single column

In [None]:
df.iloc[2:, 0]

In [None]:
df.iloc[2:, 0:1]

In [None]:
df.iloc[2:, [0]]

In [None]:
df.iloc[[1, 3], 0]

In [None]:
df.iloc[[1, 3], 0:1]

In [None]:
df.iloc[[1, 3], [0]]

In [None]:
# several columns

In [None]:
df.iloc[:, 0:2]

In [None]:
df.iloc[:, [0, 2]]

onsor:

In [None]:
# a single element

In [None]:
df

In [None]:
df.iloc[1]['Python']                    

In [None]:
# Second row, then its first value by position. A bare integer key in Series[...]
# on a string-labelled Series is deprecated, so the position is taken with .iloc.
df.iloc[1].iloc[0]

In [None]:
df.iloc[1, 0]                     

In [None]:
df.iloc[1:2, 0]     

In [None]:
df.iloc[1, 0:1]

In [None]:
df.iloc[[1], 0]

In [None]:
df.iloc[1, [0]]

In [None]:
df.iloc[1:2, [0]]     

In [None]:
df.iloc[[1], 0:1]

In [None]:
df.iloc[1:2, 0:1]

In [None]:
df.iloc[[1], [0]]

bakhsh:

In [None]:
# part of a DataFrame

In [None]:
df

In [None]:
df.iloc[0:2, 0:2]

In [None]:
df.iloc[[0, 3], [0, 2]]

boolean:

In [None]:
df

In [None]:
df.iloc[[True, False, True, True]]

In [None]:
df.iloc[:, [False, True, True]]

In [None]:
df.iloc[[True, False, True, True], [True, False, True]]

afzoodan:

In [None]:
# adding to a DataFrame?

# ba iloc nemitavan satr ya sotooni ezafe kard.

#### Filtering

##### serie

voroodi serie baraye x dar df[x]

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [None]:
df

In [None]:
df.loc['Sara'] > 10

In [None]:
df.where(df.loc['Sara'] > 10)

In [None]:
#df[df.loc['Sara'] > 10]   
#IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).

In [None]:
df.loc[:,df.loc['Sara'] > 10]

In [None]:
#---------------

In [None]:
df['C++'] > 10

In [None]:
df.where(df['C++'] > 10)

In [None]:
df[df['C++'] > 10]

In [None]:
df[df['C++'] > 10]['C++']

In [None]:
df['C++'][df['C++'] > 10]

In [None]:
#---------------

In [None]:
df

In [None]:
cond1 = df['C++'] > 10
cond2 = df['C++'] < 18
display(cond1, cond2)

In [None]:
df[cond1]

In [None]:
df[~cond1]

In [None]:
df[cond1 & cond2]

In [None]:
df[cond1 & cond2]['C++']

In [None]:
#---------------

In [None]:
df

In [None]:
cond1 = df['C++'] > 10
cond2 = df['Java'] < 10
display(cond1, cond2)

In [None]:
df[cond1 | cond2]

In [None]:
df[cond1 ^ cond2]

In [None]:
df[cond1 & cond2]

In [None]:
df[cond1 & cond2][['C++', 'Java']]

..........

In [None]:
d = {'name':  ['ali', 'taha', 'omid', 'sara', 'negar'],
     'score': ['a', 'b', 'a', 'c', 'd']}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df['score'] == 'a'

In [None]:
df[df['score'] == 'a']

In [None]:
df[df['score'] == 'd']

In [None]:
df[(df['score'] == 'a') | (df['score'] == 'd')]

In [None]:
#---------------

In [None]:
df['score'].isin(['a'])

In [None]:
df[df['score'].isin(['a'])]

In [None]:
df[df['score'].isin(['a', 'd'])]

In [None]:
#---------------

In [None]:
df.isin(['a', 'd'])

In [None]:
df[df.isin(['a', 'd'])]

..........

In [None]:
d = {'name':  ['ali', 'taha', 'omid', 'sara', 'negar'],
     'score': [8, None, 5, 9, None]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df['score']

In [None]:
df['score'].isna()

In [None]:
df[df['score'].isna()]

In [None]:
df[~df['score'].isna()]

In [None]:
df[df['score'].notna()]

##### dataframe

voroodi dataframe baraye x dar df[x]

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d)

In [None]:
df

In [None]:
df > 10

In [None]:
df[df > 10]

In [None]:
df.where(df > 10)

In [None]:
df.gt(10)

In [None]:
df[df.gt(10)]

In [None]:
df.where(df.gt(10))

In [None]:
#---------------

In [None]:
df

In [None]:
df[['C++']] > 10

In [None]:
df[df[['C++']] > 10]

In [None]:
df.where(df[['C++']] > 10)

In [None]:
#---------------

In [None]:
df

In [None]:
df[1:2] > 10

In [None]:
df[df[1:2] > 10]

In [None]:
df.where(df[1:2] > 10)

##### boolean

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}
df = pd.DataFrame(d); df

In [None]:
[True, True, False, True]

In [None]:
df[[True, True, False, True]]

In [None]:
df.loc[:, [True, True, False]]

In [None]:
#df.where([True, True, False, True])      ValueError: Array conditional must be same shape as self

In [None]:
#---------------

In [None]:
# Boolean mask labelled with the same row names as df, so it aligns row by row.
s = pd.Series(
    data=[True, True, False, True],
    index=['Ali', 'Sara', 'Taha', 'Mahsa'],
)
s

In [None]:
df[s]

In [None]:
df.where(s)

In [None]:
#---------------

In [None]:
df['C++'] > 10

In [None]:
df[df['C++'] > 10]

In [None]:
df.where(df['C++'] > 10)

# Functions

## manage axis

### set_...()

#### set_index (keys, inplace)

set kardane indexe jadid

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
#df.set_index(['Omid', 'Sara', 'Taha'])      KeyError: "None of ['Omid', 'Sara', 'Taha'] are in the columns"

In [None]:
df.set_index([['1', '2', '3']])      

In [None]:
df.set_index([['a', 'b', 'c']])      

In [None]:
df.set_index([['Omid', 'Sara', 'Taha']])      

In [None]:
s = pd.Series(['Omid', 'Sara', 'Taha'], name='name')
df.set_index(s)

In [None]:
#df.set_index([['Ali', 'Sara', 'Taha', 'Omid']])   ValueError: Length mismatch: Expected 3 rows, received array of length 4

In [None]:
# MultiIndex

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
i = [['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]
mi = pd.MultiIndex.from_arrays(i, names=['name', 'gen']); mi
df.set_index(mi)

In [None]:
#---------------

In [None]:
df.set_index([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']], inplace=True)
df

In [None]:
df.index.names = ['name', 'gen']
df

In [None]:
df.index

tabdile yek sotoon be index

In [None]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df['name'].values

In [None]:
df.set_index(df['name'].values)

In [None]:
df['name']

In [None]:
df.set_index(df['name'])

In [None]:
df

In [None]:
df.set_index('name')

In [None]:
# MultiIndex

In [None]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'gen':    ['boy', 'girl', 'boy'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.set_index([df['name']])

In [None]:
df.set_index([df['name'], df['gen']])

In [None]:
df.set_index([df['name'], df['gen']]).drop(columns=['name', 'gen'])

In [None]:
df

In [None]:
df.set_index(['name'])

In [None]:
df.set_index(['name', 'gen'], inplace=True); df

In [None]:
df.index

In [None]:
#---------------

In [None]:
d = {'name': ['ali', 'reza', 'sara', 'taha', 'ali', 'reza', 'sara', 'taha'],
     'term': ['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'],
     'Java': [12, 16, 15, 17, 17, 13, 11, 19], 
     'C++':  [15, 14, 18, 16, 16, 17, 13, 20],}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.set_index(['term', 'name'], inplace=True); df

In [None]:
df.sort_index()

In [None]:
df.sort_index(level=1)

In [None]:
df

In [None]:
df.swaplevel()

In [None]:
df.swaplevel('term', 'name')

In [None]:
df

In [None]:
df.T

In [None]:
df

In [None]:
df.unstack()

groupby (by)

In [None]:
mydict = {'City': ['Hamedan', 'Hamedan', 'Hamedan', 'Tehran', 'Tehran', 'Tehran'],
          'Year': [1396, 1397, 1398, 1396, 1397, 1398],
          'Pop':  [9.3, 7, 8, 8, 8.5, 9]}

In [None]:
df = pd.DataFrame(mydict); df

In [None]:
df.set_index(['City', 'Year'], inplace=True); df

In [None]:
df.groupby('City').max()

In [None]:
df.groupby('Year').max()

In [None]:
#---------------

In [None]:
d = {'name': ['ali', 'reza', 'sara', 'taha', 'ali', 'reza', 'sara', 'taha'],
     'term': ['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'],
     'Java': [12, 16, 15, 17, 17, 13, 11, 19], 
     'C++':  [15, 14, 18, 16, 16, 17, 13, 20],}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.set_index(['term', 'name'], inplace=True); df

In [None]:
df.groupby('term').max()

In [None]:
df.groupby('name').max()

#### set_axis (labels, axis)

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
# index

In [None]:
df.set_axis(['x', 'y', 'z'])

In [None]:
#df.set_axis(['Ali', 'Sara', 'Taha', 'Omid'])  # ValueError: Length mismatch: Expected axis has 3 elements, new values have 4 elements

In [None]:
s = pd.Series(['Omid', 'Sara', 'Taha'], name='name')
df = df.set_axis(s); df

In [None]:
# columns

In [None]:
df

In [None]:
df.set_axis(['a', 'b', 'c'], axis=1)

In [None]:
s = pd.Series(['a', 'b', 'c'], name='dars')
df.set_axis(s, axis=1)

MultiIndex

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
# index

In [None]:
# Build a two-level (name, gen) row index and preview df relabelled with it.
# set_axis returns a new frame and leaves df unchanged, so the call itself has to be
# the cell's last expression; ending the cell with a bare `df` would only show the
# original labels.
i = [['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]
mi = pd.MultiIndex.from_arrays(i, names=['name', 'gen'])
df.set_axis(mi)

In [None]:
df = df.set_axis([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]); df

In [None]:
df.index.names = ['name', 'gen']; df

In [None]:
df.index

In [None]:
# columns

In [None]:
# Build a two-level column index and preview df relabelled with it along axis=1.
# The levels are named 'dars'/'noe' (column-level names), not the row-level names
# 'name'/'gen'. set_axis returns a new frame and leaves df unchanged, so the call
# itself has to be the cell's last expression for the relabelled frame to be shown.
i = [['R', 'C++', 'Java'], ['a', 'b', 'b']]
mi = pd.MultiIndex.from_arrays(i, names=['dars', 'noe'])
df.set_axis(mi, axis=1)

In [None]:
df = df.set_axis([['R', 'C++', 'Java'], ['a', 'b', 'b']], axis=1); df

In [None]:
df.columns.names = ['dars', 'noe']; df

In [None]:
df.columns

In [None]:
df.axes

In [None]:
df

In [None]:
df.T

In [None]:
df

In [None]:
df.unstack()

tabdile yek sotoon be index

In [None]:
d = {'name':   ['Ali', 'Sara', 'Taha'],
     'Python': [12, 13, 12],
     'C++':    [20, 14, 8],
     'Java':   [18, 6, 19]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.set_index('name')

In [None]:
df

In [None]:
df = df.set_axis(df['name']);df

In [None]:
df.pop('name');df

tabdile yek satr be column

In [None]:
l = [['Python', 'C++', 'Java'],
     [12, 13, 12],
     [20, 14, 8],
     [18, 6, 19]]

In [None]:
df = pd.DataFrame(l); df

In [None]:
df = df.set_axis(df.iloc[0], axis=1); df

In [None]:
df.drop(index=0)

..........

In [None]:
name = ['Ali', 'Sara', 'Taha']
Python = [12, 13, 12]
C = [20, 14, 8]
Java = [18, 6, 19]

In [None]:
pd.DataFrame([Python, C, Java], index=name)

In [None]:
df = pd.DataFrame(data=[name, Python, C, Java]); df

In [None]:
i = df.loc[0]; i

In [None]:
df.drop(0, inplace=True); df

In [None]:
df = df.set_axis(i.values); df

In [None]:
df = df.set_axis(['Python', 'C++', 'Java'], axis=1); df

###  reset_index()

#### serie (level, name)

In [None]:
# Unnamed Series of four scores labelled by student name.
s = pd.Series(
    data=[12, 8, 19, 17],
    index=['ali', 'taha', 'sara', 'negar'],
)
s

In [None]:
s.reset_index()

In [None]:
s.reset_index(name='Python')

In [None]:
#---------------

In [None]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar'], name='Python'); s

In [None]:
s.reset_index()

In [None]:
#---------------

In [None]:
s = pd.Series([12, 8, 19, 17], ['ali', 'taha', 'sara', 'negar'], name='Python')
s.index.name = 'Name'; s

In [None]:
s.reset_index()

MultiIndex

In [None]:
# 'Python' scores keyed by a two-level (Name, Gen) row index; the level names are
# supplied when the index is built instead of being assigned afterwards.
s = pd.Series(
    data=[12, 8, 19, 17],
    index=pd.MultiIndex.from_arrays(
        [['ali', 'taha', 'sara', 'negar'], ['b', 'b', 'g', 'g']],
        names=['Name', 'Gen'],
    ),
    name='Python',
)
s

In [None]:
s.reset_index()

In [None]:
s.reset_index(level=1)

In [None]:
s.reset_index(level=0)

In [None]:
s.reset_index(level='Name')

#### dataframe (level, drop, names, inplace)

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.reset_index()

In [None]:
df.reset_index(drop=True)

In [None]:
df.reset_index(names='Name')

In [None]:
#---------------

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.reset_index(names='Name', inplace=True)
df.set_index('Name', inplace=True)
df

In [None]:
df.reset_index(names='Esm', inplace=True)
df

In [None]:
df.reset_index(names='num')

MultiIndex

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
i = [['Ali', 'Sara', 'Taha'], ['b', 'g', 'b']]
c = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, i, c); df

In [None]:
df.reset_index()

In [None]:
df.reset_index(names=['Name', 'Gen'])

In [None]:
# level

In [None]:
df.reset_index(level=0)

In [None]:
df.reset_index(level=0, names='Name')

In [None]:
df.reset_index(level=1)

In [None]:
#df.reset_index(level=1, names='Gen')      IndexError: list index out of range ???

In [None]:
df.reset_index(level=[0, 1])

In [None]:
df.reset_index(level=[0, 1], names=['Name', 'Gen'])

In [None]:
# drop

In [None]:
df

In [None]:
df.reset_index(drop=True)

In [None]:
df.reset_index(level=0, drop=True)

In [None]:
df.reset_index(level=1, drop=True)

In [None]:
#---------------

In [None]:
df = pd.DataFrame(d, index=pd.MultiIndex.from_arrays(i, names=['Name', 'Gen']), columns=c); df

In [None]:
df.reset_index()

In [None]:
df.reset_index(level=0)

In [None]:
df.reset_index(level=1)

### reindex(index, columns, labels, axis, method, level, fill_value, limit)

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}

In [None]:
df = pd.DataFrame(d)

index & columns 

In [None]:
# index

In [None]:
df

In [None]:
df.reindex(index=['Sara'])

In [None]:
df.reindex(index=['Sara', 'Ali'])

In [None]:
df.reindex(index=['Sara', 'Ali', 'Taha'])

In [None]:
df.loc[['Sara', 'Ali', 'Taha'], :]

In [None]:
df

In [None]:
df.reindex(index=['Omid', 'Sara', 'Taha'])

In [None]:
df.reindex(index=['Omid', 'Sara', 'Taha'], fill_value=0)

In [None]:
df.reindex(index=['Omid', 'Taha', 'Ali', 'Sara'], fill_value=0)

In [None]:
df.reindex(index=['Ali', 'Sara', 'Taha', 'Omid', 'Ali'], fill_value=0)

In [None]:
# columns

In [None]:
df

In [None]:
df.reindex(columns=['Java','Python'])

In [None]:
df.reindex(columns=['Java', 'Python', 'C++', 'Python'])

In [None]:
df.loc[:, ['Java', 'Python', 'C++', 'Python']]

In [None]:
df[['Java', 'Python', 'C++', 'Python']]

In [None]:
df

In [None]:
df.reindex(columns=['Python', 'R'], fill_value=0)

In [None]:
df.reindex(columns=['Python', 'R', 'C++', 'Java'], fill_value=0)

In [None]:
# combined

In [None]:
df

In [None]:
df.reindex(index=['Taha', 'Ali', 'Sara'], columns=['C++', 'Java', 'Python'])

In [None]:
df.reindex(index=['Taha', 'Ali', 'Omid', 'Sara'], columns=['Python', 'C++', 'R', 'Java'], fill_value=0)

labels & axis 

In [None]:
# ('index', 'columns') or (0, 1)

In [None]:
df

In [None]:
df.reindex(labels=['Omid', 'Sara', 'Taha'], axis=0)

In [None]:
df.reindex(labels=['Omid', 'Sara', 'Taha'], axis='index')

In [None]:
df.reindex(labels=['Python', 'R', 'C++', 'Java'], axis=1)

In [None]:
df.reindex(labels=['Python', 'R', 'C++', 'Java'], axis='columns', fill_value=0)

level

In [None]:
df

In [None]:
df.reindex(index=['Ali'])

In [None]:
df.reindex(index=[['Ali'], ['b']])

In [None]:
df.reindex(index=[['Ali'], ['b']], level=0)

In [None]:
df.reindex(index=[['b'], ['Ali']], level=1)

In [None]:
df.reindex(index=[['Sara', 'Ali', 'Taha'], ['g', 'b', 'b']], level=0)

In [None]:
df.reindex(index=[['Sara', 'Ali', 'Omid'], ['g', 'b', 'b']], level=0)

In [None]:
#---------------

In [None]:
df1 = df.reindex(index=[['Ali', 'Sara', 'Taha'], ['A', 'S', 'T']], level=0)
df1 = df1.reindex(columns=[['p', 'c', 'j'], ['Python', 'C++', 'Java']], level=1)

In [None]:
df1

In [None]:
df1.reindex(index=['Sara', 'Ali', 'Taha'])

In [None]:
df1.reindex(index=['Sara', 'Ali', 'Taha'], level=0)

In [None]:
df1.reindex(columns=['j', 'p', 'c'], level=0)

method & limit

In [None]:
i1 = pd.date_range('1/1/2010', periods=8, freq='D')
i2 = pd.date_range('12/29/2009', periods=13, freq='D')
df2 = pd.DataFrame(data={"price": [100, 101, 95, 100, 89, NaN, 92, 88]}, index=i1)
display(i1, i2, df2)

In [None]:
df2 = df2.drop('2010-01-04'); df2

In [None]:
df2.reindex(index=i2)

In [None]:
df2.reindex(index=i2, fill_value=0)

In [None]:
df2.reindex(index=i2, method='nearest')

In [None]:
df2.reindex(index=i2, method='ffill')

In [None]:
df2.reindex(index=i2, method='bfill')

In [None]:
df2.reindex(index=i2, method='bfill', limit=1)

In [None]:
df2.reindex(index=i2, method='bfill', limit=2)

### take (indices, axis)

In [None]:
# 6 x 5 frame filled row by row with 1..30, default integer labels on both axes.
df = pd.DataFrame(data=np.arange(1, 31).reshape(6, 5))
df

In [None]:
df.reindex([5, 4, 3, 2, 1, 0])

In [None]:
df.reindex([4, 3, 2, 1, 0], axis=1)

In [None]:
#---------------

In [None]:
df.take([5, 4, 3, 2, 1, 0])

In [None]:
df.take([4, 3, 2, 1, 0], axis=1)

In [None]:
df.take(np.random.permutation(6))

In [None]:
df.take(np.random.permutation(5), axis=1)

In [None]:
#---------------

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}
df = pd.DataFrame(d); df

In [None]:
#df.take(['Sara', 'Ali', 'Taha'])       ValueError: invalid literal for int() with base 10: 'Sara'

In [None]:
df.take([2, 1, 0])

In [None]:
df.take([2, 1, 0], axis=1)

### rename(index, columns, mapper, axis, inplace, level)

In [None]:
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

In [None]:
# index

In [None]:
df

In [None]:
df.rename(index={0: -1})

In [None]:
df.rename(index={0: "x"})

In [None]:
df.rename(index={0: "x", 1: "y", 2: "z"})

In [None]:
df.rename(index=np.square)

In [None]:
df.rename(index=float)

In [None]:
l = lambda x: x+10
df.rename(index=l)

In [None]:
# columns

In [None]:
df

In [None]:
df.rename(columns={"B": 2})

In [None]:
df.rename(columns={"B": 'E'})

In [None]:
df.rename(columns={"A": "a", "B": "c"})

In [None]:
df.rename(columns=str.lower)

In [None]:
l = lambda x: '|' + x + '|'
df.rename(columns=l)

In [None]:
# combined

In [None]:
df

In [None]:
df.rename(index={0: "x"}, columns={"B": 2})

In [None]:
df.rename(index={0: "x", 1: "y", 2: "z"}, columns=str.lower)

MultiIndex

In [None]:
d = [[1, 2], [3, 4], [5, 6]]
i = [[0, 1, 2], ['x', 'y', 'z']]
c = [['A', 'B'], ['p', 'q']]

In [None]:
df = pd.DataFrame(d, i, c)

In [None]:
# index

In [None]:
df

In [None]:
df.rename(index={0: -1})

In [None]:
df.rename(index={'y': 'o'})

In [None]:
df.rename(index={0: -1, 'y': 'o'})

In [None]:
#df.rename(index=np.square)     TypeError

In [None]:
df.rename(index=np.square, level=0)

In [None]:
# columns

In [None]:
df

In [None]:
df.rename(columns={"B": 2})

In [None]:
df.rename(columns={"A": "a", "B": "c"})

In [None]:
l = lambda x: '|' + x + '|'
df.rename(columns=l)

In [None]:
l = lambda x: '|' + x + '|'
df.rename(columns=l, level=0)

In [None]:
l = lambda x: '|' + x + '|'
df.rename(columns=l, level=1)

### rename_axis (index, columns, mapper, axis, inplace)

In [None]:
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}); df

In [None]:
df.rename_axis(index='num')

In [None]:
df.rename_axis(columns='name')

In [None]:
df.rename_axis(index='num', columns='name')

In [None]:
#---------------

In [None]:
df.rename_axis(index='num', columns='name', inplace=True); df

In [None]:
df.rename_axis(index='num.')

In [None]:
df.rename_axis(index=str.upper)

In [None]:
df.rename_axis(columns='name.')

In [None]:
df.rename_axis(columns=str.upper)

In [None]:
df.rename_axis(index=str.upper, columns='_name_')

MultiIndex

In [None]:
d = [[1, 2], [3, 4], [5, 6]]
i = [[0, 1, 2], ['x', 'y', 'z']]
c = [['A', 'B'], ['p', 'q']]

In [None]:
df = pd.DataFrame(d, i, c); df

In [None]:
df.rename_axis(index=['num', 'harf'])

In [None]:
df.rename_axis(columns=['name', 'mod'])

In [None]:
df.rename_axis(index=['num', 'harf'], columns=['name', 'mod'])

In [None]:
#---------------

In [None]:
df.rename_axis(index=['num', 'harf'], columns=['name', 'mod'], inplace=True); df

In [None]:
df.rename_axis(index={'harf': 'no'})

In [None]:
df.rename_axis(columns={'mod': 'mo'})

In [None]:
df.rename_axis(index=str.upper, columns={'mod': 'mo'})

### sort_index (axis, level, ascending, inplace, na_position, sort_remaining, ignore_index, key)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.sort_index()

In [None]:
df.sort_index(axis=1)

### add_...fix (...fix, axis)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d); df

In [None]:
df.add_prefix('item_')

In [None]:
df.add_suffix('_item')

In [None]:
df.add_prefix('item_', axis=0)

In [None]:
df.add_suffix('_item', axis=0)

### delete

drop (labels, axis, index, columns, inplace)

In [None]:
# with labels & axis

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.drop(['Ali'])

In [None]:
df.drop(['Ali', 'Sara'])

In [None]:
df.drop(['Ali', 'Sara'], axis=0)

In [None]:
df.drop(['Java'], axis=1)

In [None]:
df.drop(['Java', 'Python'], axis=1)

In [None]:
# with index & columns

In [None]:
df.drop(index=['Ali'])

In [None]:
df.drop(index=['Ali', 'Sara'])

In [None]:
df.drop(columns=['Python'])

In [None]:
df.drop(columns=['Java', 'Python'])

In [None]:
df.drop(index=['Ali'], columns=['Python'])

In [None]:
df.drop(index=['Ali', 'Taha'], columns=['C++'])

In [None]:
#---------------

In [None]:
df

In [None]:
df['Java'] < 10

In [None]:
df[df['Java'] < 10]

In [None]:
df[df['Java'] < 10].index

In [None]:
df.drop(df[df['Java'] < 10].index)

In [None]:
# Drop, in place, every row whose 'Java' score is below 10.
# One boolean-mask selection replaces the row-by-row loop, so the frame is
# never mutated while its own index is being iterated and drop() runs once.
df.drop(df[df['Java'] < 10].index, inplace=True)
df

pop (item)

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12, 'Mahsa': 20},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8,  'Mahsa': 16},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19, 'Mahsa': 9}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.pop('Java')

In [None]:
df

In [None]:
# pop only works on columns; to pop a row, transpose first

In [None]:
# df.pop('Ali')      KeyError: 'Ali'

df.T.pop('Ali')

In [None]:
df

In [None]:
df = df.T
df

In [None]:
df.pop('Ali')

In [None]:
df

In [None]:
df = df.T
df

## manage value

### NaN

#### is & not ()

In [None]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.isna()

In [None]:
df.isnull()

In [None]:
df.notna()

In [None]:
df.notnull()

In [None]:
#---------------

In [None]:
df.notnull().sum(axis=0)

In [None]:
df.notnull().sum(axis=1)

#### dropna (axis, how, thresh, subset, inplace, ignore_index)

In [None]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.dropna()

In [None]:
df.dropna(axis=1)

In [None]:
# how

In [None]:
df

In [None]:
df.dropna(how='any')

In [None]:
df.dropna(how='all')

In [None]:
df

In [None]:
df.dropna(how='any', axis=1)

In [None]:
df.dropna(how='all', axis=1)

In [None]:
# thresh

In [None]:
# if a row (or column) contains fewer non-NaN values than thresh, that row (or column) is dropped.

In [None]:
df

In [None]:
df.dropna(thresh=1)

In [None]:
df.dropna(thresh=2)

In [None]:
df.dropna(thresh=3) 

In [None]:
df.dropna(thresh=4) 

In [None]:
df

In [None]:
df.dropna(thresh=1, axis=1)

In [None]:
df.dropna(thresh=4, axis=1)

In [None]:
df.dropna(thresh=6, axis=1)

In [None]:
# subset

In [None]:
df

In [None]:
df.dropna(subset='C++')

In [None]:
df.dropna(subset='Java')

In [None]:
df.dropna(subset='R')

In [None]:
df.dropna(subset=['Python', 'C++'])

In [None]:
df.dropna(subset=['Python', 'C++'], how='all')

In [None]:
df.dropna(subset=['Python', 'Java'], how='all')

In [None]:
df

In [None]:
df.dropna(subset='Ali', axis=1)

In [None]:
df.dropna(subset='Mahsa', axis=1)

In [None]:
df.dropna(subset=['Ali', 'Sara'], axis=1)

In [None]:
df.dropna(subset=['Ali', 'Sara'], axis=1, how='all')

..........

In [None]:
df.loc['Taha', 'Java'] = 12
df.loc['Mahsa', 'R'] = NaN

In [None]:
df

In [None]:
df.dropna()

In [None]:
df.dropna(axis=1)

In [None]:
# how

In [None]:
df

In [None]:
df.dropna(how='any')

In [None]:
df.dropna(how='any', axis=1)

In [None]:
df

In [None]:
df.dropna(how='all')

In [None]:
df.dropna(how='all', axis=1)

In [None]:
# subset

In [None]:
df

In [None]:
df.dropna(subset='Java')

In [None]:
df.dropna(subset='R')

In [None]:
df

In [None]:
df.dropna(subset='Ali', axis=1)

In [None]:
df.dropna(subset='Taha', axis=1)

In [None]:
df.dropna(subset='Mahsa', axis=1)

#### fillna (value, axis, inplace, limit)

In [None]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': NaN, 'Negar': 11},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': NaN, 'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': 15,  'Negar': 18}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.fillna(-1)

In [None]:
df.fillna('o')

In [None]:
df.fillna({'C++': -1, 'Java': 'o'})

In [None]:
df.mean()

In [None]:
df.fillna(df.mean())

In [None]:
df.mean()['Python']

In [None]:
df.fillna(df.mean()['Python'])

In [None]:
df.mean()[['Python']]

In [None]:
df.fillna(df.mean()[['Python']])

limit & axis

In [None]:
# limit: the maximum number of consecutive NaN values to fill forward/backward.

In [None]:
df

In [None]:
df.fillna('o', limit=1)

In [None]:
df.fillna('o', limit=2)

In [None]:
df

In [None]:
df.fillna('o', limit=1, axis=1)

In [None]:
df.fillna('o', limit=2, axis=1)

#### combine_first (other)

In [None]:
df1 = pd.DataFrame({'C++': [None, 12], 'Python': [None, 14]}, index=['Ali', 'Taha'])
df2 = pd.DataFrame({'C++': [None, 15], 'Python': [13, None]}, index=['Ali', 'Taha'])

In [None]:
display(df1, df2)

In [None]:
df1.combine_first(df2)

In [None]:
df2.combine_first(df1)

In [None]:
#---------------

In [None]:
df1 = pd.DataFrame({'C++': [None, 12], 'Python': [14, None]}, index=['Ali', 'Taha'])
df2 = pd.DataFrame({'Python': [13, 20, None], 'Java': [None, 11, 17]}, index=['Ali', 'Taha', 'Mahsa'])

In [None]:
display(df1, df2)

In [None]:
df1.combine_first(df2)

In [None]:
df2.combine_first(df1)

### ffill & bfill (axis, inplace, limit)

In [None]:
d = {'Python': {'Ali': 12,  'Sara': NaN, 'Taha': 12,  'Mahsa': 11,  'Negar': NaN},
     'C++':    {'Ali': NaN, 'Sara': 19,  'Taha': 8,   'Mahsa': NaN, 'Negar': 15},
     'Java':   {'Ali': NaN, 'Sara': NaN, 'Taha': 15,  'Mahsa': NaN, 'Negar': NaN},
     'R':      {'Ali': 18,  'Sara': 6,   'Taha': 19,  'Mahsa': NaN, 'Negar': 18}}

In [None]:
df = pd.DataFrame(d); df

ffill

In [None]:
df.ffill()

In [None]:
df

In [None]:
df.ffill(axis=1)

In [None]:
# limit

In [None]:
df

In [None]:
df.ffill()

In [None]:
df.ffill(limit=1)

In [None]:
df.ffill(limit=2)

In [None]:
df

In [None]:
df.ffill(axis=1)

In [None]:
df.ffill(axis=1, limit=1)

In [None]:
df.ffill(axis=1, limit=2)

bfill

In [None]:
df

In [None]:
df.bfill()

In [None]:
df

In [None]:
df.bfill(axis=1)

In [None]:
# limit

In [None]:
df

In [None]:
df.bfill()

In [None]:
df.bfill(limit=1)

In [None]:
df.bfill(limit=2)

In [None]:
df

In [None]:
df.bfill(axis=1)

In [None]:
df.bfill(axis=1, limit=1)

In [None]:
df.bfill(axis=1, limit=2)

### replace (to_replace, value, inplace)

In [None]:
d = [[12, NaN, 18], [NaN, 14, 6], [12, 9, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.replace(NaN, 0)

In [None]:
df.replace({9: 10})

In [None]:
df.replace({9: 10, NaN: 0})

In [None]:
df.replace([9, NaN], [10, 0])

In [None]:
#---------------

In [None]:
df['C++'].replace({NaN: 0, 9: 10})

In [None]:
df[['C++']].replace({NaN: 0, 9: 10})

In [None]:
df.replace({'C++': {NaN: 0, 9: 10}})

In [None]:
#---------------

In [None]:
df.replace({'Python': NaN, 'Java': 6}, 'o')

..........

In [None]:
d = {'Age': {0: 22, 1: 38,2: 26,3: 35,4: 35,5: 34,6: 54,7: 2,8: 27,9: 14},
     'Sex': {0: 'male',1: 'female',2: 'female',3: 'female',4: 'male',5: 'male',6: 'male',7: 'male',8: 'female',9: 'female'}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.replace({'Sex': {'female': 0, 'male': 1}})

In [None]:
# or:

In [None]:
# Build a {label: integer code} mapping from the sorted unique 'Sex' values.
# enumerate() numbers the labels 0..len(s)-1 directly, so no hand-built range
# (and no off-by-one upper bound) is needed.
s = sorted(df['Sex'].unique())
dz = {label: code for code, label in enumerate(s)}; dz

In [None]:
df['Sex'].replace(dz)

In [None]:
df['Sex'].map(dz)

In [None]:
# or:

In [None]:
l = sorted(df['Sex'].unique()); l

In [None]:
df['Sex'].apply(l.index)

### duplicate

#### duplicated (subset, keep)

In [None]:
d = {'col1': ['a', 'b', 'a', 'b', 'b', 'a', 'b'],
     'col2': [10,  10,  20,  40,  30,  30,  40],
     'col3': ['a', 'c', 'e', 'f', 'c', 'e', 'f']}

In [None]:
frame = pd.DataFrame(d); frame

In [None]:
frame.duplicated()

In [None]:
~frame.duplicated()

In [None]:
frame.duplicated(keep='last')

In [None]:
# where

In [None]:
frame.where(frame.duplicated())

In [None]:
frame.where(frame.duplicated(keep='last'))

subset

In [None]:
frame

In [None]:
frame['col1'].duplicated()

In [None]:
frame.duplicated(subset='col1')

In [None]:
frame.duplicated(subset='col1', keep='last')

In [None]:
frame.duplicated(subset='col1', keep=False)

In [None]:
frame.duplicated(subset='col3', keep=False)

In [None]:
frame

In [None]:
frame.duplicated(subset='col1')                    

In [None]:
~frame.duplicated(subset='col1')                     # unique entries: the first occurrence of each value.

In [None]:
frame[~frame.duplicated(subset='col1')]

In [None]:
~frame.duplicated(subset='col1', keep='last')        # unique entries: the last occurrence of each value.

In [None]:
frame[~frame.duplicated(subset='col1', keep='last')]  

In [None]:
frame

In [None]:
frame.duplicated(subset=['col1', 'col3'])               # scans from top to bottom

In [None]:
frame.duplicated(subset=['col1', 'col3'], keep='last')  # scans from bottom to top

In [None]:
# where

In [None]:
frame.where(frame.duplicated(subset=['col1', 'col3']))

In [None]:
frame.where(frame.duplicated(subset=['col1', 'col3'], keep='last'))

#### drop_duplicates (subset, keep, inplace, ignore_index)

In [None]:
frame

In [None]:
frame.drop_duplicates()

In [None]:
frame.drop_duplicates(keep='last')

In [None]:
frame.drop_duplicates(keep='last', ignore_index=True)

In [None]:
frame.drop_duplicates(keep=False)    # removes every row that has a duplicate (none of them is kept)

subset

In [None]:
frame

In [None]:
frame.drop_duplicates(subset='col1')

In [None]:
frame.drop_duplicates(subset='col1', keep='last')

In [None]:
frame.drop_duplicates(subset='col1', keep=False)

In [None]:
frame

In [None]:
frame.drop_duplicates(subset='col2')

In [None]:
frame.drop_duplicates(subset='col2', keep='last')

In [None]:
frame.drop_duplicates(subset='col2', keep=False)

In [None]:
frame

In [None]:
frame.drop_duplicates(subset=['col1', 'col3'])

In [None]:
frame.drop_duplicates(subset=['col1', 'col3'], keep='last')

In [None]:
frame.drop_duplicates(subset=['col1', 'col3'], keep=False)

### value_counts(subset, normalize, sort, ascending, dropna)

In [None]:
d = {'num_legs':  {'falcon': 2, 'dog': 4, 'cat': 4, 'ant': 6, 'cr': None},
     'num_wings': {'falcon': 2, 'dog': 0, 'cat': 0, 'ant': 0, 'cr': 2}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.value_counts()

In [None]:
df.value_counts(sort=False)

In [None]:
df.value_counts(ascending=True)

In [None]:
df.value_counts(normalize=True)

In [None]:
df.value_counts(dropna=False)

In [None]:
df.value_counts(subset='num_wings')

In [None]:
df.value_counts(subset='num_legs')

In [None]:
df.value_counts(subset='num_legs', dropna=False)

In [None]:
df['num_legs'].value_counts(dropna=False)

### sort_values (by, axis, ascending, inplace, na_position, ignore_index, key)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.sort_values(by='Python')

In [None]:
df.sort_values(by=['Python', 'C++'])

In [None]:
df.sort_values(by=['Python', 'C++'], ascending=False)

In [None]:
df.sort_values(by=['Python', 'C++'], ascending=[False, True])

In [None]:
df

In [None]:
df.sort_values(by='Mahsa', axis=1)

In [None]:
#---------------

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
df = pd.DataFrame(d); df

In [None]:
df.sort_values(by=0)

In [None]:
df.sort_values(by=[0, 1])

### nlargest (n, columns, keep)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.nlargest(1, columns='Python')

In [None]:
df.nlargest(3, columns='Python')

In [None]:
df.nlargest(3, columns='Python', keep='last')

In [None]:
#---------------

In [None]:
df

In [None]:
df.nsmallest(2, columns='Python')

In [None]:
df.nsmallest(2, columns=['Python', 'C++'])

### is & not

In [None]:
d = [[12, 20, np.nan], [13, 14, 6], [13, np.nan, np.nan], [20, np.nan, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.isin([12])

In [None]:
df.isin([12, 20])

In [None]:
df[df.isin([12, 20])]

In [None]:
df.where(df.isin([12, 20]))

### select_dtypes (include, exclude)

In [None]:
df = pd.DataFrame({'a': [1, 2]*2, 'b': [True, False]*2, 'c': [1.0, 2.0]*2, 'e': ['p', 'q']*2}); df

In [None]:
df.dtypes

In [None]:
# include

In [None]:
df.select_dtypes(include='bool')

In [None]:
df.select_dtypes(include=['object', 'int64'])

In [None]:
# exclude

In [None]:
df.select_dtypes(exclude='bool')

In [None]:
df.select_dtypes(exclude=['object', 'int64'])

### astype (dtype)

In [None]:
d = {'name':  {0: 'ali', 1: 'taha', 2: 'omid', 3: 'sara'},
     'score': {0: 1, 1: 0, 2: 0, 3: 1}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df['score']                    # dtype: int64

In [None]:
df['score'].astype('bool')     # dtype: bool

In [None]:
df

In [None]:
df[['score']].astype('bool')    

In [None]:
df

In [None]:
df.astype({'score': 'bool'})   

..........

In [None]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [None]:
df = pd.DataFrame({'name': n, 'score': s}); df

In [None]:
df['score']                 # dtype: int64

In [None]:
df['score'].astype('str')   # dtype: object

In [None]:
#---------------

In [None]:
df['name']

In [None]:
c = df['name'].astype('category'); c

In [None]:
c.values.categories

In [None]:
c.value_counts()

In [None]:
c.values.codes

In [None]:
c.isin(['sara'])

In [None]:
c[c.isin(['sara'])]

In [None]:
c[c.isin(['sara'])].cat.remove_unused_categories()

..........

In [None]:
import seaborn as sns
df = sns.load_dataset('iris'); df     

In [None]:
df['species'].nunique()

In [None]:
df['species'].unique()

In [None]:
c = df['species'].astype('category'); c

In [None]:
c.values.categories

In [None]:
c.value_counts()

In [None]:
c.values.codes

In [None]:
c.isin(['setosa'])

In [None]:
c[c.isin(['setosa'])]

In [None]:
c[c.isin(['setosa'])].cat.remove_unused_categories()

Reducing memory usage with astype()

In [None]:
df.groupby('species').apply(lambda x: x[:3])

In [None]:
df['species']                  # dtype: object

In [None]:
df.info()                      # memory usage: 6.0+ KB

In [None]:
df['species'] = df['species'].astype('category')

In [None]:
df['species']                  # dtype: category

In [None]:
df['species'].values.codes     # the column is now stored in memory as these integer codes 0, 1, 2.

In [None]:
df.info()                      # memory usage: 5.1 KB

## miscellaneous

### squeeze (axis)

In [None]:
d = {'C++': {'Ali': None, 'Sara': 14.0, 'Taha': 9.0}}
df = pd.DataFrame(d); df

In [None]:
df.squeeze()

In [None]:
df.index.name = 'Name'
df

In [None]:
df.squeeze()

In [None]:
#---------------

In [None]:
d = {'Python': {'Ali': 12.0, 'Sara': None, 'Taha': 12.0},
     'C++':    {'Ali': None, 'Sara': 14.0, 'Taha': 9.0},
     'Java':   {'Ali': 18, 'Sara': 6, 'Taha': 19}}

In [None]:
df = pd.DataFrame(d)
df.index.name = 'Name'
df

In [None]:
df['C++']

In [None]:
df[['C++']]

In [None]:
df[['C++']].squeeze()

In [None]:
df.loc['Ali']

In [None]:
df.loc[['Ali']]

In [None]:
df.loc[['Ali']].squeeze()

### head & tail (n)

In [None]:
df = pd.DataFrame(np.arange(1, 41).reshape((10, 4))); df

In [None]:
# head: Return the first n rows.

In [None]:
df.head()

In [None]:
df.head(3)

In [None]:
df.head(7)

In [None]:
# tail: Return the last n rows.

In [None]:
df.tail()

In [None]:
df.tail(2)

In [None]:
df.tail(6)

### sample (n, frac, replace, weights, random_state, axis, ignore_index)

In [None]:
df = pd.DataFrame(np.arange(1, 49).reshape((8, 6))); df

In [None]:
# n

In [None]:
df.sample()

In [None]:
df.sample(n=3)

In [None]:
df.sample(n=3, axis=1)

In [None]:
# frac

In [None]:
df.sample(frac=0.5)

In [None]:
df.sample(frac=0.5, axis=1)

In [None]:
# replace

In [None]:
#df.sample(10)          ValueError: Cannot take a larger sample than population when 'replace=False'

In [None]:
df.sample(10, replace=True)   

In [None]:
df.sample(10, replace=True, axis=1)   

In [None]:
df.sample(5, replace=True)

In [None]:
df.sample(5, replace=True, axis=1)   

In [None]:
# random_state

In [None]:
# running the cell below several times gives a different result each time:

In [None]:
df.sample(n=3)   

In [None]:
# with random_state set, every run of the cell gives the same result as the first run:

In [None]:
df.sample(n=3, random_state=1)   

..........

In [None]:
d = {'F1': {'s1': 7, 's2': 4, 's3': 6, 's4': 0, 's5': 3, 's6': 7},
     'F2': {'s1': 2, 's2': 1, 's3': 3, 's4': 0, 's5': 6, 's6': 2},
     'F3': {'s1': 5, 's2': 4, 's3': 1, 's4': 7, 's5': 2, 's6': 9},
     'F4': {'s1': 9, 's2': 4, 's3': 1, 's4': 7, 's5': 8, 's6': 1}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
# n

In [None]:
df.sample(n=2)

In [None]:
df.sample(n=2, axis=1)

In [None]:
s = df['F2'].sample(n=4); s

In [None]:
# frac

In [None]:
df.sample(frac=0.2)

In [None]:
df.sample(frac=0.2, axis=1)

In [None]:
# replace

In [None]:
df.sample(10, replace=True)   

In [None]:
df.sample(8, replace=True, axis=1)   

In [None]:
# weights

In [None]:
df

In [None]:
df.sample(3, weights='F3')  

### to_...()

to_dict

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.to_dict()

In [None]:
d = {'Python': {'Ali': 12, 'Sara': 13, 'Taha': 12},
     'C++':    {'Ali': 20, 'Sara': 14, 'Taha': 8},
     'Java':   {'Ali': 18, 'Sara': 6,  'Taha': 19}}
pd.DataFrame(d)

In [None]:
#---------------

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
df1 = pd.DataFrame(d); df1

In [None]:
df1.to_dict()

to_numpy

In [None]:
df

In [None]:
df.to_numpy()

In [None]:
df.values

## arithmetic operation

### math

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df + 5

In [None]:
df.add(5)

In [None]:
df.sub(3)

In [None]:
df.multiply(4)

In [None]:
df.divide(2)

In [None]:
df.pow(3)

In [None]:
df.mod(5)

add a Series to a DataFrame

In [None]:
df

In [None]:
myser = df.loc['Ali']
myser

In [None]:
df.add(myser)         # broadcasting

In [None]:
#---------------

In [None]:
df

In [None]:
myser = df['Python']
myser

In [None]:
df.add(myser, axis=0)

2 DataFrames

In [None]:
arr1 = np.arange(12).reshape((4, 3)); arr1

In [None]:
df1 = pd.DataFrame(data=arr1, columns=list('abc')); df1

In [None]:
arr2 = np.arange(10).reshape((5, 2))
df2 = pd.DataFrame(data=arr2, columns=list('ab')); df2

In [None]:
df2.loc[1, 'b']

In [None]:
df2.loc[[1], ['b']]

In [None]:
df2.loc[1, 'b'] = np.nan

In [None]:
df1

In [None]:
df2

In [None]:
df1 + df2

In [None]:
df1.add(df2)

In [None]:
df1.add(df2, fill_value=0)

In [None]:
df1.sub(df2, fill_value=0)

### describe (axis)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.describe()

In [None]:
df.count()

In [None]:
df.count(axis=1)

In [None]:
df.sum()

In [None]:
df.sum(axis=1)

In [None]:
df.mean()

In [None]:
df.mean(axis=1)

In [None]:
df.mean(axis=1, skipna=False)

..........

In [None]:
df = pd.DataFrame(np.random.randn(1000, 3))

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.min()

In [None]:
df.max()

In [None]:
# cap the data: values greater than 3 become 3 and values less than -3 become -3

In [None]:
df[np.abs(df) > 3] = np.sign(df) * 3

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.min()

In [None]:
df.max()

### eq , ne , gt , ge , lt , le

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df == 12

In [None]:
df.eq(12)

In [None]:
df != 20

In [None]:
df.ne(20)

In [None]:
df.gt(12)

In [None]:
df.ge(12)

In [None]:
df.lt(14)

In [None]:
df.le(14)

### max,idxmax & min,idxmin (axis)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.max()

In [None]:
df.idxmax()

In [None]:
df.max(axis=1)

In [None]:
df.idxmax(axis=1)

In [None]:
#---------------

In [None]:
df.min()

In [None]:
df.idxmin()

In [None]:
df.min(axis=1)

In [None]:
df.idxmin(axis=1)

### cumsum , cumprod , cummax , cummin (axis, skipna)

In [None]:
d = [[12, 20, 18], [13, 14, 6], [np.nan, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.cumsum()

In [None]:
df.cumsum(skipna=False)

In [None]:
df.cumsum(axis=1)

In [None]:
#---------------

In [None]:
df.cumprod()

In [None]:
df.cumprod(axis=1)

In [None]:
#---------------

In [None]:
df.cummax()

In [None]:
df.cummax(axis=1)

In [None]:
#---------------

In [None]:
df.cummin()

In [None]:
df.cummin(axis=1)

## change structure

### Transpose & unstack

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19], [20, 16, 9]]
name = ['Ali', 'Sara', 'Taha', 'Mahsa']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.T

In [None]:
df.transpose()

In [None]:
#---------------

In [None]:
pd.DataFrame([['Omid', 'boy'], ['Sara', 'girl'], ['Taha', 'boy']])

In [None]:
pd.DataFrame([['Omid', 'Sara', 'Taha'], ['boy', 'girl', 'boy']]).T

unstack()

In [None]:
df

In [None]:
df.stack()

In [None]:
s = df.stack()
display(type(s), s.index)

In [None]:
s

In [None]:
s.unstack()

In [None]:
s.swaplevel()

In [None]:
s.swaplevel().unstack()

### pivot (columns, index, values)

In [None]:
d = {'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
     'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
     'baz': [1, 2, 3, 4, 5, 6],
     'zoo': ['x', 'y', 'z', 'q', 'w', 't']}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.pivot(index='foo', columns='bar', values='baz')

In [None]:
df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])

In [None]:
#---------------

In [None]:
df = pd.DataFrame(d).set_index('foo'); df

In [None]:
df.pivot(columns='bar', values='baz')

In [None]:
df.pivot(columns='bar', values=['baz', 'zoo'])

..........

In [None]:
d = {"lev1":   [1, 1, 1, 2, 2, 2],
     "lev2":   [1, 1, 2, 1, 1, 2],
     "lev3":   [1, 2, 1, 2, 1, 2],
     "lev4":   [1, 2, 3, 4, 5, 6],
     "values": [0, 1, 2, 3, 4, 5]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.pivot(index="lev1", columns=["lev2", "lev3"], values="values")

In [None]:
df.pivot(index=["lev1", "lev2"], columns="lev3", values="values")

..........

In [None]:
d = {"A": ['one', 'one', 'one', 'two', 'two', 'two'],
     "B": [1, 1, 2, 1, 1, 2],
     "C": [1, 2, 1, 2, 1, 2],
     "V": [0, 1, 2, 3, 4, 5]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.pivot(index="A", columns=["B", "C"], values="V")

In [None]:
df.pivot(index=["A", "B"], columns="C", values="V")

### melt (id_vars, value_vars, var_name, value_name, col_level, ignore_index)

In [None]:
d = {'Name': ['Ali', 'Sara', 'Mahsa'], 'C++': [12, 13, 20], 'Python': [14, 16, 8]}
df = pd.DataFrame(d); df

In [None]:
df.melt()                    

In [None]:
df.melt(id_vars=['Name'])                    

In [None]:
df.melt(id_vars=['Name'], value_vars=['C++'])

In [None]:
df.melt(id_vars=['Name'], value_vars=['C++'], var_name='Dars', value_name='Score')

In [None]:
df.melt(id_vars=['Name'], value_vars=['C++', 'Python'], var_name='Dars', value_name='Score')

In [None]:
#---------------

In [None]:
m = pd.melt(df, id_vars=['Name']); m

In [None]:
p = m.pivot(index='Name', columns='variable', values='value'); p

In [None]:
p.reset_index()

In [None]:
df

MultiColumns

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
i = ['Ali', 'Sara', 'Taha']
c = [['a', 'b', 'c'], ['Python', 'C++', 'Java']]

In [None]:
df = pd.DataFrame(d, i, c); df

In [None]:
df.melt()  

In [None]:
df.melt(var_name=['sath', 'Dars'], ignore_index=False)  

In [None]:
df.melt(col_level=0)  

In [None]:
df.melt(col_level=1)  

In [None]:
df.melt(col_level=1, value_vars=['C++', 'Java'], var_name='Dars', value_name='Score', ignore_index=False)  

MultiIndex

In [None]:
d = [[12, 20], [13, 14], [12, 8]]
i = [['Ali', 'Sara', 'Taha'], ['b', 'g', 'b']]
c = ['Python', 'C++']

In [None]:
df = pd.DataFrame(d, i, c); df

In [None]:
df.melt()  

In [None]:
df.melt(ignore_index=False)  

In [None]:
#---------------

In [None]:
df.index.names = ['Name', 'Gen']; df

In [None]:
df.melt(ignore_index=False)  

In [None]:
#df.melt(id_vars='Name')        KeyError: 'Name'

In [None]:
df.reset_index(level=0, inplace=True); df

In [None]:
df.melt(id_vars='Name', ignore_index=False)     

In [None]:
df.melt(id_vars='Name', value_vars=['C++'], var_name='Dars', value_name='Score', ignore_index=False)     

In [None]:
#---------------

In [None]:
df.reset_index(inplace=True); df

In [None]:
df.melt()  

In [None]:
df.melt(id_vars=['Name'])                    

In [None]:
df.melt(id_vars=['Name', 'Gen'])                    

In [None]:
df.melt(id_vars=['Name', 'Gen', 'Python'])                    

In [None]:
df.melt(id_vars=['Name', 'Gen', 'Python', 'C++'])                    

## applying function

### apply (func, axis, args, kwargs)

Apply a function along an axis of the DataFrame.

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.apply(min)

In [None]:
df.apply(min, axis=1)

In [None]:
#---------------

In [None]:
df

In [None]:
df.apply(sum)

In [None]:
df.apply(sum, axis=1)

In [None]:
#---------------

In [None]:
df

In [None]:
f = lambda x: x.min()

In [None]:
df.apply(f)

In [None]:
df.apply(f, axis=1)

In [None]:
#---------------

In [None]:
df

In [None]:
f = lambda x: x-2
df.apply(f)

In [None]:
f = lambda x, y: x-y
df.apply(f, args=(2,))

In [None]:
#---------------

In [None]:
df

In [None]:
f = lambda x: pd.Series([x.min(), x.max()], index=['min', 'max'])

In [None]:
df.apply(f)

In [None]:
df.apply(f, axis=1)

Changing a single row or column of the DataFrame

In [None]:
df

In [None]:
df['Python']

In [None]:
df['Python'].apply(lambda x: x-2)

In [None]:
#---------------

In [None]:
df

In [None]:
def f(x):
    """Return the letter grade for score x: 'A' above 15, 'B' from 10 to 15, else 'C'."""
    # Start from the lowest grade and upgrade it as each threshold is met.
    grade = 'C'
    if x >= 10:
        grade = 'B'
    if x > 15:
        grade = 'A'
    return grade

In [None]:
# f operates on one element at a time, so it works with map but not with DataFrame.apply

#df.apply(f)       ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
df['C++'].apply(f)

In [None]:
df['Java'].apply(f)

In [None]:
df['Java'].apply(f).value_counts()

In [None]:
#---------------

In [None]:
df.loc['Ali']

In [None]:
df.loc['Ali'].apply(lambda x: x - 2)

In [None]:
df

In [None]:
df.loc['Ali'] = df.loc['Ali'].apply(lambda x: x - 2); df

In [None]:
#---------------

In [None]:
d = {'color':   {1: 'G', 2: 'F', 3: 'E', 4: 'I', 5: 'J', 6: 'J', 7: 'I', 8: 'H', 9: 'E', 10: 'H'},
     'clarity': {1: 1, 2: 2, 3: 4, 4: 3, 5: 1, 6: 5, 7: 6, 8: 2, 9: 3, 10: 4}}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df['color'] = df['color'].apply(list('JIHGFED').index); df

### map (func, kwargs)

Apply a function to a Dataframe elementwise.

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
# df.map(min)        Error

In [None]:
# df.map(sum)        Error

In [None]:
df.map(lambda x: x-5)

In [None]:
df.map(lambda x: '%.2f'%x)

In [None]:
#df.apply(lambda x: '%.2f'%x)     TypeError: cannot convert the series to <class 'float'>

In [None]:
#---------------

In [None]:
df

In [None]:
def f(x):
    """Grade a single score: 'A' if above 15, 'B' if at least 10, 'C' otherwise."""
    # `not x >= 10` (rather than `x < 10`) keeps values that compare false
    # against everything, such as NaN, in the 'C' bucket.
    if not x >= 10:
        return 'C'
    return 'A' if x > 15 else 'B'

In [None]:
df.map(f)

In [None]:
# on a Series, apply works element by element, so f causes no problem here.

df['Python'].apply(f)

In [None]:
# f operates on one element at a time, so it works with map but not with DataFrame.apply
#df.apply(f)      ValueError

In [None]:
#---------------

In [None]:
df['Java'].map({6: 0})

### transform (func, axis, args, kwargs)

Call func on self producing a DataFrame with the same axis shape as self.

Only perform transforming type operations.

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.transform(np.sqrt)

In [None]:
df.transform([np.sqrt])

In [None]:
df.transform([np.sqrt, np.square])

In [None]:
df.transform([np.square, lambda x: x-5])

In [None]:
df.transform([np.square, lambda x: x-5], axis=1)

### agg (func, axis, args, kwargs)

Aggregate using one or more operations over the specified axis.

Only perform aggregating type operations.

In [None]:
d = [[12, 20, 18], [13, 14, 6], [12, 8, 19]]
name = ['Ali', 'Sara', 'Taha']
dars = ['Python', 'C++', 'Java']

In [None]:
df = pd.DataFrame(d, name, dars); df

In [None]:
df.agg('min')

In [None]:
df.T.agg('min')

In [None]:
df.agg('min', axis=1)

In [None]:
df.agg(['min'])

In [None]:
df.agg(['min'], axis=1)

In [None]:
df.agg(['min', 'max', 'sum', 'average'])

In [None]:
df.agg(['min', 'max', 'sum', 'average'], axis=1)

### combine (other, func, fill_value)

Perform column-wise combine with another DataFrame.

In [None]:
def f(s1, s2):
    """Return s1 when its sum is strictly smaller than s2's sum, else s2."""
    if s1.sum() < s2.sum():
        return s1
    return s2


df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 6]})
df2 = pd.DataFrame({'A': [1, 1], 'B': [4, 5]})

In [None]:
display(df1, df2)

In [None]:
df1.combine(df2, f)

In [None]:
df1.combine(df2, np.minimum)

In [None]:
#---------------

In [None]:
def f(s1, s2):
    """Return s1 when its sum is strictly smaller than s2's sum, else s2."""
    if s1.sum() < s2.sum():
        return s1
    return s2


df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [None]:
display(df1, df2)

In [None]:
df1.combine(df2, f, fill_value=1)

In [None]:
df1.combine(df2, f, fill_value=3)

### groupby (by, axis, level, group_keys, dropna)

In [None]:
df = pd.DataFrame({'Brand': ['BMW', 'BMW', 'Benz', 'Benz'], 'Max Speed': [220, 180, 230, 200]}); df

In [None]:
df.groupby('Brand').apply(lambda x: x)

In [None]:
df.groupby('Brand').describe()

In [None]:
df.groupby(['Brand']).max()

In [None]:
df.set_index('Brand', inplace=True); df

In [None]:
df.groupby(['Brand']).max()

In [None]:
df.groupby(level=0).max()

..........

In [None]:
l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
df = pd.DataFrame(l, columns=["a", "b", "c"]); df

In [None]:
df.groupby('b').apply(lambda x: x)

In [None]:
df.groupby('b', dropna=False).apply(lambda x: x)

In [None]:
df.groupby("b").sum()

In [None]:
df.groupby(by="b", dropna=False).sum()

In [None]:
#---------------

In [None]:
l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
df = pd.DataFrame(l, columns=["a", "b", "c"]); df

In [None]:
df.groupby("a").sum()

In [None]:
df.groupby("a", dropna=False).sum()

..........

In [None]:
d = {'key1' : ['ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'sara'],
     'key2' : ['one', 'one', 'two', 'one', 'one', 'two', 'two'],
     'data' : [12, 16, 13, 20, 8, 17, 10]}

In [None]:
df = pd.DataFrame(d); df

In [None]:
df.groupby('key1').apply(lambda x: x)

In [None]:
df.groupby('key1').describe()

In [None]:
df.groupby('key1').max()

In [None]:
df.groupby('key1').max()[['data']]

In [None]:
df.groupby(['key1', 'key2']).apply(lambda x: x)

In [None]:
df.groupby(['key1', 'key2']).max()

In [None]:
#---------------

In [None]:
df

In [None]:
df['key1']

In [None]:
df['data']

In [None]:
df['data'].groupby(df['key1']).apply(lambda x: x)

In [None]:
df.groupby(df['key1']).apply(lambda x: x)['data']

In [None]:
df[['data']]

In [None]:
df[['data']].groupby(df['key1']).apply(lambda x: x)

In [None]:
df.groupby(df['key1']).apply(lambda x: x)[['data']]

In [None]:
df['data'].groupby(df['key1']).max()

In [None]:
df[['data']].groupby(df['key1']).max()

In [None]:
df.groupby(df['key1']).max()[['data']]

In [None]:
#---------------

In [None]:
df

In [None]:
df[df['key1']=='ali']

In [None]:
l = list(df.groupby('key1'))
display(l, l[0][1])

In [None]:
d = dict(list(df.groupby('key1')))
display(d, d['ali'])

..........

In [None]:
a = [5, 6, 7, 8, 9, 10, 11, 12, 13]
b = [15, 16, 17, 18, 19, 20, 21, 22, 23]

In [None]:
df = pd.DataFrame({'col1': a, 'col2': b}); df

In [None]:
q = pd.cut(df['col1'], 4); q

In [None]:
df.groupby(q, observed=True).apply(lambda x: x)

In [None]:
df['col1'].groupby(q, observed=True).apply(lambda x: x)

In [None]:
df['col2'].groupby(q, observed=True).apply(lambda x: x)

In [None]:
df[['col2']].groupby(q, observed=True).apply(lambda x: x)

In [None]:
#---------------

In [None]:
def f(g):
    """Return [g.max(), g.count()] for one group."""
    largest = g.max()
    how_many = g.count()
    return [largest, how_many]

In [None]:
df['col2'].groupby(q, observed=True).apply(f) 

In [None]:
#---------------

In [None]:
def myfunc(g):
    """Return a dict holding g.max() under 'max' and g.count() under 'count'."""
    return dict(max=g.max(), count=g.count())

In [None]:
df['col2'].groupby(q, observed=True).apply(myfunc) 

In [None]:
df['col2'].groupby(q, observed=True).apply(myfunc).unstack()

In [None]:
df['col2'].groupby(q, observed=True).agg(['max', 'count'])

..........

In [None]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [None]:
df = pd.DataFrame({'name': n, 'score': s}); df

In [None]:
df.groupby('name').apply(lambda x: x)

In [None]:
df.groupby('name').score.apply(lambda x: x)

In [None]:
df.groupby('name')['score'].apply(lambda x: x)

In [None]:
df.groupby('name')[['score']].apply(lambda x: x)

In [None]:
df.groupby('name').apply(lambda x: x)[['score']]

In [None]:
#---------------

In [None]:
df.groupby('name').max()

In [None]:
df.groupby('name').score.max()

In [None]:
df.groupby('name')['score'].max()

In [None]:
df.groupby('name')[['score']].max()

transform

In [None]:
df

In [None]:
df.groupby('name').count()

In [None]:
df.groupby('name').apply('count')

In [None]:
df.groupby('name').transform('count')

In [None]:
#---------------

In [None]:
df.groupby('name').transform('max')

In [None]:
df.groupby('name')[['score']].transform('max')

In [None]:
df.groupby('name')['score'].transform('max')

In [None]:
#---------------

In [None]:
df.groupby('name').transform(lambda x: x.max())

In [None]:
df.groupby('name').transform(lambda x: x)

In [None]:
df.groupby('name').transform(lambda x: x - 1)

In [None]:
#---------------

In [None]:
g = df.groupby('name')['score']
(df['score'] - g.transform('mean')) / g.transform('std')

In [None]:
g = df.groupby('name')['score']
(g.transform(lambda x: x) - g.transform('mean')) / g.transform('std')

...........

In [None]:
n = ['ali', 'ali', 'ali', 'ali', 'sara', 'sara', 'sara', 'taha', 'taha']
r = [7, 4, 9, 2, 4, 5, 1, 8, 9]
s = [11, 20, 13, 14, 15, 6, 12, 18, 19]

In [None]:
df = pd.DataFrame({'name': n, 'rank': r, 'score': s}); df

In [None]:
df.groupby('name').apply(lambda x: x)

In [None]:
df.groupby('name').score.apply(lambda x: x)

In [None]:
df.groupby('name')['score'].apply(lambda x: x)

In [None]:
df.groupby('name')[['score']].apply(lambda x: x)

In [None]:
df.groupby('name')[['score']].sum()

In [None]:
df.groupby('name')['rank'].apply(lambda x: x)

In [None]:
df.groupby('name')[['rank']].apply(lambda x: x)

In [None]:
df.groupby('name')[['rank', 'score']].apply(lambda x: x)

In [None]:
#---------------

In [None]:
df.groupby('name').transform('max')

In [None]:
df.groupby('name')['rank'].transform('max')

In [None]:
df.groupby('name')[['rank']].transform('max')

In [None]:
df.groupby('name')[['rank']].transform('mean')

In [None]:
df

In [None]:
df.groupby('name')[['rank']].transform(lambda x: x + 10)

In [None]:
df[['rank']] = df.groupby('name')[['rank']].transform(lambda x: x + 10); df

In [None]:
#---------------

In [None]:
df

In [None]:
df.groupby('name').min()

In [None]:
df.groupby('name').agg('min')

In [None]:
df.groupby('name').apply(lambda x: x.min())

In [None]:
df.groupby('name').transform('min')

...........

In [None]:
df = pd.DataFrame({'Bird' : ['A', 'A', 'B', 'B', 'B'],'Speed' : [380, 370, 24, 26,np.nan]}); df

In [None]:
df.groupby('Bird').mean()

In [None]:
df.groupby('Bird')['Speed'].apply(lambda x: x.fillna(x.mean()))

In [None]:
df.groupby('Bird')['Speed'].transform(lambda x: x.fillna(x.mean()))

In [None]:
df['Speed'] = df.groupby('Bird')['Speed'].transform(lambda x: x.fillna(x.mean())); df

...........

In [None]:
import seaborn as sns
df = sns.load_dataset('iris'); df

In [None]:
df.groupby(['species']).apply(lambda x: x)

In [None]:
df.groupby(['species']).apply(lambda x: x[:3])

In [None]:
#---------------

In [None]:
def f(x, n=3):
    """Return the leading slice x[:n]; n defaults to 3."""
    head = slice(None, n)
    return x[head]

In [None]:
df.groupby(['species']).apply(f)

In [None]:
df.groupby(['species']).apply(f, 2)

In [None]:
#---------------

In [None]:
df.groupby(['species']).min()

In [None]:
df.groupby(['species']).agg('min')

In [None]:
df.groupby(['species']).apply(lambda x: x.min())

In [None]:
df.groupby(['species']).transform('min')

In [None]:
#---------------

In [None]:
df

In [None]:
df.sort_values(by='sepal_length') 

In [None]:
df.sort_values(by='sepal_length')[:6] 

In [None]:
def f(frame, n=3):
    """Sort frame by its 'sepal_length' column (ascending) and return the first n rows."""
    ordered = frame.sort_values(by='sepal_length')
    return ordered[:n]

In [None]:
f(df, 8)

In [None]:
df.groupby(['species']).apply(f)

In [None]:
df.groupby(['species']).apply(f, 5)

MultiIndex

In [None]:
l = [['BMW','BMW','Benz','Benz'], ['A','B','A','B']]
mi = pd.MultiIndex.from_arrays(l, names=('Brand', 'Class'))
df = pd.DataFrame({'Max Speed': [220, 180, 230, 200]}, index=mi); df

In [None]:
df.groupby(level=0).apply(lambda x: x)

In [None]:
df.groupby(level=1).apply(lambda x: x)

In [None]:
df.groupby(level=0).describe()

In [None]:
df.groupby(level=0).mean()

In [None]:
#---------------

In [None]:
df.reset_index(('Brand', 'Class'), inplace=True); df

In [None]:
df.groupby(['Brand', 'Class']).apply(lambda x: x)

In [None]:
df.groupby('Brand').apply(lambda x: x)

In [None]:
df.groupby("Class").describe()

In [None]:
df.groupby("Class").max()

In [None]:
# MultiColumns

In [None]:
arr = np.array([[11, 12, 16, 4, 15],[17, 2, 18, 19, 10],[7, 15, 13, 14, 11],[8, 17, 13, 20, 12]])
ci = pd.MultiIndex.from_arrays([[1, 2, 3, 1, 2],['Ali', 'Ali', 'Ali', 'Sara', 'Sara']],names=['X', 'Y'])

In [None]:
df = pd.DataFrame(arr, columns=ci); df

In [None]:
# Group the columns by their 'Y' level. The axis keyword of DataFrame.groupby
# is deprecated since pandas 2.1, so group the transposed frame and transpose back.
df.T.groupby(level='Y').max().T

In [None]:
df.T

In [None]:
df.T.groupby(level='Y').max()

In [None]:
df.T.groupby(level='Y').max().T