In [1]:
import pandas as pd
import numpy as np

### Creating a series

In [2]:
# Build a Series of scalars; the labels become the index.
data = pd.Series(
    ['Belgium', 'Brussels', 11190846],
    index=['Country', 'Capital', 'Population'],
)
data

Country        Belgium
Capital       Brussels
Population    11190846
dtype: object

In [3]:
# Each label maps to a whole list, so every Series element is itself a list.
country_lists = dict(
    Country=['Belgium', 'India', 'Brazil'],
    Capital=['Brussels', 'New Delhi', 'Brasília'],
    Population=[11190846, 1303171035, 207847528],
)
data = pd.Series(country_lists)
data

Country                [Belgium, India, Brazil]
Capital         [Brussels, New Delhi, Brasília]
Population    [11190846, 1303171035, 207847528]
dtype: object

In [4]:
# The same mapping passed to DataFrame gives one column per key and one row per list position.
country_columns = dict(
    Country=['Belgium', 'India', 'Brazil'],
    Capital=['Brussels', 'New Delhi', 'Brasília'],
    Population=[11190846, 1303171035, 207847528],
)
data = pd.DataFrame(country_columns)
data.head()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


### Creating a dataframe

In [5]:
# Passing `columns` fixes the column order explicitly.
column_order = ['Country', 'Capital', 'Population']
data = pd.DataFrame(
    {
        'Country': ['Belgium', 'India', 'Brazil'],
        'Capital': ['Brussels', 'New Delhi', 'Brasília'],
        'Population': [11190846, 1303171035, 207847528],
    },
    columns=column_order,
)
data.head()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [6]:
# Underlying data as a NumPy array; the dtype is object because the columns mix str and int.
data.values

array([['Belgium', 'Brussels', 11190846],
       ['India', 'New Delhi', 1303171035],
       ['Brazil', 'Brasília', 207847528]], dtype=object)

In [7]:
# Rebuild the country frame that the renaming examples operate on.
countries = ['Belgium', 'India', 'Brazil']
capitals = ['Brussels', 'New Delhi', 'Brasília']
populations = [11190846, 1303171035, 207847528]
data = pd.DataFrame({'Country': countries, 'Capital': capitals, 'Population': populations})
data.head()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


### Renaming Columns

In [8]:
# rename returns a new frame with 'Population' relabelled; `data` itself is left unchanged.
data.rename(columns ={'Population': 'Pop'})

Unnamed: 0,Country,Capital,Pop
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [9]:
# Column labels need not be strings: here 'Population' is relabelled to the integer 0.
data.rename(columns ={'Population': 0})

Unnamed: 0,Country,Capital,0
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


### Reading EXCEL file

In [10]:
# An integer sheet_name selects a sheet by 0-based position, so 1 is the second sheet.
Temp = pd.read_excel('Temp.xlsx', sheet_name = 1)
Temp

Unnamed: 0,Name2,City2
0,Kalyan2,RCPM2
1,Vishwas2,BGLR2
2,Manoj2,ANPR2


In [11]:
# A list of sheet positions returns a dict mapping each position to its DataFrame.
Temp = pd.read_excel('Temp.xlsx', sheet_name = [0,1])
Temp

{0:       Name  City
 0   Kalyan  RCPM
 1  Vishwas  BGLR
 2    Manoj  ANPR,
 1:       Name2  City2
 0   Kalyan2  RCPM2
 1  Vishwas2  BGLR2
 2    Manoj2  ANPR2}

### Selecting Elements

In [12]:
# Sample frame with a string column, a numeric column and one missing Age.
data = [['Alex', 10], ['Alex', 20], ['Kalyan', np.nan], ['Bob', 12], ['Clarke', 20]]
# Cast only 'Age' to float: passing dtype=float to the constructor asks pandas to
# cast *every* column, which raises ValueError for the string column on pandas >= 2.0.
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': float})
print(df)

     Name   Age
0    Alex  10.0
1    Alex  20.0
2  Kalyan   NaN
3     Bob  12.0
4  Clarke  20.0


In [13]:
# Positional lookup: first row, first column.
df.iloc[0,0]

'Alex'

In [14]:
# All rows (0: is an open-ended slice) of the first column, returned as a Series.
df.iloc[0:,0]

0      Alex
1      Alex
2    Kalyan
3       Bob
4    Clarke
Name: Name, dtype: object

In [15]:
# First row, all columns, returned as a Series indexed by column name.
df.iloc[0,0:]

Name    Alex
Age     10.0
Name: 0, dtype: object

In [16]:
# Label-based lookup: row label 0, column 'Age'.
df.loc[0,'Age']

10.0

In [17]:
# Label-based lookup of several columns for row label 0.
df.loc[0,['Name','Age']]

Name    Alex
Age     10.0
Name: 0, dtype: object

In [18]:
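# Left commented out: df > 10 compares the string column 'Name' with an int, which raises TypeError.
# df.loc[:, (df>10).any()]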

### Boolean Indexing

### Don't forget parentheses around each condition when combining more than one

In [19]:
# Keep rows where Name is "Alex" OR Age exceeds 10. Each comparison is parenthesised
# because | binds more tightly than == and >.
df[(df['Name'] == "Alex") | (df['Age'] > 10)]

Unnamed: 0,Name,Age
0,Alex,10.0
1,Alex,20.0
3,Bob,12.0
4,Clarke,20.0


### Dropping rows or columns

In [20]:
# Drop the row labelled 0; a new frame is returned and df is not modified.
df.drop(0)

Unnamed: 0,Name,Age
1,Alex,20.0
2,Kalyan,
3,Bob,12.0
4,Clarke,20.0


In [21]:
# axis=1 targets columns, so this drops the 'Name' column.
df.drop('Name', axis = 1)

Unnamed: 0,Age
0,10.0
1,20.0
2,
3,12.0
4,20.0


### Sort and Rank

In [22]:
# Sort rows by index label (already ascending here, so the order is unchanged).
df.sort_index()

Unnamed: 0,Name,Age
0,Alex,10.0
1,Alex,20.0
2,Kalyan,
3,Bob,12.0
4,Clarke,20.0


In [23]:
# Sort by Age, largest first; the row with a missing Age is placed last.
df.sort_values(by= 'Age',ascending = False)

Unnamed: 0,Name,Age
1,Alex,20.0
4,Clarke,20.0
3,Bob,12.0
0,Alex,10.0
2,Kalyan,


### Rank

####  default_rank: this is the default behaviour obtained without using any parameter.

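####  max_rank: setting method = 'max', records that share the same value are all given the highest rank in their tied group (e.g. if 'cat' and 'dog' tie for the 2nd and 3rd positions, both are assigned rank 3). With method = 'min', as used in the cells below, they would both be assigned rank 2 instead.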

####  NA_bottom: choosing na_option = 'bottom', if there are records with NaN values they are placed at the bottom of the ranking.

####  pct_rank: when setting pct = True, the ranking is expressed as percentile rank.

In [24]:
# Default ranking: tied values share the average of their positions (both 'Alex' rows get 1.5).
df.rank()

Unnamed: 0,Name,Age
0,1.5,1.0
1,1.5,3.5
2,5.0,
3,3.0,2.0
4,4.0,3.5


In [25]:
# method='min': tied values all receive the lowest rank of their group.
df.rank(method = 'min')

Unnamed: 0,Name,Age
0,1.0,1.0
1,1.0,3.0
2,5.0,
3,3.0,2.0
4,4.0,3.0


In [26]:
# na_option='bottom': the missing Age is ranked last instead of being left as NaN.
df.rank(method = 'min', na_option = 'bottom')

Unnamed: 0,Name,Age
0,1.0,1.0
1,1.0,3.0
2,5.0,5.0
3,3.0,2.0
4,4.0,3.0


### Basic Information

In [27]:
# (number of rows, number of columns).
df.shape

(5, 2)

In [28]:
# Print index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    5 non-null      object 
 1   Age     4 non-null      float64
dtypes: float64(1), object(1)
memory usage: 208.0+ bytes


In [29]:
# Number of non-null entries in each column.
df.count()

Name    5
Age     4
dtype: int64

### Summary

In [30]:
# Column-wise sum: the strings in Name are concatenated, and the NaN in Age is skipped.
df.sum()

Name    AlexAlexKalyanBobClarke
Age                        62.0
dtype: object

In [31]:
# Running total down each column; the NaN in Age stays NaN and the total resumes after it.
df.cumsum()

Unnamed: 0,Name,Age
0,Alex,10.0
1,AlexAlex,30.0
2,AlexAlexKalyan,
3,AlexAlexKalyanBob,42.0
4,AlexAlexKalyanBobClarke,62.0


In [32]:
# Column-wise minimum; for the string column this is the lexicographically smallest name.
df.min()

Name    Alex
Age     10.0
dtype: object

In [33]:
# Column-wise maximum; for the string column this is the lexicographically largest name.
df.max()

Name    Kalyan
Age       20.0
dtype: object

In [34]:
# Two-level index (blooded, animal) built from explicit (outer, inner) pairs.
idx = pd.MultiIndex.from_tuples(
    [('warm', 'dog'), ('warm', 'falcon'), ('cold', 'fish'), ('cold', 'spider')],
    names=['blooded', 'animal'],
)

# Number of legs per animal, labelled by the two-level index.
s = pd.Series([4, 2, 0, 8], index=idx, name='legs')
s

blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

In [35]:
# Maximum over the whole Series, ignoring the index levels.
s.max()

8

In [36]:
# Maximum within each 'blooded' group. Series.max(level=...) was removed in
# pandas 2.0; grouping on the index level is the supported equivalent.
# sort=False keeps the groups in order of first appearance (warm, then cold).
s.groupby(level='blooded', sort=False).max()

blooded
warm    4
cold    8
Name: legs, dtype: int64

In [37]:
# Maximum within each 'animal' group. Series.max(level=...) was removed in
# pandas 2.0; grouping on the index level is the supported equivalent.
# sort=False keeps the groups in order of first appearance.
s.groupby(level='animal', sort=False).max()

animal
dog       4
falcon    2
fish      0
spider    8
Name: legs, dtype: int64

In [38]:
# Index label of the row with the smallest Age.
df['Age'].idxmin()

0

In [39]:
# Index label of the row with the largest Age; rows 1 and 4 tie at 20, and the first one is returned.
df['Age'].idxmax()

1

In [40]:
# Summary statistics; by default only the numeric column (Age) is described.
df.describe()

Unnamed: 0,Age
count,4.0
mean,15.5
std,5.259911
min,10.0
25%,11.5
50%,16.0
75%,20.0
max,20.0


In [41]:
# Mean of the numeric columns only. Since pandas 2.0, numeric_only defaults to
# False, so a bare df.mean() raises TypeError on the string column 'Name'.
df.mean(numeric_only=True)

Age    15.5
dtype: float64

In [42]:
# Median of the numeric columns only. Since pandas 2.0, numeric_only defaults to
# False, so a bare df.median() raises TypeError on the string column 'Name'.
df.median(numeric_only=True)

Age    16.0
dtype: float64

### Applying Functions

In [43]:
def f(x):
    """Return ``x`` squared."""
    return x**2

#### To apply a function to each element of a column

In [44]:
# Series.apply calls f once per element of Age; the result is stored as a new
# column, so df is modified in place. The missing Age stays NaN.
df['age_sq'] = df['Age'].apply(f)
df.head()

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,,
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [45]:
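# Left commented out: f would also be applied to the string column 'Name', and squaring a str raises TypeError.
# df.apply(f)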

In [46]:
# Series.map with a function gives the same element-wise result as Series.apply.
df['age_sq'] = df['Age'].map(f)
df.head()

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,,
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


#### First major difference: DEFINITION

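####     map is defined on Series ONLY (pandas 2.1+ also adds an element-wise DataFrame.map)
####     applymap is defined on DataFrames ONLY (deprecated since pandas 2.1 in favour of DataFrame.map)
####     apply is defined on BOTH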


### Handling Missing Values

### Operating on Null Values

In [47]:
# Boolean frame: True wherever a value is missing.
df.isnull()

Unnamed: 0,Name,Age,age_sq
0,False,False,False
1,False,False,False
2,False,True,True
3,False,False,False
4,False,False,False


In [48]:
# Boolean frame: True wherever a value is present (the inverse of isnull).
df.notnull()

Unnamed: 0,Name,Age,age_sq
0,True,True,True
1,True,True,True
2,True,False,False
3,True,True,True
4,True,True,True


### Dropping null or none values

In [49]:
# Drop every row that contains at least one missing value.
df.dropna()

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [50]:
# how='all' drops a row only if every value in it is missing, so nothing is removed here.
df.dropna(how = 'all')

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,,
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [51]:
# axis=1 drops the columns that contain any missing value, leaving only Name.
df.dropna(axis = 1)

Unnamed: 0,Name
0,Alex
1,Alex
2,Kalyan
3,Bob
4,Clarke


### Filling null values

In [52]:
# Replace every missing value with the constant 0.
df.fillna(0)

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,0.0,0.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [53]:
# Forward fill: each missing value takes the last valid value above it.
# fillna(method='ffill') is deprecated in recent pandas; ffill() is the direct equivalent.
df.ffill()

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,20.0,400.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [54]:
# Backward fill: each missing value takes the next valid value below it.
# fillna(method='bfill') is deprecated in recent pandas; bfill() is the direct equivalent.
df.bfill()

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,12.0,144.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [55]:
# Fill each numeric column with its own mean. numeric_only=True is required on
# pandas >= 2.0, where a bare df.mean() raises TypeError on the string column 'Name'.
df.fillna(value=df.mean(numeric_only=True))

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,15.5,261.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [56]:
# A dict fills per column: only Age is filled with its mean, age_sq keeps its NaN.
df.fillna(value = {'Age':df['Age'].mean()})

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,15.5,
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


In [57]:
# Linearly interpolate only the numeric columns: calling interpolate on a frame
# that still holds object (string) columns is deprecated in recent pandas.
numeric_filled = df.select_dtypes(include='number').interpolate(method='linear')
# assign overwrites the numeric columns in place, so the original column order is kept.
df.assign(**{col: numeric_filled[col] for col in numeric_filled.columns})

Unnamed: 0,Name,Age,age_sq
0,Alex,10.0,100.0
1,Alex,20.0,400.0
2,Kalyan,16.0,272.0
3,Bob,12.0,144.0
4,Clarke,20.0,400.0


### Methods of MultiIndex Creation

In [58]:
# 4x2 frame of random values indexed by (state, year). A seeded generator is used
# so the values, and every result derived from them, are the same on each run.
df_mi = pd.DataFrame(
    np.random.default_rng(42).random((4, 2)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=['data1', 'data2'],
)
df_mi.index.names = ['state', 'year']
df_mi.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0.980149,0.009838
a,2,0.45623,0.194718
b,1,0.744739,0.812603
b,2,0.621665,0.517168


In [59]:
# Column-wise maximum within each value of index level 0 ('state').
# DataFrame.max(level=...) was removed in pandas 2.0; group on the level instead.
# sort=False keeps the groups in order of first appearance.
df_mi.groupby(level=0, sort=False).max()

Unnamed: 0_level_0,data1,data2
state,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.980149,0.194718
b,0.744739,0.812603


In [60]:
# Column-wise maximum within each value of index level 1 ('year').
# DataFrame.max(level=...) was removed in pandas 2.0; group on the level instead.
# sort=False keeps the groups in order of first appearance.
df_mi.groupby(level=1, sort=False).max()

Unnamed: 0_level_0,data1,data2
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.980149,0.812603
2,0.621665,0.517168


### Indexing and Slicing a MultiIndex

In [61]:
# Populations keyed by (state, year); tuple keys become a two-level MultiIndex.
state_years = [
    ('California', 2000), ('California', 2010),
    ('Texas', 2000), ('Texas', 2010),
    ('New York', 2000), ('New York', 2010),
]
populations = [33871648, 37253956, 20851820, 25145561, 18976457, 19378102]
data = dict(zip(state_years, populations))
pop = pd.Series(data)

In [62]:
# Selecting a label of the outer level returns the inner-level (year) Series for it.
pop['California']

2000    33871648
2010    37253956
dtype: int64

In [63]:
# A list selector keeps the outer level in the result's index.
pop[['California']]

California  2000    33871648
            2010    37253956
dtype: int64

In [64]:
# Area and population per state, combined column-wise into one frame.
states = ['California', 'Texas', 'New York', 'Florida', 'Illinois']
area = pd.Series([423967, 695662, 141297, 170312, 149995], index=states)
pop = pd.Series([38332521, 26448193, 19651127, 19552860, 12882135], index=states)
data = pd.DataFrame({'area': area, 'pop': pop})
data.head()

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [65]:
# Add a derived column in place: population divided by area, row by row.
data['density'] = data['pop'] / data['area']

In [66]:
# Boolean row mask and column list in a single .loc call.
data.loc[data.density > 100, ['pop', 'density']]

Unnamed: 0,pop,density
New York,19651127,139.076746
Florida,19552860,114.806121


In [67]:
# Same result in two steps: filter the rows first, then select the columns.
data[data.density > 100][['pop', 'density']]

Unnamed: 0,pop,density
New York,19651127,139.076746
Florida,19552860,114.806121


### Combining datasets

In [68]:
# Two Series with disjoint integer labels, stacked end to end.
ser1 = pd.Series(list('ABC'), index=[1, 2, 3])
ser2 = pd.Series(list('DEF'), index=[4, 5, 6])
pd.concat([ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [69]:
# Two frames that share the 'employee' column; only Jake and Lisa appear in both.
df1 = pd.DataFrame(dict(
    employee=['Bob', 'Jake', 'Lisa', 'Sue'],
    group=['Accounting', 'Engineering', 'Engineering', 'HR'],
))
df2 = pd.DataFrame(dict(
    employee=['Lisa', 'Jake', 'Kal', 'Kum'],
    hire_date=[2004, 2008, 2012, 2016],
))
df1

Unnamed: 0,employee,group
0,Bob,Accounting
1,Jake,Engineering
2,Lisa,Engineering
3,Sue,HR


In [70]:
# Display the second merge input.
df2

Unnamed: 0,employee,hire_date
0,Lisa,2004
1,Jake,2008
2,Kal,2012
3,Kum,2016


In [71]:
# With no `on` or `how`, merge joins on the shared column ('employee') and keeps
# only the employees present in both frames.
df3 = pd.merge(df1, df2)
df3

Unnamed: 0,employee,group,hire_date
0,Jake,Engineering,2008
1,Lisa,Engineering,2004


In [72]:
# Outer join: keep every employee from either frame; values absent on one side become NaN.
df3 = pd.merge(df1, df2, how='outer', on ='employee')
df3

Unnamed: 0,employee,group,hire_date
0,Bob,Accounting,
1,Jake,Engineering,2008.0
2,Lisa,Engineering,2004.0
3,Sue,HR,
4,Kal,,2012.0
5,Kum,,2016.0


In [73]:
# Left join: keep every row of df1; hire_date is NaN where df2 has no match.
df3 = pd.merge(df1, df2, how='left', on ='employee')
df3

Unnamed: 0,employee,group,hire_date
0,Bob,Accounting,
1,Jake,Engineering,2008.0
2,Lisa,Engineering,2004.0
3,Sue,HR,


In [74]:
# Right join: keep every row of df2; group is NaN where df1 has no match.
df3 = pd.merge(df1, df2, how='right', on ='employee')
df3

Unnamed: 0,employee,group,hire_date
0,Lisa,Engineering,2004
1,Jake,Engineering,2008
2,Kal,,2012
3,Kum,,2016


### Group by

In [117]:
# Six rows cycling through keys A, B, C with data values 0..5.
df = pd.DataFrame(
    {'key': list('ABC' * 2), 'data': range(6)},
    columns=['key', 'data'],
)
df

Unnamed: 0,key,data
0,A,0
1,B,1
2,C,2
3,A,3
4,B,4
5,C,5


In [118]:
# Sum of `data` per key; reset_index turns the group key back into an ordinary column.
df.groupby(by= 'key').sum().reset_index()

Unnamed: 0,key,data
0,A,3
1,B,5
2,C,7


In [119]:
# Selecting a column on a groupby yields a SeriesGroupBy object; no aggregation has been applied yet.
df.groupby(by= 'key')['data']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000029C4F087700>

In [120]:
# Aggregate just the selected column, then restore `key` as a column.
df.groupby(by= 'key')['data'].sum().reset_index()

Unnamed: 0,key,data
0,A,3
1,B,5
2,C,7


In [121]:
# Per-key totals, with `key` restored as an ordinary column.
dffff = df.groupby(by='key').sum().reset_index()
# Key whose total is largest: idxmax gives the row label of the (first) maximum,
# and .loc reads that row's key in a single step instead of mask + chained indexing.
dffff.loc[dffff['data'].idxmax(), 'key']

'C'

### Multiple aggregation functions with group by

In [122]:
# A list of aggregations yields one sub-column per function under each data column.
df.groupby(by= 'key').aggregate(['min', 'max'])

Unnamed: 0_level_0,data,data
Unnamed: 0_level_1,min,max
key,Unnamed: 1_level_2,Unnamed: 2_level_2
A,0,3
B,1,4
C,2,5


In [123]:
# Same aggregation, with the group key moved back from the index into a column.
df.groupby(by= 'key').aggregate(['min', 'max']).reset_index()

Unnamed: 0_level_0,key,data,data
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max
0,A,0,3
1,B,1,4
2,C,2,5


In [124]:
# df4.groupby(level=0).agg({'a':lambda x:sum(x)/len(x),
# 'b': np.sum})

In [127]:
# Per-column aggregation spec mixing a custom function (a hand-written mean) with a
# built-in one; the lambda's result column is labelled <lambda_0>.
df.groupby(by= 'key').agg({'data':[lambda x:sum(x)/len(x), 'max']})

Unnamed: 0_level_0,data,data
Unnamed: 0_level_1,<lambda_0>,max
key,Unnamed: 1_level_2,Unnamed: 2_level_2
A,1.5,3
B,2.5,4
C,3.5,5


### Pivot tables

In [84]:
# NOTE(review): imports are conventionally grouped in the first cell; consider moving this one there.
import seaborn as sns
# Load seaborn's 'titanic' example dataset as a DataFrame.
titanic = sns.load_dataset('titanic')

#### The difference between pivot tables and GroupBy can sometimes cause confusion; it helps me to think of pivot tables as
#### essentially a multidimensional version of GroupBy aggregation. That is, you split-apply-combine, but both the split and the combine happen
#### not across a one-dimensional index, but across a two-dimensional grid.

## pivot tables as essentially a multi-dimensional version of GroupBy aggregation

In [85]:
# Preview the first five passengers.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [86]:
# Mean of 'survived' (the default aggfunc) for every sex x class combination.
# 'class' is categorical; observed is pinned explicitly because its default is
# deprecated and scheduled to change in recent pandas.
titanic.pivot_table(values='survived', index='sex', columns='class', observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [87]:
# Maximum of 'survived' per sex x class cell (1 wherever anyone in the cell survived).
# observed is pinned explicitly because its default for categorical keys is deprecated.
titanic.pivot_table(values='survived', index='sex', columns='class', aggfunc='max', observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,1,1,1
male,1,1,1


In [88]:
# Number of non-null 'survived' entries per sex x class cell.
# observed is pinned explicitly because its default for categorical keys is deprecated.
titanic.pivot_table(values='survived', index='sex', columns='class', aggfunc='count', observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,94,76,144
male,122,108,347


In [89]:
# Several aggregations at once: the dict maps the value column to a list of functions,
# producing one block of class columns per function.
# observed is pinned explicitly because its default for categorical keys is deprecated.
titanic.pivot_table(
    values='survived',
    index='sex',
    columns='class',
    aggfunc={'survived': ['mean', 'median']},
    observed=False,
)

Unnamed: 0_level_0,mean,mean,mean,median,median,median
class,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
female,0.968085,0.921053,0.5,1.0,1.0,0.5
male,0.368852,0.157407,0.135447,0.0,0.0,0.0


### Pivot table using Group by

In [90]:
# Pivot-table equivalent: group on both keys, aggregate, then unstack 'class' into columns.
# 'class' is categorical; observed is pinned explicitly because the groupby default
# is deprecated and changes in newer pandas.
titanic.groupby(['sex', 'class'], observed=False)['survived'].aggregate('mean').unstack()

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [91]:
# Two aggregations on the selected column, then unstack 'class' into columns.
# observed is pinned explicitly because the groupby default for categorical keys is deprecated.
titanic.groupby(['sex', 'class'], observed=False)['survived'].aggregate(['mean', 'median']).unstack()

Unnamed: 0_level_0,mean,mean,mean,median,median,median
class,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
female,0.968085,0.921053,0.5,1.0,1.0,0.5
male,0.368852,0.157407,0.135447,0.0,0.0,0.0


In [92]:
# Dict form of the aggregation spec; the result gains an extra 'survived' column level.
# observed is pinned explicitly because the groupby default for categorical keys is deprecated.
titanic.groupby(['sex', 'class'], observed=False).aggregate({'survived': ['mean', 'median']}).unstack()

Unnamed: 0_level_0,survived,survived,survived,survived,survived,survived
Unnamed: 0_level_1,mean,mean,mean,median,median,median
class,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
female,0.968085,0.921053,0.5,1.0,1.0,0.5
male,0.368852,0.157407,0.135447,0.0,0.0,0.0


In [93]:
# Shorthand for the mean-by-group pivot: call .mean() directly instead of aggregate('mean').
# observed is pinned explicitly because the groupby default for categorical keys is deprecated.
titanic.groupby(['sex', 'class'], observed=False)['survived'].mean().unstack()

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


## Pandas String operations

In [94]:
# Series of full names used by the string-method examples.
full_names = [
    'Graham Chapman',
    'John Cleese',
    'Terry Gilliam',
    'Eric Idle',
    'Terry Jones',
    'Michael Palin',
]
monte = pd.Series(full_names)

In [95]:
# Vectorised string methods live under the .str accessor.
monte.str.lower()

0    graham chapman
1       john cleese
2     terry gilliam
3         eric idle
4       terry jones
5     michael palin
dtype: object

In [96]:
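# Left commented out: a Series has no .lower() method (AttributeError); string methods go through .str.
# monte.lower()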

In [97]:
# True for elements made up only of numeric characters; none of the names qualify.
monte.str.isnumeric()

0    False
1    False
2    False
3    False
4    False
5    False
dtype: bool

In [98]:
# Split each name on whitespace, giving a list of words per element.
monte.str.split()

0    [Graham, Chapman]
1       [John, Cleese]
2     [Terry, Gilliam]
3         [Eric, Idle]
4       [Terry, Jones]
5     [Michael, Palin]
dtype: object

In [99]:
# Capture the first run of ASCII letters in each string (here, the first name).
monte.str.extract('([A-Za-z]+)')

Unnamed: 0,0
0,Graham
1,John
2,Terry
3,Eric
4,Terry
5,Michael


In [100]:
# Match whole strings whose first character is not an uppercase vowel and whose
# last character is not a lowercase vowel; non-matching names give an empty list.
monte.str.findall(r'^[^AEIOU].*[^aeiou]$')

0    [Graham Chapman]
1                  []
2     [Terry Gilliam]
3                  []
4       [Terry Jones]
5     [Michael Palin]
dtype: object

### Reshaping data sets

In [101]:
# Wide frame used for the melt example; each column is given as a {row label: value} mapping.
df = pd.DataFrame({
    'A': dict(enumerate(['a', 'b', 'c'])),
    'B': dict(enumerate([1, 3, 5])),
    'C': dict(enumerate([2, 4, 6])),
})
df

Unnamed: 0,A,B,C
0,a,1,2
1,b,3,4
2,c,5,6


In [102]:
# Wide to long: keep 'A' as the identifier and stack columns B and C into
# (variable, value) pairs, one row per original cell.
df.melt(id_vars = 'A', value_vars = ['B', 'C'])

Unnamed: 0,A,variable,value
0,a,B,1
1,b,B,3
2,c,B,5
3,a,C,2
4,b,C,4
5,c,C,6


### How to handle categorical variables

In [103]:
# Small frame with one categorical-like string column (A) and one numeric column (C).
df = pd.DataFrame(dict(A=['a', 'b', 'c', 'a'], C=[1, 2, 3, 4]))
df

Unnamed: 0,A,C
0,a,1
1,b,2
2,c,3
3,a,4


In [104]:
# One-hot encode the string column; drop_first=True omits the first category ('a')
# so the indicators are not redundant. dtype=int keeps the 0/1 encoding: since
# pandas 2.0 the default indicator dtype is bool (True/False).
pd.get_dummies(df, drop_first=True, dtype=int)

Unnamed: 0,C,A_b,A_c
0,1,0,0
1,2,1,0
2,3,0,1
3,4,0,0


### Pandas Quantile

In [105]:
# Four small integer columns used for the quantile example.
quantile_columns = {
    "A": [1, 5, 3, 4, 2],
    "B": [3, 2, 4, 3, 4],
    "C": [2, 2, 7, 3, 4],
    "D": [4, 3, 6, 12, 7],
}
df = pd.DataFrame(quantile_columns)
df

Unnamed: 0,A,B,C,D
0,1,3,2,4
1,5,2,2,3
2,3,4,7,6
3,4,3,3,12
4,2,4,4,7


In [106]:
# 75th percentile of each column, returned as a Series named by the quantile.
df.quantile(0.75)

A    4.0
B    4.0
C    4.0
D    7.0
Name: 0.75, dtype: float64