In [2]:
import pandas as pd

In [3]:
# Raw column-oriented records: ten entries per column.
data = {
    'age': [25, 22, 18, 30, 45, 50, 35, 20, 55, 40],
    'gender': ['M', 'F', 'F', 'M', 'M', 'F', 'M', 'F', 'M', 'M'],
    'score': [90, 80, 75, 95, 70, 85, 75, 90, 95, 85],
}
data
        

{'age': [25, 22, 18, 30, 45, 50, 35, 20, 55, 40],
 'gender': ['M', 'F', 'F', 'M', 'M', 'F', 'M', 'F', 'M', 'M'],
 'score': [90, 80, 75, 95, 70, 85, 75, 90, 95, 85]}

In [4]:
# Build the working DataFrame: one column per key of `data`.
df = pd.DataFrame(data=data)
df

Unnamed: 0,age,gender,score
0,25,M,90
1,22,F,80
2,18,F,75
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
7,20,F,90
8,55,M,95
9,40,M,85


# value_counts()

In [5]:
# Absolute frequency of each gender value.
df['gender'].value_counts()

gender
M    6
F    4
Name: count, dtype: int64

In [6]:
# Relative frequency: proportions per gender value instead of raw counts.
df.gender.value_counts(normalize=True)

gender
M    0.6
F    0.4
Name: proportion, dtype: float64

# where()

In [7]:
# Keep only the rows whose age is above 30.
# A plain boolean mask is used instead of `df.where(cond, other=0).all(1)`:
# that form derives the row mask from the truthiness of every cell, so a
# matching row holding a legitimate 0 or empty string in any column would be
# dropped as well.
df[df.age > 30]

Unnamed: 0,age,gender,score
4,45,M,70
5,50,F,85
6,35,M,75
8,55,M,95
9,40,M,85


In [8]:
# Boolean masks, one per filter condition, combined in a later cell.
f1 = df['age'] > 30
f2 = df['gender'] == 'M'

In [9]:
# Rows satisfying both masks (age above 30 AND gender 'M').
# Indexing with the combined mask directly avoids the
# `df.where(mask, other=0).all(1)` form, which would also discard matching
# rows that contain a falsy value (0, empty string) in any column.
df[f1 & f2]

Unnamed: 0,age,gender,score
4,45,M,70
6,35,M,75
8,55,M,95
9,40,M,85


# isin()

In [10]:
# Rows whose age is one of the listed values.
df.loc[df['age'].isin([25, 35])]

Unnamed: 0,age,gender,score
0,25,M,90
6,35,M,75


In [11]:
# Per-column membership test: a row is kept only when every checked column
# matches its allowed values (age in [25, 35] and gender in ['M']).
df[
    df[['age', 'gender']]
    .isin({'age': [25, 35], 'gender': ['M']})
    .all(axis=1)
]

Unnamed: 0,age,gender,score
0,25,M,90
6,35,M,75


# cut and qcut

In [12]:
# Bucket scores by the fixed edges 60/70/80/90/100, one label per interval.
# NOTE(review): the column name 'scoce_bins' looks like a typo for
# 'score_bins'; it is kept as-is because stored outputs use it -- confirm
# before renaming.
bin_names = ['Very Low', 'Low', 'Medium', 'High']
df['scoce_bins'] = pd.cut(
    df['score'],
    bins=[60, 70, 80, 90, 100],
    labels=bin_names,
)

In [13]:
# Display the current contents of df.
df

Unnamed: 0,age,gender,score,scoce_bins
0,25,M,90,Medium
1,22,F,80,Low
2,18,F,75,Low
3,30,M,95,High
4,45,M,70,Very Low
5,50,F,85,Medium
6,35,M,75,Low
7,20,F,90,Medium
8,55,M,95,High
9,40,M,85,Medium


In [14]:
# Split ages into four quantile-based bins and label them.
# Note: this rebinds `bin_names`, replacing the score labels defined earlier.
bin_names = ['young', 'mid-young', 'mid-adult', 'adult']
df['age_bins'] = pd.qcut(df['age'], q=4, labels=bin_names)

In [15]:
# Display the current contents of df.
df

Unnamed: 0,age,gender,score,scoce_bins,age_bins
0,25,M,90,Medium,mid-young
1,22,F,80,Low,young
2,18,F,75,Low,young
3,30,M,95,High,mid-young
4,45,M,70,Very Low,adult
5,50,F,85,Medium,adult
6,35,M,75,Low,mid-adult
7,20,F,90,Medium,young
8,55,M,95,High,adult
9,40,M,85,Medium,mid-adult


# groupby()

In [19]:
# Group rows by gender once; the GroupBy object is reused by later cells.
grouped = df.groupby(by='gender')

In [22]:
# Mean score per gender, using the pre-built GroupBy object.
grouped.score.mean()

gender
F    82.5
M    85.0
Name: score, dtype: float64

In [23]:
# Same mean-score-per-gender result, written as a single chained expression.
(
    df
    .groupby(by='gender')['score']
    .mean()
)

gender
F    82.5
M    85.0
Name: score, dtype: float64

In [24]:
# Several aggregations of score at once: one output column per function.
grouped['score'].aggregate(['mean', 'sum', 'count'])

Unnamed: 0_level_0,mean,sum,count
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
F,82.5,330,4
M,85.0,510,6


In [27]:
# Column-specific aggregation: maximum age and mean score per gender.
df.groupby(by='gender').aggregate(
    {
        'age': 'max',
        'score': 'mean',
    }
)


Unnamed: 0_level_0,age,score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,50,82.5
M,55,85.0


# pivot_table()

In [28]:
# Mean score per gender expressed as a pivot table.
pd.pivot_table(df, index='gender', values='score', aggfunc='mean')

Unnamed: 0_level_0,score
gender,Unnamed: 1_level_1
F,82.5
M,85.0


In [30]:
# Two-level row index (gender, then age) with the mean score in each cell.
df.pivot_table(
    index=['gender', 'age'],
    values='score',
    aggfunc='mean',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,score
gender,age,Unnamed: 2_level_1
F,18,75
F,20,90
F,22,80
F,50,85
M,25,90
M,30,95
M,35,75
M,40,85
M,45,70
M,55,95


In [31]:
# Mean of both score and age per gender.
df.pivot_table(
    index='gender',
    values=['score', 'age'],
    aggfunc='mean',
)

Unnamed: 0_level_0,age,score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,27.5,82.5
M,38.333333,85.0


In [32]:
# Several aggregation functions applied to both value columns; the result
# has a two-level column index (function, then column).
df.pivot_table(
    index='gender',
    values=['score', 'age'],
    aggfunc=['sum', 'mean', 'count'],
)

Unnamed: 0_level_0,sum,sum,mean,mean,count,count
Unnamed: 0_level_1,age,score,age,score,age,score
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
F,110,330,27.5,82.5,4,4
M,230,510,38.333333,85.0,6,6


# nlargest and nsmallest

In [33]:
# The three highest scores, as a Series keyed by the original row index.
df.score.nlargest(n=3)

3    95
8    95
0    90
Name: score, dtype: int64

In [34]:
# Full rows for the three highest scores.
df.nlargest(n=3, columns='score')

Unnamed: 0,age,gender,score,scoce_bins,age_bins
3,30,M,95,High,mid-young
8,55,M,95,High,adult
0,25,M,90,Medium,mid-young


In [35]:
# The three lowest scores, as a Series keyed by the original row index.
df.score.nsmallest(n=3)

4    70
2    75
6    75
Name: score, dtype: int64

# query()

In [37]:
# Filter with a string expression: age above 25 and gender equal to "F".
df.query(expr='age > 25 and gender =="F"')

Unnamed: 0,age,gender,score,scoce_bins,age_bins
5,50,F,85,Medium,adult


# apply()

In [40]:
# Lower-cased copy of the gender column.
# The vectorised `.str` accessor replaces `apply(str.lower)`: it is the
# idiomatic pandas form and passes missing values through, whereas
# `str.lower` raises TypeError on any non-string entry.
df['gender_lowercase'] = df['gender'].str.lower()
df

Unnamed: 0,age,gender,score,scoce_bins,age_bins,gender_lowercase
0,25,M,90,Medium,mid-young,m
1,22,F,80,Low,young,f
2,18,F,75,Low,young,f
3,30,M,95,High,mid-young,m
4,45,M,70,Very Low,adult,m
5,50,F,85,Medium,adult,f
6,35,M,75,Low,mid-adult,m
7,20,F,90,Medium,young,f
8,55,M,95,High,adult,m
9,40,M,85,Medium,mid-adult,m


In [43]:
# Square of each age, computed with vectorised arithmetic instead of a
# per-element `apply(lambda ...)` call.
# NOTE(review): the column is named 'age_Score' but holds age squared; the
# name is kept unchanged so existing references keep working.
df['age_Score'] = df['age'] ** 2
df

Unnamed: 0,age,gender,score,scoce_bins,age_bins,gender_lowercase,age_Score
0,25,M,90,Medium,mid-young,m,625
1,22,F,80,Low,young,f,484
2,18,F,75,Low,young,f,324
3,30,M,95,High,mid-young,m,900
4,45,M,70,Very Low,adult,m,2025
5,50,F,85,Medium,adult,f,2500
6,35,M,75,Low,mid-adult,m,1225
7,20,F,90,Medium,young,f,400
8,55,M,95,High,adult,m,3025
9,40,M,85,Medium,mid-adult,m,1600
