# Groupby and Duplicate Rows
We use `groupby` to group rows by specific criteria and then perform operations on each group, such as computing summary statistics or filling missing values based on the group.

In [1]:
#Importing libraries
import pandas as pd

In [2]:
# Read the Titanic CSV, keeping only the four columns used in this notebook
df = pd.read_csv(
    'titanic.csv',
    usecols=['Pclass', 'Age', 'Embarked', 'Cabin'],
)
df.head()

Unnamed: 0,Pclass,Age,Cabin,Embarked
0,3,22.0,,S
1,1,38.0,C85,C
2,3,26.0,,S
3,1,35.0,C123,S
4,3,35.0,,S


In [3]:
# Order the rows by passenger class first, then by port of embarkation.
# The sorted frame is only displayed; nothing is assigned back to df.
columns = ['Pclass', 'Embarked']
df.sort_values(by=columns)

Unnamed: 0,Pclass,Age,Cabin,Embarked
1,1,38.0,C85,C
30,1,40.0,,C
31,1,,B78,C
34,1,28.0,,C
52,1,49.0,D33,C
...,...,...,...,...
878,3,,,S
881,3,33.0,,S
882,3,22.0,,S
884,3,25.0,,S


In [4]:
# Overall average of the 'Age' column across the whole dataset
df['Age'].mean()

29.69911764705882

In [5]:
# Average 'Age' within each 'Embarked' group, computed with .groupby()
df.groupby('Embarked')['Age'].mean()

Embarked
C    30.814769
Q    28.089286
S    29.445397
Name: Age, dtype: float64

In [6]:
# Average 'Age' within each 'Pclass' group, computed with .groupby()
df.groupby('Pclass')['Age'].mean()

Pclass
1    38.233441
2    29.877630
3    25.140620
Name: Age, dtype: float64

Selecting rows by a specific value of a column, then aggregating per group

In [7]:
# Keep only the rows whose 'Embarked' value is 'C' and preview the first few
df.loc[df['Embarked'] == 'C'].head()

Unnamed: 0,Pclass,Age,Cabin,Embarked
1,1,38.0,C85,C
9,2,14.0,,C
19,3,,,C
26,3,,,C
30,1,40.0,,C


In [8]:
# Largest 'Age' within each 'Embarked' group
df.groupby('Embarked')['Age'].max()

Embarked
C    71.0
Q    70.5
S    80.0
Name: Age, dtype: float64

In [9]:
# Smallest 'Age' within each 'Embarked' group
df.groupby('Embarked')['Age'].min()

Embarked
C    0.42
Q    2.00
S    0.67
Name: Age, dtype: float64

In [10]:
# One table per 'Embarked' group with the count, mean, max and min of 'Age',
# produced by a single .agg() call instead of separate groupby cells
df.groupby('Embarked')['Age'].agg(['count', 'mean', 'max', 'min'])

Unnamed: 0_level_0,count,mean,max,min
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C,130,30.814769,71.0,0.42
Q,28,28.089286,70.5,2.0
S,554,29.445397,80.0,0.67


The End