# Grouping using `groupby`

In [1]:
import pandas as pd

In [2]:
# Sample readings: the same three cities are listed twice, in the same order,
# so every city ends up with exactly two temperature/humidity observations.
city_temp = {
    'City': ['Anaheim', 'Baltimore', 'Clearwater'] * 2,
    'Temp (C)': [31, 21, 26, 30, 22, 27],
    'Humidity': [34, 24, 23, 24, 20, 26],
}

# One row per reading; the dict keys become the column names.
df_city_temp = pd.DataFrame(data=city_temp)

In [3]:
# Display the whole frame (only six rows) to confirm it was built as intended.
df_city_temp

Unnamed: 0,City,Temp (C),Humidity
0,Anaheim,31,34
1,Baltimore,21,24
2,Clearwater,26,23
3,Anaheim,30,24
4,Baltimore,22,20
5,Clearwater,27,26


## 1. Display Average Temperature.

In [4]:
# Average over all six readings, regardless of which city they belong to.
df_city_temp['Temp (C)'].mean()

26.166666666666668

## 2. Display Average Temperature by City

In [5]:
# Split the rows by 'City', pick the temperature column, then average within each city.
df_city_temp.groupby('City')['Temp (C)'].mean()

City
Anaheim       30.5
Baltimore     21.5
Clearwater    26.5
Name: Temp (C), dtype: float64

In [6]:
# Alternative spelling: take the temperature Series first and group it by the
# values of the 'City' column. The result is the same as the previous cell.
df_city_temp['Temp (C)'].groupby(df_city_temp['City']).mean()

City
Anaheim       30.5
Baltimore     21.5
Clearwater    26.5
Name: Temp (C), dtype: float64

## 3. Display All The Groups (Iterate Over The Groups)

In [7]:
# Group the rows by city; `cities` is reused by the sections that follow.
cities = df_city_temp.groupby('City')

# Printing the GroupBy object shows only its repr, not the grouped rows.
print(cities)
print()

# Iterating yields (group key, sub-DataFrame) pairs, one per distinct city.
for city_name, city_rows in cities:
    print(city_name, city_rows, sep='\n', end='\n\n')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x11f3419d0>

Anaheim
      City  Temp (C)  Humidity
0  Anaheim        31        34
3  Anaheim        30        24

Baltimore
        City  Temp (C)  Humidity
1  Baltimore        21        24
4  Baltimore        22        20

Clearwater
         City  Temp (C)  Humidity
2  Clearwater        26        23
5  Clearwater        27        26



## 4. Display The Indices In Each Group.

In [8]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# .groups maps each group key to the row labels that belong to that group.
cities.groups

{'Anaheim': [0, 3], 'Baltimore': [1, 4], 'Clearwater': [2, 5]}

## 5. Display The First And The Last Entry In Each Group.

In [9]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# first() returns, for every city, the first entry of each column.
cities.first()

Unnamed: 0_level_0,Temp (C),Humidity
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Anaheim,31,34
Baltimore,21,24
Clearwater,26,23


In [10]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# last() is the counterpart of first(): the last entry of each column per city.
cities.last()

Unnamed: 0_level_0,Temp (C),Humidity
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Anaheim,30,24
Baltimore,22,20
Clearwater,27,26


## 6. Count The Number of Entries In Each Group.

In [11]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# size() counts the rows in each group and returns a Series indexed by city.
cities.size()

City
Anaheim       2
Baltimore     2
Clearwater    2
dtype: int64

## 7. Display A Specific Group Out of All The Groups.

In [12]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# get_group() returns the rows of a single group as a DataFrame.
cities.get_group('Baltimore')

Unnamed: 0,City,Temp (C),Humidity
1,Baltimore,21,24
4,Baltimore,22,20


## 8. Apply Functions To Groups [e.g. min, max and mean]

In [13]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# Column-wise minimum within each city.
cities.min()

Unnamed: 0_level_0,Temp (C),Humidity
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Anaheim,30,24
Baltimore,21,20
Clearwater,26,23


In [15]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# Column-wise maximum within each city.
cities.max()

Unnamed: 0_level_0,Temp (C),Humidity
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Anaheim,31,34
Baltimore,22,24
Clearwater,27,26


In [14]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# Column-wise mean within each city; the integer inputs produce float results.
cities.mean()

Unnamed: 0_level_0,Temp (C),Humidity
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Anaheim,30.5,29.0
Baltimore,21.5,22.0
Clearwater,26.5,24.5


In [16]:
# Same per-city temperature average as in section 2, this time stored in a
# variable before being displayed.
mean_temp = df_city_temp.groupby('City')['Temp (C)'].mean()

mean_temp

City
Anaheim       30.5
Baltimore     21.5
Clearwater    26.5
Name: Temp (C), dtype: float64

## 9. Display Average Temperature CityWise With Prefix Avg_Temp_.

In [17]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# On a Series, add_prefix() prepends the string to every index label (here the city names).
cities['Temp (C)'].mean().add_prefix('Avg_Temp_')

City
Avg_Temp_Anaheim       30.5
Avg_Temp_Baltimore     21.5
Avg_Temp_Clearwater    26.5
Name: Temp (C), dtype: float64

In [18]:
# Same idea for the humidity column: average per city, then relabel the index.
cities['Humidity'].mean().add_prefix('Avg_Humidity_')

City
Avg_Humidity_Anaheim       29.0
Avg_Humidity_Baltimore     22.0
Avg_Humidity_Clearwater    24.5
Name: Humidity, dtype: float64

## 10. Display Descriptive Statistics GroupWise.

In [19]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# describe() reports count, mean, std, min, quartiles and max for each column, per city.
cities.describe()

Unnamed: 0_level_0,Temp (C),Temp (C),Temp (C),Temp (C),Temp (C),Temp (C),Temp (C),Temp (C),Humidity,Humidity,Humidity,Humidity,Humidity,Humidity,Humidity,Humidity
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
City,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Anaheim,2.0,30.5,0.707107,30.0,30.25,30.5,30.75,31.0,2.0,29.0,7.071068,24.0,26.5,29.0,31.5,34.0
Baltimore,2.0,21.5,0.707107,21.0,21.25,21.5,21.75,22.0,2.0,22.0,2.828427,20.0,21.0,22.0,23.0,24.0
Clearwater,2.0,26.5,0.707107,26.0,26.25,26.5,26.75,27.0,2.0,24.5,2.12132,23.0,23.75,24.5,25.25,26.0


## 11. Perform Statistical Computations Using agg() Function.

In [20]:
# `cities` is the GroupBy object created in section 3: df_city_temp.groupby('City').
# Passing a list of function names applies each of them to every column,
# producing two-level column labels (column, statistic).
cities.agg(['min', 'max', 'mean'])

Unnamed: 0_level_0,Temp (C),Temp (C),Temp (C),Humidity,Humidity,Humidity
Unnamed: 0_level_1,min,max,mean,min,max,mean
City,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Anaheim,30,31,30.5,24,34,29.0
Baltimore,21,22,21.5,20,24,22.0
Clearwater,26,27,26.5,23,26,24.5


In [21]:
# Restricting to one column first gives a flat table: one column per statistic.
cities['Temp (C)'].agg(['min', 'max', 'mean'])

Unnamed: 0_level_0,min,max,mean
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Anaheim,30,31,30.5
Baltimore,21,22,21.5
Clearwater,26,27,26.5


In [22]:
# The same three statistics, computed for the humidity column only.
cities['Humidity'].agg(['min', 'max', 'mean'])

Unnamed: 0_level_0,min,max,mean
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Anaheim,24,34,29.0
Baltimore,20,24,22.0
Clearwater,23,26,24.5


## 12. Create A Function To Get The Statistics GroupWise.

In [None]:
# cities = df_city_temp.groupby('City')

In [23]:
def func(city):
    """Return the per-column minimum and mean of one group's readings.

    Parameters
    ----------
    city : pandas.DataFrame or pandas.Series
        The rows of a single group, e.g. as handed over by
        ``groupby(...).apply(func)``.

    Returns
    -------
    dict
        ``{'Min': ..., 'Mean': ...}``. For a DataFrame input each value is a
        Series indexed by the numeric column names.
    """
    # The group handed over by apply() may still carry the text 'City' column.
    # Averaging a text column raises TypeError on pandas >= 2.0, so restrict a
    # DataFrame to its numeric columns first. A Series has no select_dtypes
    # and is used as is.
    if hasattr(city, 'select_dtypes'):
        city = city.select_dtypes(include='number')
    return {'Min' : city.min(),
            'Mean': city.mean()}

In [24]:
# Select the numeric columns explicitly so that the grouping column 'City' is
# not handed to func: letting apply() operate on the grouping column is
# deprecated in pandas 2.2+, and averaging its text values fails on pandas >= 2.0.
city_temp_apply = df_city_temp.groupby('City')[['Temp (C)', 'Humidity']].apply(func)

# Each entry is the dict returned by func for that city.
city_temp_apply

City
Anaheim       {'Min': [30, 24], 'Mean': [30.5, 29.0]}
Baltimore     {'Min': [21, 20], 'Mean': [21.5, 22.0]}
Clearwater    {'Min': [26, 23], 'Mean': [26.5, 24.5]}
dtype: object

## 13. Nesting grouping values

In [25]:
# Passing a list of columns groups by every distinct (City, Temp (C)) combination.
nested_grouping = df_city_temp.groupby(['City', 'Temp (C)'])

In [26]:
# Displaying the GroupBy object shows only its repr, not the groups themselves.
nested_grouping

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x11f3dd670>

In [27]:
# Each key is now a (city, temperature) tuple identifying one group.
for group_key, group_rows in nested_grouping:
    print(group_key, group_rows, sep='\n', end='\n\n')

('Anaheim', 30)
      City  Temp (C)  Humidity
3  Anaheim        30        24

('Anaheim', 31)
      City  Temp (C)  Humidity
0  Anaheim        31        34

('Baltimore', 21)
        City  Temp (C)  Humidity
1  Baltimore        21        24

('Baltimore', 22)
        City  Temp (C)  Humidity
4  Baltimore        22        20

('Clearwater', 26)
         City  Temp (C)  Humidity
2  Clearwater        26        23

('Clearwater', 27)
         City  Temp (C)  Humidity
5  Clearwater        27        26

