# <center>Grouping DataFrames</center>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Premier League results workbook into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
premier_league = pd.read_excel("premier_league_games_full.xlsx")

In [3]:
# Preview the first 10 rows to check which columns were loaded.
premier_league.head(10)

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,1729,England Premier League,2008/2009,Manchester United,Newcastle United,1,1
1,1730,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0
2,1731,England Premier League,2008/2009,Sunderland,Liverpool,0,1
3,1732,England Premier League,2008/2009,West Ham United,Wigan Athletic,2,1
4,1733,England Premier League,2008/2009,Aston Villa,Manchester City,4,2
5,1734,England Premier League,2008/2009,Everton,Blackburn Rovers,2,3
6,1735,England Premier League,2008/2009,Middlesbrough,Tottenham Hotspur,2,1
7,1736,England Premier League,2008/2009,Bolton Wanderers,Stoke City,3,1
8,1737,England Premier League,2008/2009,Hull City,Fulham,2,1
9,1738,England Premier League,2008/2009,Chelsea,Portsmouth,4,0


In [4]:
# Mean of the HomeGoals column across every row of the dataset (no grouping yet).
premier_league["HomeGoals"].mean()

1.550986842105263

In [5]:
# Column-wise means for home and away goals, rounded to two decimals.
# The double-bracket selection returns a DataFrame containing just these columns.
(
    premier_league[["HomeGoals", "AwayGoals"]]
    .mean()
    .round(2)
)

HomeGoals    1.55
AwayGoals    1.16
dtype: float64

In [6]:
# Same column means as before, this time selecting the columns with .loc
# (all rows, two named columns) and leaving the result unrounded.
(
    premier_league
    .loc[:, ["HomeGoals", "AwayGoals"]]
    .mean()
)

HomeGoals    1.550987
AwayGoals    1.159539
dtype: float64

In [7]:
# List the distinct values found in the league_name column.
premier_league["league_name"].unique()

array(['England Premier League'], dtype=object)

In [8]:
# Standard deviation of home and away goals over the whole dataset.
(
    premier_league
    .loc[:, ["HomeGoals", "AwayGoals"]]
    .std()
)

HomeGoals    1.311615
AwayGoals    1.144629
dtype: float64

### Grouping a DataFrame allows you to aggregate the data at a different level

In [9]:
# Load the retail sales CSV into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
retail = pd.read_csv("retail_2016_2017.csv")

In [10]:
# Display the retail DataFrame; pandas truncates the rendering to the first and last rows.
# NOTE(review): consider retail.head() here to keep the saved output small.
retail

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


### To group data, use the `.groupby()` method and specify a column to group by

In [11]:
# Average sales per product family, rounded to two decimals.
# Single-bracket selection of "sales" makes the result a Series indexed by family.
(
    retail
    .groupby("family")["sales"]
    .mean()
    .round(2)
    .head()
)

family
AUTOMOTIVE       7.07
BABY CARE        0.25
BEAUTY           5.20
BEVERAGES     3306.44
BOOKS            0.20
Name: sales, dtype: float64

In [12]:
# Selecting with double brackets ([["sales"]]) keeps the aggregated result
# as a DataFrame instead of a Series.
retail_family = (
    retail
    .groupby("family")[["sales"]]
    .mean()
    .round(2)
)


In [13]:
# Show the first five rows of the per-family average sales DataFrame.
retail_family.head()

Unnamed: 0_level_0,sales
family,Unnamed: 1_level_1
AUTOMOTIVE,7.07
BABY CARE,0.25
BEAUTY,5.2
BEVERAGES,3306.44
BOOKS,0.2


In [14]:
# Average goals scored at home by each team (first five teams shown).
(
    premier_league
    .groupby("HomeTeam")[["HomeGoals"]]
    .mean()
    .head()
)

Unnamed: 0_level_0,HomeGoals
HomeTeam,Unnamed: 1_level_1
Arsenal,2.013158
Aston Villa,1.177632
Birmingham City,1.0
Blackburn Rovers,1.289474
Blackpool,1.578947


In [15]:
# Re-display the first five rows of the match data for reference.
premier_league.head()

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,1729,England Premier League,2008/2009,Manchester United,Newcastle United,1,1
1,1730,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0
2,1731,England Premier League,2008/2009,Sunderland,Liverpool,0,1
3,1732,England Premier League,2008/2009,West Ham United,Wigan Athletic,2,1
4,1733,England Premier League,2008/2009,Aston Villa,Manchester City,4,2


In [16]:
# Average home goals per season, ordered from the highest average to the lowest.
(
    premier_league
    .groupby("season")["HomeGoals"]
    .mean()
    .sort_values(ascending=False)
)

season
2009/2010    1.697368
2010/2011    1.623684
2011/2012    1.589474
2013/2014    1.573684
2012/2013    1.557895
2015/2016    1.492105
2014/2015    1.473684
2008/2009    1.400000
Name: HomeGoals, dtype: float64

In [17]:
# You can group by multiple columns by passing a list of column names to groupby().
# First, preview the retail data to see which columns are available as grouping keys.
retail.head(10)

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.0,0
1,1945945,2016-01-01,1,BABY CARE,0.0,0
2,1945946,2016-01-01,1,BEAUTY,0.0,0
3,1945947,2016-01-01,1,BEVERAGES,0.0,0
4,1945948,2016-01-01,1,BOOKS,0.0,0
5,1945949,2016-01-01,1,BREAD/BAKERY,0.0,0
6,1945950,2016-01-01,1,CELEBRATION,0.0,0
7,1945951,2016-01-01,1,CLEANING,0.0,0
8,1945952,2016-01-01,1,DAIRY,0.0,0
9,1945953,2016-01-01,1,DELI,0.0,0


In [18]:
# Total sales for every (family, store_nbr) combination.
# With the default as_index=True, the two grouping keys form a MultiIndex.
(
    retail
    .groupby(["family", "store_nbr"])[["sales"]]
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,sales
family,store_nbr,Unnamed: 2_level_1
AUTOMOTIVE,1,2524.000000
AUTOMOTIVE,2,3918.000000
AUTOMOTIVE,3,6790.000000
AUTOMOTIVE,4,2565.000000
AUTOMOTIVE,5,3667.000000
...,...,...
SEAFOOD,50,12773.966999
SEAFOOD,51,34250.948976
SEAFOOD,52,1219.475999
SEAFOOD,53,3745.180001


In [20]:
# Total sales per (family, store_nbr) pair.
# as_index=False keeps the grouping keys as ordinary columns instead of a MultiIndex.
retail_sums = (
    retail
    .groupby(["family", "store_nbr"], as_index=False)[["sales"]]
    .sum()
)

In [21]:
# Display the summed sales per (family, store_nbr) pair; the keys appear as regular columns.
retail_sums

Unnamed: 0,family,store_nbr,sales
0,AUTOMOTIVE,1,2524.000000
1,AUTOMOTIVE,2,3918.000000
2,AUTOMOTIVE,3,6790.000000
3,AUTOMOTIVE,4,2565.000000
4,AUTOMOTIVE,5,3667.000000
...,...,...,...
1777,SEAFOOD,50,12773.966999
1778,SEAFOOD,51,34250.948976
1779,SEAFOOD,52,1219.475999
1780,SEAFOOD,53,3745.180001


In [22]:
# Re-display the first 10 rows of the match data for reference.
premier_league.head(10)

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,1729,England Premier League,2008/2009,Manchester United,Newcastle United,1,1
1,1730,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0
2,1731,England Premier League,2008/2009,Sunderland,Liverpool,0,1
3,1732,England Premier League,2008/2009,West Ham United,Wigan Athletic,2,1
4,1733,England Premier League,2008/2009,Aston Villa,Manchester City,4,2
5,1734,England Premier League,2008/2009,Everton,Blackburn Rovers,2,3
6,1735,England Premier League,2008/2009,Middlesbrough,Tottenham Hotspur,2,1
7,1736,England Premier League,2008/2009,Bolton Wanderers,Stoke City,3,1
8,1737,England Premier League,2008/2009,Hull City,Fulham,2,1
9,1738,England Premier League,2008/2009,Chelsea,Portsmouth,4,0


In [27]:
# Total goals scored at home by each team across the dataset (first five teams shown).
(
    premier_league
    .groupby("HomeTeam")[["HomeGoals"]]
    .sum()
    .head()
)

Unnamed: 0_level_0,HomeGoals
HomeTeam,Unnamed: 1_level_1
Arsenal,306
Aston Villa,179
Birmingham City,38
Blackburn Rovers,98
Blackpool,30


In [30]:
# Total home goals for each team within each season.
# as_index=False keeps season and HomeTeam as ordinary columns for easy filtering later.
season_home = (
    premier_league
    .groupby(["season", "HomeTeam"], as_index=False)[["HomeGoals"]]
    .sum()
)

In [31]:
# Display total home goals per (season, HomeTeam) pair.
season_home

Unnamed: 0,season,HomeTeam,HomeGoals
0,2008/2009,Arsenal,31
1,2008/2009,Aston Villa,27
2,2008/2009,Blackburn Rovers,22
3,2008/2009,Bolton Wanderers,21
4,2008/2009,Chelsea,33
...,...,...,...
155,2015/2016,Swansea City,20
156,2015/2016,Tottenham Hotspur,35
157,2015/2016,Watford,20
158,2015/2016,West Bromwich Albion,20


In [33]:
# Keep only the rows whose HomeTeam is Arsenal, using a boolean mask.
season_home.loc[season_home["HomeTeam"] == "Arsenal"]

Unnamed: 0,season,HomeTeam,HomeGoals
0,2008/2009,Arsenal,31
20,2009/2010,Arsenal,48
40,2010/2011,Arsenal,33
60,2011/2012,Arsenal,39
80,2012/2013,Arsenal,47
100,2013/2014,Arsenal,36
120,2014/2015,Arsenal,41
140,2015/2016,Arsenal,31
