# Pandas: Filtering and Grouping


In [1]:
import pandas as pd


In [2]:
# Load data
bom = pd.read_excel('../../data/BOM_VIC_20210323.xlsx')

# Remove unused column
bom = bom.drop(columns=['Station'])

# Remove 2021 values for consistency.
# Filter on the Year column instead of slicing off a fixed number of trailing
# rows, so the result does not depend on how many 2021 rows the file holds.
# Boolean selection keeps the original row labels, just like the slice did.
bom = bom[bom['Year'] != 2021]

## Filtering

In [4]:
# Boolean mask: True for rows whose 'Maximum' value is at least 45.
temp_filter = bom['Maximum'].ge(45)

# Show only the rows selected by the mask.
bom.loc[temp_filter]

Unnamed: 0,Year,Month,Day,Rainfall,Maximum,Minimum
30692,1939,1,13,0.0,45.6,16.9
56277,2009,1,30,0.0,45.1,25.7
56285,2009,2,7,0.0,46.4,18.7


In [5]:
# Boolean mask: True for rows whose 'Rainfall' value is strictly above 100.
rainfall_filter = bom['Rainfall'].gt(100)

# Show only the rows selected by the mask.
bom.loc[rainfall_filter]

Unnamed: 0,Year,Month,Day,Rainfall,Maximum,Minimum
39474,1963,1,29,108.0,21.2,16.1
54820,2005,2,3,113.4,19.5,11.0


In [7]:
# Two independent masks: 'Maximum' of at least 30 and 'Rainfall' of at least 10.
temp_filter = bom['Maximum'].ge(30)
rainfall_filter = bom['Rainfall'].ge(10)

# Element-wise AND keeps only the rows that satisfy both conditions.
hot_and_wet = temp_filter & rainfall_filter
bom.loc[hot_and_wet]

Unnamed: 0,Year,Month,Day,Rainfall,Maximum,Minimum
438,1856,3,14,10.7,31.0,20.3
1146,1858,2,20,34.5,34.7,11.7
2577,1862,1,21,10.9,32.3,13.9
3286,1863,12,31,19.1,30.0,16.8
4100,1866,3,24,13.2,30.0,16.3
5141,1869,1,28,15.5,36.7,16.7
6185,1871,12,8,16.5,31.7,15.5
7347,1875,2,12,19.6,34.1,15.8
8081,1877,2,15,13.7,31.0,17.3
12419,1889,1,1,26.4,31.1,20.0


## Grouping

In [9]:
# Mean monthly temperature
# First, display the column labels of `bom` to see which columns can be
# selected and grouped in the following cells.
bom.columns

Index(['Year', 'Month', 'Day', 'Rainfall', 'Maximum', 'Minimum'], dtype='object')

In [11]:
# Columns to average. 'Year' and 'Month' are included so they also appear as
# ordinary columns in the result.
temperature_columns = bom[['Year', 'Month', 'Maximum', 'Minimum']]

# Group by the key Series themselves (not by column name) so the 'Year' and
# 'Month' columns stay in the frame and are aggregated with the others.
year_month_keys = [bom['Year'], bom['Month']]
mean_temperature = temperature_columns.groupby(year_month_keys).mean()

# Display the last 24 (Year, Month) groups.
mean_temperature.tail(24)

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Month,Maximum,Minimum
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019,1,2019,1,28.377419,17.477419
2019,2,2019,2,26.007143,15.789286
2019,3,2019,3,24.76129,15.229032
2019,4,2019,4,22.046667,11.966667
2019,5,2019,5,17.819355,10.619355
2019,6,2019,6,15.473333,8.28
2019,7,2019,7,15.0,8.222581
2019,8,2019,8,14.474194,7.4
2019,9,2019,9,17.756667,9.083333
2019,10,2019,10,21.325806,10.719355


In [12]:
# Total rainfall per month
# A plain .sum() would also add up the 'Year' and 'Month' columns, producing
# meaningless totals for them. Sum only 'Rainfall' and carry 'Year' and 'Month'
# through unchanged by taking the first value in each (Year, Month) group.
total_rainfall = (
    bom[['Year', 'Month', 'Rainfall']]
    .groupby([bom['Year'], bom['Month']])
    .agg({'Year': 'first', 'Month': 'first', 'Rainfall': 'sum'})
)


In [14]:
# Per-column aggregation: keep the group's 'Year' and 'Month' as they are
# (first value in the group) and add up 'Rainfall'.
rainfall_aggregations = {'Year': 'first', 'Month': 'first', 'Rainfall': 'sum'}

rainfall_columns = bom[['Year', 'Month', 'Rainfall']]
year_month_keys = [bom['Year'], bom['Month']]
total_rainfall = rainfall_columns.groupby(year_month_keys).agg(rainfall_aggregations)

# Display the last 24 (Year, Month) groups.
total_rainfall.tail(24)

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Month,Rainfall
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,1,2019,1,11.2
2019,2,2019,2,18.6
2019,3,2019,3,12.0
2019,4,2019,4,7.2
2019,5,2019,5,53.4
2019,6,2019,6,49.8
2019,7,2019,7,45.2
2019,8,2019,8,51.4
2019,9,2019,9,41.0
2019,10,2019,10,24.0


In [None]:
# Mean monthly total rainfall


In [None]:
# Mean monthly temperature for last 40 years

In [None]:
# Mean monthly total rainfall for last 40 years

In [None]:
# Yearly mean temperature

In [None]:
# Yearly total rainfall