In [1]:
import pandas

In [2]:
# Load the monthly weather data. header=0 marks the first row of the file as a
# header row, which is replaced by the column names given here; the Month
# column is then used as the row index.
weather = pandas.read_csv(
    'BrisbaneWeather.csv',
    header=0,
    names=['Month', 'MinTemp', 'MaxTemp', 'Rainfall'],
    index_col='Month',
)
weather

Unnamed: 0_level_0,MinTemp,MaxTemp,Rainfall
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
January,20.7,29.4,159.6
February,20.6,29.0,158.3
March,19.4,28.0,140.7
April,16.6,26.1,92.5
May,13.3,23.2,73.7
June,10.9,20.9,67.8
July,9.5,20.4,56.5
August,10.3,21.8,45.9
September,12.9,24.0,45.7
October,15.8,26.1,75.4


In [3]:
# Show summary statistics (count, mean, std, min, quartiles and max) for all numeric columns
weather.describe()

Unnamed: 0,MinTemp,MaxTemp,Rainfall
count,12.0,12.0,12.0
mean,15.658333,25.483333,95.533333
std,4.159427,3.317675,42.258411
min,9.5,20.4,45.7
25%,12.4,22.85,64.975
50%,16.2,26.1,83.95
75%,19.5,28.25,135.15
max,20.7,29.4,159.6


In [4]:
# Or compute a single statistic: the mean of the MinTemp column only
weather['MinTemp'].mean()

15.658333333333333

In [5]:
# Or the standard deviation of the MinTemp column only
weather['MinTemp'].std()

4.159427043876756

In [6]:
# Or the largest value in the MinTemp column
weather['MinTemp'].max()

20.7

In [7]:
# Or the 25% quantile (lower quartile) of the MinTemp column
weather['MinTemp'].quantile(q=0.25)

12.4

In [8]:
# Or the 10% quantile of the MinTemp column
weather['MinTemp'].quantile(q=0.1)

10.360000000000001

In [9]:
# Or the total (sum) of the MinTemp column
weather['MinTemp'].sum()

187.9

In [10]:
# Once more, the mean of the MinTemp column on its own
weather['MinTemp'].mean()

15.658333333333333

In [11]:
# Or the mean of all numeric columns.
# numeric_only=True makes the "numeric columns only" intent explicit: since
# pandas 2.0, mean() no longer silently skips non-numeric columns.
weather.mean(numeric_only=True)

MinTemp     15.658333
MaxTemp     25.483333
Rainfall    95.533333
dtype: float64

In [12]:
# Or the means of just the columns we select
weather.loc[:, ['MinTemp', 'MaxTemp']].mean()

MinTemp    15.658333
MaxTemp    25.483333
dtype: float64

In [13]:
# Or a list of different summary operations for each of the columns listed.
# Note that min and max are built-in functions, but mean is not, so the bare
# name `mean` raises a NameError. The error is caught and printed so that this
# deliberate demonstration does not stop a "Restart Kernel -> Run All".
try:
    weather[['MinTemp','MaxTemp']].aggregate([min, max, mean])
except NameError as err:
    print(f'NameError: {err}')

NameError: name 'mean' is not defined

In [14]:
# The mean function we need here lives on pandas.Series, so refer to it by its full name
weather[['MinTemp', 'MaxTemp']].agg([min, max, pandas.Series.mean])

Unnamed: 0,MinTemp,MaxTemp
min,9.5,20.4
max,20.7,29.4
mean,15.658333,25.483333


In [15]:
# Alternatively, give the name of the aggregate function as a string ('mean' in this example)
weather[['MinTemp', 'MaxTemp']].agg([min, max, 'mean'])

Unnamed: 0,MinTemp,MaxTemp
min,9.5,20.4
max,20.7,29.4
mean,15.658333,25.483333


In [16]:
# All three aggregate functions given by their string names
weather[['MinTemp', 'MaxTemp']].agg(['min', 'max', 'mean'])

Unnamed: 0,MinTemp,MaxTemp
min,9.5,20.4
max,20.7,29.4
mean,15.658333,25.483333


In [17]:
# We can use a dictionary to specify which aggregate function to apply to each of our columns.
# The functions are given by their string names: passing the built-ins min/max/sum
# triggers a FutureWarning in pandas 2.1+, and the string names give the same result.
weather.agg({'MinTemp': 'min', 'MaxTemp': 'max', 'Rainfall': 'sum'})

MinTemp        9.5
MaxTemp       29.4
Rainfall    1146.4
dtype: float64

In [18]:
# The dictionary can also specify a list of aggregate functions to apply to a column.
# Cells for combinations that were not requested (e.g. the sum of MinTemp) are left empty (NaN).
# String names are used because passing the built-ins min/max/sum triggers a
# FutureWarning in pandas 2.1+; the result is the same.
weather.agg({'MinTemp': ['min', 'max'], 'MaxTemp': 'max', 'Rainfall': 'sum'})

Unnamed: 0,MinTemp,MaxTemp,Rainfall
min,9.5,,
max,20.7,29.4,
sum,,,1146.4


In [19]:
# See BrisbaneWeather.xlsx for how to calculate the mean and sum of each column

In [20]:
# Try creating some examples of your own ...