# Slicing and Indexing DataFrames
## 1.0 Explicit indexes

In [70]:
# import libraries
import numpy as np
import pandas as pd

In [71]:
# load the dataframe
temperatures = pd.read_csv("temperatures.csv", index_col = 0)

In [72]:
# inspect the dataframe's head
temperatures.head()

Unnamed: 0,date,city,country,avg_temp_c
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [73]:
# inspect the dataframe's information
temperatures.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16500 entries, 0 to 16499
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        16500 non-null  object 
 1   city        16500 non-null  object 
 2   country     16500 non-null  object 
 3   avg_temp_c  16407 non-null  float64
dtypes: float64(1), object(3)
memory usage: 644.5+ KB


### 1.1 Setting and removing indexes

To set an existing column as an index, use  .set_index("col_name")

To remove the index, use    .reset_index( )

To remove and drop the index from the dataFrame, use    .reset_index(drop = True)

In [74]:
# Set the index of temperatures to "city", assigning to temperatures_ind.
temperatures_ind = temperatures.set_index("city")
temperatures_ind.head()

Unnamed: 0_level_0,date,country,avg_temp_c
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abidjan,2000-01-01,Côte D'Ivoire,27.293
Abidjan,2000-02-01,Côte D'Ivoire,27.685
Abidjan,2000-03-01,Côte D'Ivoire,29.061
Abidjan,2000-04-01,Côte D'Ivoire,28.162
Abidjan,2000-05-01,Côte D'Ivoire,27.547


In [75]:
# Reset the index of temperatures_ind, keeping its contents.
temp_reset = temperatures_ind.reset_index()
temp_reset.head()

Unnamed: 0,city,date,country,avg_temp_c
0,Abidjan,2000-01-01,Côte D'Ivoire,27.293
1,Abidjan,2000-02-01,Côte D'Ivoire,27.685
2,Abidjan,2000-03-01,Côte D'Ivoire,29.061
3,Abidjan,2000-04-01,Côte D'Ivoire,28.162
4,Abidjan,2000-05-01,Côte D'Ivoire,27.547


In [76]:
# Reset the index of temperatures_ind, dropping its contents.
temp_drop_index = temperatures_ind.reset_index(drop = True)
temp_drop_index.head()

Unnamed: 0,date,country,avg_temp_c
0,2000-01-01,Côte D'Ivoire,27.293
1,2000-02-01,Côte D'Ivoire,27.685
2,2000-03-01,Côte D'Ivoire,29.061
3,2000-04-01,Côte D'Ivoire,28.162
4,2000-05-01,Côte D'Ivoire,27.547


### 1.2 Subsetting with .loc[ ]
.loc[ ]: a subsetting method that accepts index values. When you pass it a single argument, it will take a subset of rows.

In [77]:
# Create a list called cities that contains "Moscow" and "Saint Petersburg".
cities = ["Moscow", "Saint Petersburg"]

# Use [] subsetting to filter temperatures for rows where the city column takes a value in the cities list.
# Subset temperatures using square brackets
temperatures[temperatures["city"].isin(cities)]


Unnamed: 0,date,city,country,avg_temp_c
10725,2000-01-01,Moscow,Russia,-7.313
10726,2000-02-01,Moscow,Russia,-3.551
10727,2000-03-01,Moscow,Russia,-1.661
10728,2000-04-01,Moscow,Russia,10.096
10729,2000-05-01,Moscow,Russia,10.357
...,...,...,...,...
13360,2013-05-01,Saint Petersburg,Russia,12.355
13361,2013-06-01,Saint Petersburg,Russia,17.185
13362,2013-07-01,Saint Petersburg,Russia,17.234
13363,2013-08-01,Saint Petersburg,Russia,17.153


In [78]:
# Use .loc[] subsetting to filter temperatures_ind for rows where the city is in the cities list.
temperatures_ind.loc[cities]

Unnamed: 0_level_0,date,country,avg_temp_c
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Moscow,2000-01-01,Russia,-7.313
Moscow,2000-02-01,Russia,-3.551
Moscow,2000-03-01,Russia,-1.661
Moscow,2000-04-01,Russia,10.096
Moscow,2000-05-01,Russia,10.357
...,...,...,...
Saint Petersburg,2013-05-01,Russia,12.355
Saint Petersburg,2013-06-01,Russia,17.185
Saint Petersburg,2013-07-01,Russia,17.234
Saint Petersburg,2013-08-01,Russia,17.153


### 1.3 Setting multi level indexes a.k.a hierachical indexes
Indexes can also be made out of multiple columns, forming a multi-level index (sometimes called a hierarchical index). There is a trade-off to using these.

The benefit is that multi-level indexes make it more natural to reason about nested categorical variables. In the temperature dataset, the city is located in the country, so we can say a city is nested inside the country.

The main downside is that the code for manipulating indexes is different from the code for manipulating columns, so you have to learn two syntaxes and keep track of how your data is represented.



In [79]:
# Set the index of temperatures to the "country" and "city" columns, and assign this to temperatures_ind.
# Index temperatures by country & city
temperatures_ind = temperatures.set_index(["country", "city"])
temperatures_ind


Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Côte D'Ivoire,Abidjan,2000-01-01,27.293
Côte D'Ivoire,Abidjan,2000-02-01,27.685
Côte D'Ivoire,Abidjan,2000-03-01,29.061
Côte D'Ivoire,Abidjan,2000-04-01,28.162
Côte D'Ivoire,Abidjan,2000-05-01,27.547
...,...,...,...
China,Xian,2013-05-01,18.979
China,Xian,2013-06-01,23.522
China,Xian,2013-07-01,25.251
China,Xian,2013-08-01,24.528


In [80]:
# Specify two country/city pairs to keep: "Brazil"/"Rio De Janeiro" and "Pakistan"/"Lahore", assigning to rows_to_keep.
# List of tuples: Brazil, Rio De Janeiro & Pakistan, Lahore
rows_to_keep = [("Brazil", "Rio De Janeiro"),("Pakistan", "Lahore")]

# Subset for rows to keep
temperatures_ind.loc[rows_to_keep]



Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Brazil,Rio De Janeiro,2000-01-01,25.974
Brazil,Rio De Janeiro,2000-02-01,26.699
Brazil,Rio De Janeiro,2000-03-01,26.270
Brazil,Rio De Janeiro,2000-04-01,25.750
Brazil,Rio De Janeiro,2000-05-01,24.356
...,...,...,...
Pakistan,Lahore,2013-05-01,33.457
Pakistan,Lahore,2013-06-01,34.456
Pakistan,Lahore,2013-07-01,33.279
Pakistan,Lahore,2013-08-01,31.511


### 1.4 Sorting by index values
 use    .sort_index( ) to sort dataframes based on the index

 By default, sort_index( ) sorts all index values from outer to inner in ascending order.

 You can control the sorting by passing lists to the level and then the ascending arguments.

In [81]:
# Sort temperatures_ind by the index values.
temperatures_ind.sort_index()


Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,Kabul,2000-01-01,3.326
Afghanistan,Kabul,2000-02-01,3.454
Afghanistan,Kabul,2000-03-01,9.612
Afghanistan,Kabul,2000-04-01,17.925
Afghanistan,Kabul,2000-05-01,24.658
...,...,...,...
Zimbabwe,Harare,2013-05-01,18.298
Zimbabwe,Harare,2013-06-01,17.020
Zimbabwe,Harare,2013-07-01,16.299
Zimbabwe,Harare,2013-08-01,19.232


In [82]:
# Sort temperatures_ind by the index values at the "city" level.
temperatures_ind.sort_index(level = "city")

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Côte D'Ivoire,Abidjan,2000-01-01,27.293
Côte D'Ivoire,Abidjan,2000-02-01,27.685
Côte D'Ivoire,Abidjan,2000-03-01,29.061
Côte D'Ivoire,Abidjan,2000-04-01,28.162
Côte D'Ivoire,Abidjan,2000-05-01,27.547
...,...,...,...
China,Xian,2013-05-01,18.979
China,Xian,2013-06-01,23.522
China,Xian,2013-07-01,25.251
China,Xian,2013-08-01,24.528


In [83]:
# Sort temperatures_ind by ascending country then descending city.
temperatures_ind.sort_index(level = ["country", "city"], ascending = [True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,Kabul,2000-01-01,3.326
Afghanistan,Kabul,2000-02-01,3.454
Afghanistan,Kabul,2000-03-01,9.612
Afghanistan,Kabul,2000-04-01,17.925
Afghanistan,Kabul,2000-05-01,24.658
...,...,...,...
Zimbabwe,Harare,2013-05-01,18.298
Zimbabwe,Harare,2013-06-01,17.020
Zimbabwe,Harare,2013-07-01,16.299
Zimbabwe,Harare,2013-08-01,19.232


## 2.0 Slicing and subsetting with .loc( ) and iloc( )
### 2.1 Slicing index values

Slicing lets you select consecutive elements of an object using first:last syntax. DataFrames can be sliced by index values or by row/column number. Slicing by index values involves slicing inside the .loc[] method.

Compared to slicing lists, there are a few things to remember.

You can only slice an index if the index is sorted using .sort_index()).

To slice at the outer level, first and last can be strings.

To slice at inner levels, first and last should be tuples.

If you pass a single slice to .loc[ ], it will slice the rows.



In [84]:
# Sort the index of temperatures_ind
temperatures_srt = temperatures_ind.sort_index()

#slicing the outer index
# Subset rows from Pakistan to Russia
temperatures_srt.loc["Pakistan":"Russia"]



Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Pakistan,Faisalabad,2000-01-01,12.792
Pakistan,Faisalabad,2000-02-01,14.339
Pakistan,Faisalabad,2000-03-01,20.309
Pakistan,Faisalabad,2000-04-01,29.072
Pakistan,Faisalabad,2000-05-01,34.845
...,...,...,...
Russia,Saint Petersburg,2013-05-01,12.355
Russia,Saint Petersburg,2013-06-01,17.185
Russia,Saint Petersburg,2013-07-01,17.234
Russia,Saint Petersburg,2013-08-01,17.153


In [85]:
#slicing the inner index
# Subset rows from Pakistan, Lahore to Russia, Moscow
print(temperatures_srt.loc[("Pakistan", "Lahore"):("Russia","Moscow")])

                       date  avg_temp_c
country  city                          
Pakistan Lahore  2000-01-01      12.792
         Lahore  2000-02-01      14.339
         Lahore  2000-03-01      20.309
         Lahore  2000-04-01      29.072
         Lahore  2000-05-01      34.845
...                     ...         ...
Russia   Moscow  2013-05-01      16.152
         Moscow  2013-06-01      18.718
         Moscow  2013-07-01      18.136
         Moscow  2013-08-01      17.485
         Moscow  2013-09-01         NaN

[660 rows x 2 columns]


### 2.2 slicing both directions


In [86]:
# Use .loc[] slicing to subset rows from India, Hyderabad to Iraq, Baghdad.
temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad")]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
India,Hyderabad,2000-01-01,23.779
India,Hyderabad,2000-02-01,25.826
India,Hyderabad,2000-03-01,28.821
India,Hyderabad,2000-04-01,32.698
India,Hyderabad,2000-05-01,32.438
...,...,...,...
Iraq,Baghdad,2013-05-01,28.673
Iraq,Baghdad,2013-06-01,33.803
Iraq,Baghdad,2013-07-01,36.392
Iraq,Baghdad,2013-08-01,35.463


In [87]:
# Use .loc[] slicing to subset columns from date to avg_temp_c
temperatures_srt.loc[:, "date":"avg_temp_c"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,Kabul,2000-01-01,3.326
Afghanistan,Kabul,2000-02-01,3.454
Afghanistan,Kabul,2000-03-01,9.612
Afghanistan,Kabul,2000-04-01,17.925
Afghanistan,Kabul,2000-05-01,24.658
...,...,...,...
Zimbabwe,Harare,2013-05-01,18.298
Zimbabwe,Harare,2013-06-01,17.020
Zimbabwe,Harare,2013-07-01,16.299
Zimbabwe,Harare,2013-08-01,19.232


In [88]:
# Slice in both directions at once from Hyderabad to Baghdad, and date to avg_temp_c
temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad"), "date":"avg_temp_c"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
India,Hyderabad,2000-01-01,23.779
India,Hyderabad,2000-02-01,25.826
India,Hyderabad,2000-03-01,28.821
India,Hyderabad,2000-04-01,32.698
India,Hyderabad,2000-05-01,32.438
...,...,...,...
Iraq,Baghdad,2013-05-01,28.673
Iraq,Baghdad,2013-06-01,33.803
Iraq,Baghdad,2013-07-01,36.392
Iraq,Baghdad,2013-08-01,35.463


### 2.3 Slicing time series
Add the date column to the index, then use .loc[ ] to perform the subsetting. The important thing to remember is to keep your dates in ISO 8601 format, that is, "yyyy-mm-dd" for year-month-day, "yyyy-mm" for year-month, and "yyyy" for year.

Combine multiple Boolean conditions using logical operators, such as &. To do so in one line of code, you'll need to add parentheses () around each condition.

In [89]:
# Use Boolean conditions, not .isin() or .loc[], and the full date "yyyy-mm-dd", 
# to subset temperatures for rows in 2010 and 2011


temperatures_bool = temperatures[(temperatures["date"] >= "2010-01-01") & (temperatures["date"] <= "2011-12-31" )]
temperatures_bool


Unnamed: 0,date,city,country,avg_temp_c
120,2010-01-01,Abidjan,Côte D'Ivoire,28.270
121,2010-02-01,Abidjan,Côte D'Ivoire,29.262
122,2010-03-01,Abidjan,Côte D'Ivoire,29.596
123,2010-04-01,Abidjan,Côte D'Ivoire,29.068
124,2010-05-01,Abidjan,Côte D'Ivoire,28.258
...,...,...,...,...
16474,2011-08-01,Xian,China,23.069
16475,2011-09-01,Xian,China,16.775
16476,2011-10-01,Xian,China,12.587
16477,2011-11-01,Xian,China,7.543


In [90]:
# Set date as the index and sort the index
temperatures_ind = temperatures.set_index("date").sort_index()

# Use .loc[] to subset temperatures_ind for rows in 2010 and 2011
temperatures_ind.loc["2010":"2011"]

Unnamed: 0_level_0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,Faisalabad,Pakistan,11.810
2010-01-01,Melbourne,Australia,20.016
2010-01-01,Chongqing,China,7.921
2010-01-01,São Paulo,Brazil,23.738
2010-01-01,Guangzhou,China,14.136
...,...,...,...
2010-12-01,Jakarta,Indonesia,26.602
2010-12-01,Gizeh,Egypt,16.530
2010-12-01,Nagpur,India,19.120
2010-12-01,Sydney,Australia,19.559


In [91]:
# Use .loc[] to subset temperatures_ind for rows from Aug 2010 to Feb 2011
temperatures_ind.loc["2010-08":"2011-02"]

Unnamed: 0_level_0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-08-01,Calcutta,India,30.226
2010-08-01,Pune,India,24.941
2010-08-01,Izmir,Turkey,28.352
2010-08-01,Tianjin,China,25.543
2010-08-01,Manila,Philippines,27.101
...,...,...,...
2011-01-01,Dar Es Salaam,Tanzania,28.541
2011-01-01,Nairobi,Kenya,17.768
2011-01-01,Addis Abeba,Ethiopia,17.708
2011-01-01,Nanjing,China,0.144


### 2.4 Subsetting by rows / columns
iloc[ ] uses number indexes to subset dataframes

In [92]:
# Get 23rd row, 2nd column (index 22, 1)
temperatures.iloc[22,1]

'Abidjan'

In [93]:
# Use slicing to get the first 5 rows
temperatures.iloc[0:5, :]

Unnamed: 0,date,city,country,avg_temp_c
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [94]:
# Get all rows, columns 3 and 4 (index positions 2 to 4).
# Use slicing to get columns 3 to 4
temperatures.iloc[:, 2:4]

Unnamed: 0,country,avg_temp_c
0,Côte D'Ivoire,27.293
1,Côte D'Ivoire,27.685
2,Côte D'Ivoire,29.061
3,Côte D'Ivoire,28.162
4,Côte D'Ivoire,27.547
...,...,...
16495,China,18.979
16496,China,23.522
16497,China,25.251
16498,China,24.528


In [95]:
# Get the first 5 rows, columns 3 and 4.
# Use slicing in both directions at once
temperatures.iloc[0:5, 2:4]

Unnamed: 0,country,avg_temp_c
0,Côte D'Ivoire,27.293
1,Côte D'Ivoire,27.685
2,Côte D'Ivoire,29.061
3,Côte D'Ivoire,28.162
4,Côte D'Ivoire,27.547


## 3.0 Working with pivot tables
### 3.1 creating pivot tables

In [96]:
# Add a year column to temperatures, from the year component of the date column.
# Make a pivot table of the avg_temp_c column, with country and city as rows, and year as columns. 
# Assign to temp_by_country_city_vs_year

# view the temperatures dataframe
temperatures.head()

Unnamed: 0,date,city,country,avg_temp_c
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [103]:
# Add a year column to temperatures
temperatures["year"] = pd.DatetimeIndex(temperatures['date']).year
temperatures

Unnamed: 0,date,city,country,avg_temp_c,year
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293,2000
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685,2000
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061,2000
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162,2000
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547,2000
...,...,...,...,...,...
16495,2013-05-01,Xian,China,18.979,2013
16496,2013-06-01,Xian,China,23.522,2013
16497,2013-07-01,Xian,China,25.251,2013
16498,2013-08-01,Xian,China,24.528,2013


In [105]:
# Pivot avg_temp_c by country and city vs year
temp_by_country_city_vs_year = temperatures.pivot_table("avg_temp_c", index = ["country", "city"], columns = "year")
temp_by_country_city_vs_year


Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Afghanistan,Kabul,15.822667,15.847917,15.714583,15.132583,16.128417,14.847500,15.798500,15.518000,15.479250,15.093333,15.676000,15.812167,14.510333,16.206125
Angola,Luanda,24.410333,24.427083,24.790917,24.867167,24.216167,24.414583,24.138417,24.241583,24.266333,24.325083,24.440250,24.150750,24.240083,24.553875
Australia,Melbourne,14.320083,14.180000,14.075833,13.985583,13.742083,14.378500,13.991083,14.991833,14.110583,14.647417,14.231667,14.190917,14.268667,14.741500
Australia,Sydney,17.567417,17.854500,17.733833,17.592333,17.869667,18.028083,17.749500,18.020833,17.321083,18.175833,17.999000,17.713333,17.474333,18.089750
Bangladesh,Dhaka,25.905250,25.931250,26.095000,25.927417,26.136083,26.193333,26.440417,25.951333,26.004500,26.535583,26.648167,25.803250,26.283583,26.587000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United States,Chicago,11.089667,11.703083,11.532083,10.481583,10.943417,11.583833,11.870500,11.448333,10.242417,10.298333,11.815917,11.214250,12.821250,11.586889
United States,Los Angeles,16.643333,16.466250,16.430250,16.944667,16.552833,16.431417,16.623083,16.699917,17.014750,16.677000,15.887000,15.874833,17.089583,18.120667
United States,New York,9.969083,10.931000,11.252167,9.836000,10.389500,10.681417,11.519250,10.627333,10.641667,10.141833,11.357583,11.272250,11.971500,12.163889
Vietnam,Ho Chi Minh City,27.588917,27.831750,28.064750,27.827667,27.686583,27.884000,28.044000,27.866667,27.611417,27.853333,28.281750,27.675417,28.248750,28.455000


### 3.2 Subsetting pivot tables
A pivot table is just a DataFrame with sorted indexes. Use the .loc[ ] + slicing combination

In [106]:
# Subset for Egypt to India
temp_by_country_city_vs_year.loc["Egypt":"India"]

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Egypt,Alexandria,20.7445,21.454583,21.456167,21.221417,21.064167,21.082333,21.148167,21.50775,21.739,21.6705,22.459583,21.1815,21.552583,21.4385
Egypt,Cairo,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Egypt,Gizeh,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Ethiopia,Addis Abeba,18.24125,18.296417,18.46975,18.320917,18.29275,18.312833,18.427083,18.142583,18.165,18.765333,18.29825,18.60675,18.448583,19.539
France,Paris,11.739667,11.37125,11.871333,11.9095,11.338833,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833,12.32575,11.219917,11.011625
Germany,Berlin,10.963667,9.69025,10.264417,10.06575,9.822583,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833,10.556417,9.964333,10.1215
India,Ahmadabad,27.436,27.198083,27.719083,27.403833,27.628333,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833,27.290417,27.02725,27.608625
India,Bangalore,25.337917,25.528167,25.755333,25.92475,25.252083,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525,25.362083,26.042333,26.6105
India,Bombay,27.203667,27.243667,27.628667,27.578417,27.31875,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417,27.384917,27.1925,26.713
India,Calcutta,26.491333,26.515167,26.703917,26.561333,26.634333,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833,26.406917,26.935083,27.36925


In [107]:
# Subset for Egypt, Cairo to India, Delhi
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi")]

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Egypt,Cairo,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Egypt,Gizeh,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Ethiopia,Addis Abeba,18.24125,18.296417,18.46975,18.320917,18.29275,18.312833,18.427083,18.142583,18.165,18.765333,18.29825,18.60675,18.448583,19.539
France,Paris,11.739667,11.37125,11.871333,11.9095,11.338833,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833,12.32575,11.219917,11.011625
Germany,Berlin,10.963667,9.69025,10.264417,10.06575,9.822583,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833,10.556417,9.964333,10.1215
India,Ahmadabad,27.436,27.198083,27.719083,27.403833,27.628333,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833,27.290417,27.02725,27.608625
India,Bangalore,25.337917,25.528167,25.755333,25.92475,25.252083,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525,25.362083,26.042333,26.6105
India,Bombay,27.203667,27.243667,27.628667,27.578417,27.31875,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417,27.384917,27.1925,26.713
India,Calcutta,26.491333,26.515167,26.703917,26.561333,26.634333,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833,26.406917,26.935083,27.36925
India,Delhi,26.048333,25.862917,26.634333,25.721083,26.239917,25.716083,26.365917,26.145667,25.675,26.55425,26.52025,25.6295,25.889417,26.70925


In [108]:
# Subset in both directions at once
temp_by_country_city_vs_year.loc[("Egypt","Cairo"):("India", "Delhi"), 2005:2010]

Unnamed: 0_level_0,year,2005,2006,2007,2008,2009,2010
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Egypt,Cairo,22.0065,22.05,22.361,22.6445,22.625,23.71825
Egypt,Gizeh,22.0065,22.05,22.361,22.6445,22.625,23.71825
Ethiopia,Addis Abeba,18.312833,18.427083,18.142583,18.165,18.765333,18.29825
France,Paris,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833
Germany,Berlin,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833
India,Ahmadabad,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833
India,Bangalore,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525
India,Bombay,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417
India,Calcutta,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833
India,Delhi,25.716083,26.365917,26.145667,25.675,26.55425,26.52025


### 3.3 Calculating on a pivot table


In [109]:
# view the head of the pivot table
temp_by_country_city_vs_year.head()

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Afghanistan,Kabul,15.822667,15.847917,15.714583,15.132583,16.128417,14.8475,15.7985,15.518,15.47925,15.093333,15.676,15.812167,14.510333,16.206125
Angola,Luanda,24.410333,24.427083,24.790917,24.867167,24.216167,24.414583,24.138417,24.241583,24.266333,24.325083,24.44025,24.15075,24.240083,24.553875
Australia,Melbourne,14.320083,14.18,14.075833,13.985583,13.742083,14.3785,13.991083,14.991833,14.110583,14.647417,14.231667,14.190917,14.268667,14.7415
Australia,Sydney,17.567417,17.8545,17.733833,17.592333,17.869667,18.028083,17.7495,18.020833,17.321083,18.175833,17.999,17.713333,17.474333,18.08975
Bangladesh,Dhaka,25.90525,25.93125,26.095,25.927417,26.136083,26.193333,26.440417,25.951333,26.0045,26.535583,26.648167,25.80325,26.283583,26.587


In [112]:
# Calculate the mean temperature for each year, assigning to mean_temp_by_year.
# Get the worldwide mean temp by year
mean_temp_by_year = temp_by_country_city_vs_year.mean()
mean_temp_by_year


year
2000    19.506243
2001    19.679352
2002    19.855685
2003    19.630197
2004    19.672204
2005    19.607239
2006    19.793993
2007    19.854270
2008    19.608778
2009    19.833752
2010    19.911734
2011    19.549197
2012    19.668239
2013    20.312285
dtype: float64

In [114]:
# Filter mean_temp_by_year for the year that had the highest mean temperature.
mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()]

year
2013    20.312285
dtype: float64

In [115]:
# Calculate the mean temperature for each city (across columns), assigning to mean_temp_by_city.
# Get the mean temp by city
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis="columns")
mean_temp_by_city

country        city            
Afghanistan    Kabul               15.541955
Angola         Luanda              24.391616
Australia      Melbourne           14.275411
               Sydney              17.799250
Bangladesh     Dhaka               26.174440
                                     ...    
United States  Chicago             11.330825
               Los Angeles         16.675399
               New York            10.911034
Vietnam        Ho Chi Minh City    27.922857
Zimbabwe       Harare              20.699000
Length: 100, dtype: float64

In [116]:
# Filter mean_temp_by_city for the city that had the lowest mean temperature.
mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()]

country  city  
China    Harbin    4.876551
dtype: float64