In [41]:
import pandas as pd

In [42]:
# Load the drinks-by-country CSV; no index column is specified, so pandas assigns a default integer index
drinks = pd.read_csv("http://bit.ly/drinksbycountry")

In [43]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [44]:
# Note: in the output above, 0, 1, 2, ... are the index (row) labels, and 'country', 'beer_servings', ... are the column labels

In [45]:
drinks.shape

(193, 6)

In [46]:
# .loc selects by label: row label 3, column label 'beer_servings'
drinks.loc[3,'beer_servings']

245

In [47]:
drinks.index

RangeIndex(start=0, stop=193, step=1)

In [48]:
drinks.columns

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [49]:
# Use the 'country' column as the row index. set_index returns a new frame,
# so rebind the name instead of mutating the existing frame with inplace=True.
drinks = drinks.set_index('country')

In [50]:
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [51]:
drinks.index

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua & Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'Tanzania', 'USA', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela',
       'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=193)

In [52]:
drinks.columns # Note: 'country' is now the index, so it no longer appears among the columns

Index(['beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [53]:
drinks.shape

(193, 5)

In [54]:
# With 'country' as the index, rows are selected by country name
drinks.loc['Angola', 'beer_servings']

217

In [55]:
drinks.index.name

'country'

In [81]:
# Alternative: pass index_col so 'country' becomes the index while the CSV is read,
# instead of calling set_index afterwards (the result is only displayed, not assigned)
pd.read_csv("http://bit.ly/drinksbycountry", index_col='country').head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [56]:
# Clearing the index name removes the 'country' label shown above the index in the display
drinks.index.name = None
drinks.head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [57]:
# Reset the index back to its original state.
# reset_index uses the index name as the name of the restored column, so set the
# name back to 'country' first, then call reset_index and rebind the result
# (instead of mutating the frame with inplace=True).

drinks.index.name = 'country'
drinks = drinks.reset_index()

In [58]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


## Index Alignment

In [59]:
drinks.continent.head() # It's a series

0      Asia
1    Europe
2    Africa
3    Europe
4    Africa
Name: continent, dtype: object

In [61]:
# Make 'country' the index again so each column Series is labelled by country.
# set_index returns a new frame, so rebind the name rather than using inplace=True.
drinks = drinks.set_index('country')

In [62]:
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [64]:
drinks.continent.head() # note the index of the series has now changed

country
Afghanistan      Asia
Albania        Europe
Algeria        Africa
Andorra        Europe
Angola         Africa
Name: continent, dtype: object

In [66]:
# Every Series has an index and values.
# value_counts() returns a Series whose index holds the distinct continents and whose values hold their counts
continent_val_count_series = drinks.continent.value_counts()

In [67]:
continent_val_count_series

Africa           53
Europe           45
Asia             44
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

In [68]:
continent_val_count_series.index

Index(['Africa', 'Europe', 'Asia', 'North America', 'Oceania',
       'South America'],
      dtype='object')

In [69]:
continent_val_count_series.values

array([53, 45, 44, 23, 16, 12], dtype=int64)

#### Sort Index vs Values

In [70]:
# sort_values() orders the Series by its values (the counts), ascending when called with no arguments
continent_val_count_series.sort_values()

South America    12
Oceania          16
North America    23
Asia             44
Europe           45
Africa           53
Name: continent, dtype: int64

In [71]:
# sort_index() orders the Series by its index labels (the continent names)
continent_val_count_series.sort_index()

Africa           53
Asia             44
Europe           45
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

#### Joining series by index - alignment

In [74]:
# Build a small Series keyed by country name:
# `data` supplies the values and `index` supplies the matching labels.
population = pd.Series(
    data=[100000, 85000],
    index=['Angola', 'Algeria'],
    name='Population',
)

In [75]:
population

Angola     100000
Algeria     85000
Name: Population, dtype: int64

In [76]:
drinks.beer_servings.head()

country
Afghanistan      0
Albania         89
Algeria         25
Andorra        245
Angola         217
Name: beer_servings, dtype: int64

In [77]:
# Multiplying two Series aligns them on their index labels first: only countries present in
# both ('Algeria' and 'Angola') get a numeric result; countries missing from `population` become NaN.
# Intent: beer servings * population gives the total beer servings per country.
drinks.beer_servings * population

Afghanistan                    NaN
Albania                        NaN
Algeria                  2125000.0
Andorra                        NaN
Angola                  21700000.0
Antigua & Barbuda              NaN
Argentina                      NaN
Armenia                        NaN
Australia                      NaN
Austria                        NaN
Azerbaijan                     NaN
Bahamas                        NaN
Bahrain                        NaN
Bangladesh                     NaN
Barbados                       NaN
Belarus                        NaN
Belgium                        NaN
Belize                         NaN
Benin                          NaN
Bhutan                         NaN
Bolivia                        NaN
Bosnia-Herzegovina             NaN
Botswana                       NaN
Brazil                         NaN
Brunei                         NaN
Bulgaria                       NaN
Burkina Faso                   NaN
Burundi                        NaN
Cabo Verde          

In [79]:
# Add 'population' to the drinks data as a new column: concat with axis=1 aligns rows on the index,
# leaving NaN where a country has no population entry (the result is displayed, not assigned).
# sort=False is passed explicitly to silence a FutureWarning.
pd.concat([drinks, population], axis=1, sort=False).head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,Population
Afghanistan,0,0,0,0.0,Asia,
Albania,89,132,54,4.9,Europe,
Algeria,25,0,14,0.7,Africa,85000.0
Andorra,245,138,312,12.4,Europe,
Angola,217,57,45,5.9,Africa,100000.0
