In [97]:
import pandas as pd

In [98]:
data = pd.read_csv("../../datasets/drinks.csv")

In [99]:
data.head(5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF


## usefull properties and methods

In [100]:
data.index

RangeIndex(start=0, stop=193, step=1)

In [101]:
data.columns

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [102]:
data.shape

(193, 6)

### get info and stats on the DataFrame

In [103]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
country                         193 non-null object
beer_servings                   193 non-null int64
spirit_servings                 193 non-null int64
wine_servings                   193 non-null int64
total_litres_of_pure_alcohol    193 non-null float64
continent                       170 non-null object
dtypes: float64(1), int64(3), object(2)
memory usage: 9.1+ KB


In [104]:
data.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


### get column(s)

In [105]:
# dot syntax is ok, if we need only 1 column
data.continent.head(3)

0    AS
1    EU
2    AF
Name: continent, dtype: object

In [106]:
# square brackets syntax is more flexible (check below)
data["continent"].head(3)

0    AS
1    EU
2    AF
Name: continent, dtype: object

In [107]:
data[["continent", "country"]].head(3)

Unnamed: 0,continent,country
0,AS,Afghanistan
1,EU,Albania
2,AF,Algeria


### get rows

In [108]:
# by specifying integer location:
data.iloc[0]

country                         Afghanistan
beer_servings                             0
spirit_servings                           0
wine_servings                             0
total_litres_of_pure_alcohol              0
continent                                AS
Name: 0, dtype: object

In [109]:
data.iloc[0:5]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF


### get values

In [110]:
data.wine_servings.max()

370

In [111]:
data[data.wine_servings == data.wine_servings.max()]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
61,France,127,151,370,11.8,EU


## set_index()

In [112]:
new_data = data.set_index("country")

In [113]:
data.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF


In [114]:
new_data.head(3)

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,AS
Albania,89,132,54,4.9,EU
Algeria,25,0,14,0.7,AF


In [115]:
# remove the row index name
new_data.index.name = ""

### reset index (and return column back)

In [116]:
# we use loc[] to select by name location
new_data.loc['Albania']

beer_servings                    89
spirit_servings                 132
wine_servings                    54
total_litres_of_pure_alcohol    4.9
continent                        EU
Name: Albania, dtype: object

In [117]:
# the row above was returned as Series object, but
# the slice is returned as DataFrame object
new_data.loc["Algeria":"Bahamas"]

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
,,,,,
Algeria,25.0,0.0,14.0,0.7,AF
Andorra,245.0,138.0,312.0,12.4,EU
Angola,217.0,57.0,45.0,5.9,AF
Antigua & Barbuda,102.0,128.0,45.0,4.9,
Argentina,193.0,25.0,221.0,8.3,SA
Armenia,21.0,179.0,11.0,3.8,EU
Australia,261.0,72.0,212.0,10.4,OC
Austria,279.0,75.0,191.0,9.7,EU
Azerbaijan,21.0,46.0,5.0,1.3,EU


### reset the index (and return the column back)

In [118]:
new_data.index.name = "Country"

In [119]:
new_data.reset_index(inplace=True)
new_data.head(3)

Unnamed: 0,Country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
