Data Analysis

1. Ranking de consumo por tipo de bebida
Classificar os países com maior e menor consumo médio de cerveja, de vinho e de destilados por pessoa por ano.

2. Ranking geral de consumo de álcool
Classificar os países com maior e menor consumo médio total de álcool por pessoa.

3. Proporção entre os tipos de bebida por país
Analisar a proporção de cerveja, vinho e destilado em relação ao total consumido em cada país.

4. Países que mais concentram um único tipo de bebida
Identificar países onde o consumo de álcool é dominado majoritariamente por um só tipo (ex: só cerveja).

5. Correlação entre tipos de bebida
Verificar correlação entre o consumo de cerveja e vinho, por exemplo. Será que quem consome muito vinho consome menos cerveja?

6. Proporção do tipo de bebida mais consumido no mundo
Em média mundial, qual tipo de bebida representa a maior parte do álcool consumido?

Dependencies

In [663]:
import numpy as np
import pandas as pd

Dataset loading

In [664]:
# Location of the drinks CSV, relative to the notebook's working directory.
datasetPath = '../data/drinks.csv'
# Read the whole file into the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=datasetPath)

Exploratory data analysis

In [665]:
# Dimensions of the loaded dataset as (rows, columns).
df.shape

(193, 5)

In [666]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
0,Afghanistan,0,0,0,0.0
1,Albania,89,132,54,4.9
2,Algeria,25,0,14,0.7
3,Andorra,245,138,312,12.4
4,Angola,217,57,45,5.9


In [667]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


In [668]:
# Storage dtype of each column, to confirm which ones are numeric.
df.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
dtype: object

In [669]:
# Keep every numeric column, whatever its width or flavour (int32, float32,
# nullable integers, ...), instead of hard-coding 'int64'/'float64'; the
# text column "country" is dropped because it cannot be correlated.
df_numeric = df.select_dtypes(include='number')

# Pairwise correlation between the numeric columns (pandas' default method).
corr_matrix = df_numeric.corr()
display(corr_matrix)

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
beer_servings,1.0,0.458819,0.527172,0.835839
spirit_servings,0.458819,1.0,0.194797,0.654968
wine_servings,0.527172,0.194797,1.0,0.667598
total_litres_of_pure_alcohol,0.835839,0.654968,0.667598,1.0


In [670]:
# Number of missing values per column.
df.isna().sum()

country                         0
beer_servings                   0
spirit_servings                 0
wine_servings                   0
total_litres_of_pure_alcohol    0
dtype: int64

In [671]:
# Spot-check two reference countries by selecting their rows by name.
df.loc[df['country'].isin(['Brazil', 'USA']), :]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
23,Brazil,245,145,16,7.2
184,USA,249,158,84,8.7


## 1. Ranking de consumo por tipo de bebida.
Classificação dos países com maior e menor consumo médio de cerveja, de vinho e de destilados por pessoa por ano.

In [672]:
# Drop countries with zero beer servings, then order the remaining rows
# from the highest to the lowest "beer_servings".
df_sorted_by_beer_servings_desc = (
    df[df['beer_servings'] > 0]
    .sort_values('beer_servings', ascending=False)
)

### 1.1. Consumo de cerveja.

In [673]:
# Five countries with the highest beer servings (first rows of the descending sort).
df_top_5_beer_servings = df_sorted_by_beer_servings_desc.iloc[:5]
display(df_top_5_beer_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
117,Namibia,376,3,1,6.8
45,Czech Republic,361,170,134,11.8
62,Gabon,347,98,59,8.9
65,Germany,346,117,175,11.3
98,Lithuania,343,244,56,12.9


In [674]:
# Five countries with the lowest non-zero beer servings (last rows of the
# descending sort, so the smallest values are at the bottom).
df_bottom_5_beer_servings = df_sorted_by_beer_servings_desc.iloc[-5:]
display(df_bottom_5_beer_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
168,Tajikistan,2,15,0,0.3
38,Comoros,1,3,1,0.1
73,Haiti,1,326,1,5.9
137,Qatar,1,42,7,0.9
171,Timor-Leste,1,1,4,0.1


### 1.2. Consumo de destilados.

In [675]:
# Drop countries with zero spirit servings, then order the remaining rows
# from the highest to the lowest "spirit_servings".
df_sorted_by_spirit_servings_desc = (
    df[df['spirit_servings'] > 0]
    .sort_values('spirit_servings', ascending=False)
)

In [676]:
# Five countries with the highest spirit servings (first rows of the descending sort).
df_top_5_spirit_servings = df_sorted_by_spirit_servings_desc.iloc[:5]
display(df_top_5_spirit_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
68,Grenada,199,438,28,11.9
15,Belarus,142,373,42,14.4
73,Haiti,1,326,1,5.9
141,Russian Federation,247,326,73,11.5
144,St. Lucia,171,315,71,10.1


In [677]:
# Five countries with the lowest non-zero spirit servings (last rows of the
# descending sort).
df_bottom_5_spirit_servings = df_sorted_by_spirit_servings_desc.iloc[-5:]
display(df_bottom_5_spirit_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
78,Indonesia,5,1,0,0.1
116,Myanmar,5,1,0,0.1
104,Mali,5,1,1,0.6
150,Senegal,9,1,7,0.3
171,Timor-Leste,1,1,4,0.1


### 1.3. Consumo de vinho.

In [678]:
# Drop countries with zero wine servings, then order the remaining rows
# from the highest to the lowest "wine_servings".
df_sorted_by_wine_servings_desc = (
    df[df['wine_servings'] > 0]
    .sort_values('wine_servings', ascending=False)
)

In [679]:
# Five countries with the highest wine servings (first rows of the descending sort).
df_top_5_wine_servings = df_sorted_by_wine_servings_desc.iloc[:5]
display(df_top_5_wine_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
61,France,127,151,370,11.8
136,Portugal,194,67,339,11.0
3,Andorra,245,138,312,12.4
166,Switzerland,185,100,280,10.2
48,Denmark,224,81,278,10.4


In [680]:
# Five countries with the lowest non-zero wine servings (last rows of the
# descending sort).
df_bottom_5_wine_servings = df_sorted_by_wine_servings_desc.iloc[-5:]
display(df_bottom_5_wine_servings)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
122,Nicaragua,78,118,1,3.5
169,Thailand,99,258,1,6.4
157,Solomon Islands,56,11,1,1.2
183,Tanzania,36,6,1,5.7
189,Vietnam,111,2,1,2.0


## 2. Ranking geral de consumo de álcool.
Classificação dos países com maior e menor consumo médio total de álcool por pessoa.

In [681]:
# Drop countries with zero total pure-alcohol consumption, then order the
# remaining rows from the highest to the lowest "total_litres_of_pure_alcohol".
df_sorted_by_total_litres_of_pure_alcohol_desc = (
    df[df['total_litres_of_pure_alcohol'] > 0]
    .sort_values('total_litres_of_pure_alcohol', ascending=False)
)

In [682]:
# Five countries with the highest total pure-alcohol consumption.
df_top_5_total_litres_of_pure_alcohol = df_sorted_by_total_litres_of_pure_alcohol_desc.iloc[:5]
display(df_top_5_total_litres_of_pure_alcohol)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
15,Belarus,142,373,42,14.4
98,Lithuania,343,244,56,12.9
3,Andorra,245,138,312,12.4
68,Grenada,199,438,28,11.9
45,Czech Republic,361,170,134,11.8


In [683]:
# Five countries with the lowest non-zero total pure-alcohol consumption
# (last rows of the descending sort).
df_bottom_5_total_litres_of_pure_alcohol = df_sorted_by_total_litres_of_pure_alcohol_desc.iloc[-5:]
display(df_bottom_5_total_litres_of_pure_alcohol)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
116,Myanmar,5,1,0,0.1
123,Niger,3,2,1,0.1
149,Saudi Arabia,0,5,0,0.1
171,Timor-Leste,1,1,4,0.1
190,Yemen,6,0,0,0.1


## 3. Proporção entre os tipos de bebida por país.
Análise da proporção de cerveja, vinho e destilados em relação ao total de porções consumidas em cada país.

In [684]:
# Work on a copy so the original frame keeps its columns untouched, and add
# the per-country total servings (beer + spirits + wine).
df_portions_ratio = df.copy()
df_portions_ratio['total_servings'] = (
    df_portions_ratio['beer_servings']
    + df_portions_ratio['spirit_servings']
    + df_portions_ratio['wine_servings']
)

# Countries with no servings at all are left out: their shares would be 0/0.
df_portions_ratio_nonzero = df_portions_ratio.query('total_servings > 0')

# Share of each beverage type in the country's total servings, added as
# "beer_ratio", "spirit_ratio" and "wine_ratio" (in that order).
df_portions_ratio_nonzero = df_portions_ratio_nonzero.assign(**{
    f'{drink}_ratio': (
        df_portions_ratio_nonzero[f'{drink}_servings']
        / df_portions_ratio_nonzero['total_servings']
    )
    for drink in ('beer', 'spirit', 'wine')
})

# Show the frame with the new share columns.
display(df_portions_ratio_nonzero)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
1,Albania,89,132,54,4.9,275,0.323636,0.480000,0.196364
2,Algeria,25,0,14,0.7,39,0.641026,0.000000,0.358974
3,Andorra,245,138,312,12.4,695,0.352518,0.198561,0.448921
4,Angola,217,57,45,5.9,319,0.680251,0.178683,0.141066
5,Antigua & Barbuda,102,128,45,4.9,275,0.370909,0.465455,0.163636
...,...,...,...,...,...,...,...,...,...
188,Venezuela,333,100,3,7.7,436,0.763761,0.229358,0.006881
189,Vietnam,111,2,1,2.0,114,0.973684,0.017544,0.008772
190,Yemen,6,0,0,0.1,6,1.000000,0.000000,0.000000
191,Zambia,32,19,4,2.5,55,0.581818,0.345455,0.072727


### 3.1. Consumo de cerveja.

In [685]:
# Keep only countries where beer has a non-zero share of the servings,
# ordered from the highest beer share to the lowest.
df_portions_ratio_nonzero_sorted_by_beer_ratio_desc = (
    df_portions_ratio_nonzero[df_portions_ratio_nonzero['beer_ratio'] > 0]
    .sort_values('beer_ratio', ascending=False)
)

In [686]:
# Five countries where beer makes up the largest share of total servings.
df_portions_ratio_nonzero_top_5_beer_ratio = df_portions_ratio_nonzero_sorted_by_beer_ratio_desc.iloc[:5]
display(df_portions_ratio_nonzero_top_5_beer_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
19,Bhutan,23,0,0,0.4,23,1.0,0.0,0.0
27,Burundi,88,0,0,6.3,88,1.0,0.0,0.0
190,Yemen,6,0,0,0.1,6,1.0,0.0,0.0
56,Eritrea,18,0,0,0.5,18,1.0,0.0,0.0
117,Namibia,376,3,1,6.8,380,0.989474,0.007895,0.002632


In [687]:
# Five countries where beer makes up the smallest non-zero share of total
# servings (last rows of the descending sort).
df_portions_ratio_nonzero_bottom_5_beer_ratio = df_portions_ratio_nonzero_sorted_by_beer_ratio_desc.tail(5)
# Backward-compatible alias: this frame was previously bound to a name that
# misspelled "ratio" as "ration"; keep it for any cell still using that name.
df_portions_ration_nonzero_bottom_5_beer_ratio = df_portions_ratio_nonzero_bottom_5_beer_ratio
display(df_portions_ratio_nonzero_bottom_5_beer_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
7,Armenia,21,179,11,3.8,211,0.099526,0.848341,0.052133
167,Syria,5,35,16,1.0,56,0.089286,0.625,0.285714
77,India,9,114,0,2.2,123,0.073171,0.926829,0.0
137,Qatar,1,42,7,0.9,50,0.02,0.84,0.14
73,Haiti,1,326,1,5.9,328,0.003049,0.993902,0.003049


### 3.2. Consumo de destilados.

In [688]:
# Keep only countries where spirits have a non-zero share of the servings,
# ordered from the highest spirit share to the lowest.
df_portions_ratio_nonzero_sorted_by_spirit_ratio_desc = (
    df_portions_ratio_nonzero[df_portions_ratio_nonzero['spirit_ratio'] > 0]
    .sort_values('spirit_ratio', ascending=False)
)

In [689]:
# Five countries where spirits make up the largest share of total servings.
df_portions_ratio_nonzero_top_5_spirit_ratio = df_portions_ratio_nonzero_sorted_by_spirit_ratio_desc.iloc[:5]
display(df_portions_ratio_nonzero_top_5_spirit_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
149,Saudi Arabia,0,5,0,0.1,5,0.0,1.0,0.0
73,Haiti,1,326,1,5.9,328,0.003049,0.993902,0.003049
77,India,9,114,0,2.2,123,0.073171,0.926829,0.0
168,Tajikistan,2,15,0,0.3,17,0.117647,0.882353,0.0
96,Liberia,19,152,2,3.1,173,0.109827,0.878613,0.011561


In [690]:
# Five countries where spirits make up the smallest non-zero share of total
# servings (last rows of the descending sort).
df_portions_ratio_nonzero_bottom_5_spirit_ratio = df_portions_ratio_nonzero_sorted_by_spirit_ratio_desc.tail(5)
# Backward-compatible alias: this frame was previously bound to a name that
# misspelled "ratio" as "ration"; keep it for any cell still using that name.
df_portions_ration_nonzero_bottom_5_spirit_ratio = df_portions_ratio_nonzero_bottom_5_spirit_ratio
display(df_portions_ratio_nonzero_bottom_5_spirit_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
164,Swaziland,90,2,2,4.7,94,0.957447,0.021277,0.021277
189,Vietnam,111,2,1,2.0,114,0.973684,0.017544,0.008772
39,Congo,76,1,9,1.7,86,0.883721,0.011628,0.104651
117,Namibia,376,3,1,6.8,380,0.989474,0.007895,0.002632
31,Cameroon,147,1,4,5.8,152,0.967105,0.006579,0.026316


### 3.3. Consumo de vinho.

In [691]:
# Keep only countries where wine has a non-zero share of the servings,
# ordered from the highest wine share to the lowest.
df_portions_ratio_nonzero_sorted_by_wine_ratio_desc = (
    df_portions_ratio_nonzero[df_portions_ratio_nonzero['wine_ratio'] > 0]
    .sort_values('wine_ratio', ascending=False)
)

In [692]:
# Five countries where wine makes up the largest share of total servings.
df_portions_ratio_nonzero_top_5_wine_ratio = df_portions_ratio_nonzero_sorted_by_wine_ratio_desc.iloc[:5]
display(df_portions_ratio_nonzero_top_5_wine_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
55,Equatorial Guinea,92,0,233,5.8,325,0.283077,0.0,0.716923
171,Timor-Leste,1,1,4,0.1,6,0.166667,0.166667,0.666667
92,Laos,62,0,123,6.2,185,0.335135,0.0,0.664865
83,Italy,85,42,237,6.5,364,0.233516,0.115385,0.651099
148,Sao Tome & Principe,56,38,140,4.2,234,0.239316,0.162393,0.598291


In [693]:
# Five countries where wine makes up the smallest non-zero share of total
# servings (last rows of the descending sort).
df_portions_ratio_nonzero_bottom_5_wine_ratio = df_portions_ratio_nonzero_sorted_by_wine_ratio_desc.tail(5)
# Backward-compatible alias: this frame was previously bound to a name that
# misspelled "ratio" as "ration"; keep it for any cell still using that name.
df_portions_ration_nonzero_bottom_5_wine_ratio = df_portions_ratio_nonzero_bottom_5_wine_ratio
display(df_portions_ratio_nonzero_bottom_5_wine_ratio)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,total_servings,beer_ratio,spirit_ratio,wine_ratio
134,Philippines,71,186,1,4.6,258,0.275194,0.72093,0.003876
73,Haiti,1,326,1,5.9,328,0.003049,0.993902,0.003049
169,Thailand,99,258,1,6.4,358,0.276536,0.72067,0.002793
117,Namibia,376,3,1,6.8,380,0.989474,0.007895,0.002632
72,Guyana,93,302,1,7.1,396,0.234848,0.762626,0.002525
