# Ex 8.1: Part II (Outside Class)  

In [1]:
import pandas as pd
import plotly.express as px

### Read data file  
- **Important Notes**:  
  - Some fips codes have leading zeros (start with zero).  If you load that into a numerical variable it will throw out the zero.
  - *dtype={fips:str}* tells pandas to treat it as a string, so it won't throw out any leading zeros   

 

In [2]:
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_countries_by_alcohol_consumption_per_capita#World_Health_Organization_(WHO)_data')

print(len(tables))
tables

14


[         Country 1996[7] 2016[8]
 0    Afghanistan       –     0.2
 1        Albania    2.59     7.5
 2        Algeria    0.27     0.9
 3        Andorra       –    11.3
 4         Angola    1.58     6.4
 ..           ...     ...     ...
 186    Venezuela    9.41     5.6
 187      Vietnam    1.21     8.3
 188        Yemen    0.15     0.1
 189       Zambia    0.63     4.8
 190     Zimbabwe    2.78     4.8
 
 [191 rows x 3 columns],
             Country  Total  Recordedconsumption  Unrecordedconsumption  \
 0           Estonia   16.9                 15.8                    1.1   
 1         Lithuania   15.0                 13.8                    1.2   
 2    Czech Republic   14.3                 12.4                    1.4   
 3        Seychelles   13.8                 12.4                    1.4   
 4           Germany   13.4                 11.3                    1.4   
 ..              ...    ...                  ...                    ...   
 184      Bangladesh    0.0             

In [3]:
tables[0]

Unnamed: 0,Country,1996[7],2016[8]
0,Afghanistan,–,0.2
1,Albania,2.59,7.5
2,Algeria,0.27,0.9
3,Andorra,–,11.3
4,Angola,1.58,6.4
...,...,...,...
186,Venezuela,9.41,5.6
187,Vietnam,1.21,8.3
188,Yemen,0.15,0.1
189,Zambia,0.63,4.8


In [4]:
df = pd.DataFrame(tables[0])  

print(df.shape)
df.head()

(191, 3)


Unnamed: 0,Country,1996[7],2016[8]
0,Afghanistan,–,0.2
1,Albania,2.59,7.5
2,Algeria,0.27,0.9
3,Andorra,–,11.3
4,Angola,1.58,6.4


In [5]:
df.dtypes

Country    object
1996[7]    object
2016[8]    object
dtype: object

## Cleaning data

In [6]:
df.drop("1996[7]", axis='columns', inplace=True)

print(df.shape)
df.head()

(191, 2)


Unnamed: 0,Country,2016[8]
0,Afghanistan,0.2
1,Albania,7.5
2,Algeria,0.9
3,Andorra,11.3
4,Angola,6.4


In [7]:
cols_to_rename = {
                   'Country':'Country_Name', 
                   '2016[8]' : '2016 Alcohol Consumption'
                 }
df.rename(columns = cols_to_rename, inplace=True)

print(df.shape)
df.head()

(191, 2)


Unnamed: 0,Country_Name,2016 Alcohol Consumption
0,Afghanistan,0.2
1,Albania,7.5
2,Algeria,0.9
3,Andorra,11.3
4,Angola,6.4


In [8]:
df['2016 Alcohol Consumption'] = df['2016 Alcohol Consumption'].str.replace('–','0.0')

In [9]:
df['2016 Alcohol Consumption'] = pd.to_numeric(df['2016 Alcohol Consumption']).astype('float')

In [10]:
print(df.shape)
df.head()

(191, 2)


Unnamed: 0,Country_Name,2016 Alcohol Consumption
0,Afghanistan,0.2
1,Albania,7.5
2,Algeria,0.9
3,Andorra,11.3
4,Angola,6.4


In [11]:
df.dtypes

Country_Name                 object
2016 Alcohol Consumption    float64
dtype: object

# Q2.1: World Alcohol Consumption in 2016

In [12]:
fig = px.choropleth(df, 
                    scope="world",    
                    locationmode='country names',  
                    locations='Country_Name',  # Column containing State Abbeviations!                    
                    
                    color='2016 Alcohol Consumption',  # Column determining map color for each State
                    
                    hover_name='Country_Name', # Sets top label of Tooltip
                    color_continuous_scale='PuBu', 
                    title="Q2.1: World Alcohol Consumption in 2016"
                   )

# fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0})

fig.show()

# Q2.2: Europe Alcohol Consumption in 2016

In [13]:
fig = px.choropleth(df, 
                    scope="europe",    
                    locationmode='country names',  
                    locations='Country_Name',  # Column containing State Abbeviations!                    
                    
                    color='2016 Alcohol Consumption',  # Column determining map color for each State
                    
                    hover_name='Country_Name', # Sets top label of Tooltip
                    color_continuous_scale='Purples', 
                    title="Q2.2: Europe Alcohol Consumption in 2016"
                   )

# fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0})

fig.show()

# Q2.3: Top 15 Alcohol Consuming Countries (2016)

In [14]:
df.sort_values(by="2016 Alcohol Consumption", ascending=False, inplace=True)
df.head()

Unnamed: 0,Country_Name,2016 Alcohol Consumption
111,Moldova,15.2
98,Lithuania,15.0
45,Czech Republic,14.4
151,Seychelles,13.8
65,Germany,13.4


In [15]:
df_top = df.iloc[ : 15]
df_top

Unnamed: 0,Country_Name,2016 Alcohol Consumption
111,Moldova,15.2
98,Lithuania,15.0
45,Czech Republic,14.4
151,Seychelles,13.8
65,Germany,13.4
126,Nigeria,13.4
81,Ireland,13.0
99,Luxembourg,13.0
93,Latvia,12.9
25,Bulgaria,12.7


In [16]:
df_top.sort_values(by="2016 Alcohol Consumption", ascending=True, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [17]:
fig = px.bar(df_top,              
             x='2016 Alcohol Consumption', 
             y='Country_Name',
             orientation='h',   
             template='plotly_dark',
             title='Top 15 Alcohol Consuming Countries (2016)')

fig.show()