In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [None]:
# read the alcohol consumption CSV into `df` and preview the first rows
DATA_PATH = '../input/alcohol-consumption/gapminder_alcohol.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# keep only the rows where `alcconsumption` is present;
# missing values in other columns are left untouched
df = df.dropna(subset=['alcconsumption'])

In [None]:
# summary statistics of `df` (pandas default settings), shown as the cell output
summary_stats = df.describe()
summary_stats

In [None]:
# the ten rows with the highest `alcconsumption` values, largest first
top_consumers = df.nlargest(n=10, columns='alcconsumption')
top_consumers

In [None]:
# columns used by the pair plot and the correlation heatmaps
features = [
    'alcconsumption',
    'incomeperperson',
    'suicideper100th',
    'employrate',
    'urbanrate',
]

In [None]:
# pairwise plots of the selected features, drawn with seaborn's 'reg' kind
regression_style = {
    'line_kws': {'color': 'magenta'},   # colour of the fitted line
    'scatter_kws': {'alpha': 0.25},     # translucent points so overlaps stay readable
}
sns.pairplot(df[features], kind='reg', plot_kws=regression_style)
plt.show()


In [None]:
# Pearson and Spearman correlation matrices of the selected features,
# rendered as two side-by-side annotated heatmaps
corr_pearson = df[features].corr(method='pearson')
corr_spearman = df[features].corr(method='spearman')

# identical colour scale on both panels so they can be compared directly
heatmap_style = dict(annot=True, cmap='RdYlGn', vmin=-1, vmax=1)

corr_fig = plt.figure(figsize=(16, 5))

# left panel: Pearson
ax1 = corr_fig.add_subplot(1, 2, 1)
sns.heatmap(corr_pearson, ax=ax1, **heatmap_style)
ax1.set_title('Pearson Correlation')

# right panel: Spearman (x axis shared with the left panel)
ax2 = corr_fig.add_subplot(1, 2, 2, sharex=ax1)
sns.heatmap(corr_spearman, ax=ax2, **heatmap_style)
ax2.set_title('Spearman Correlation')

plt.show()

In [None]:
# interactive scatter: suicide rate against alcohol consumption;
# marker size follows `alcconsumption`, `country` is added to the hover box
scatter_options = {
    'x': 'alcconsumption',
    'y': 'suicideper100th',
    'size': 'alcconsumption',
    'hover_data': ['country'],
    'opacity': 0.5,
}
layout_options = {
    'title': 'Suicide rate vs Alcohol consumption ',
    'xaxis_title': 'Alcohol consumption',
    'yaxis_title': 'Suicide rate (per 100000)',
}

fig = px.scatter(df, **scatter_options)
fig.update_layout(**layout_options)
fig.show()