## PM2.5 Air Pollution, Mean Annual Exposure (Micrograms Per Cubic Meter)
Population-weighted exposure to ambient PM2.5 pollution is defined as the average level of exposure of a nation's population to concentrations of suspended particles measuring less than 2.5 microns in aerodynamic diameter, which are capable of penetrating deep into the respiratory tract and causing severe health damage. Exposure is calculated by weighting mean annual concentrations of PM2.5 by population in both urban and rural areas.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Load the PM2.5 CSV and drop its 'Country Code' column.
df = pd.read_csv(
    '/kaggle/input/pm25-global-air-pollution-20102017/PM2.5 Global Air Pollution 2010-2017.csv'
).drop(columns=['Country Code'])

In [None]:
# Preview the first rows of the loaded table.
df.head()

# EDA
- Change in exposure over time
- Top 20 Countries

## Top 20

In [None]:
# Rank countries by their PM2.5 exposure summed over 2010-2017, then draw a
# stacked bar chart (one segment per year) for the 20 highest totals.
years = np.arange(2010, 2018)
year_clms = [str(year) for year in years]

# Sum only the year columns. Melting the whole frame would also aggregate the
# string 'variable' column and any non-year column that happens to be present.
ttl = (
    df.groupby('Country Name')[year_clms]
    .sum()
    .sum(axis=1)
    .to_frame('value')
    .sort_values(by='value', ascending=False)
)
top20_countries = ttl[:20].index

# Boolean mask over df; the selected rows keep df's original row order.
is_top20 = df['Country Name'].isin(top20_countries)
top20 = df[is_top20]

# Black with increasing opacity: 0.1 for 2010 up to 0.8 for 2017.
colors = {year: 'rgba(0, 0, 0, {})'.format((year - 2009) / 10) for year in years}

data = [
    go.Bar(
        name=str(year),
        marker={"color": colors[year], "line": {"width": 0}},
        x=top20['Country Name'],
        y=top20[str(year)],
    )
    for year in years
]

fig = go.Figure(data=data)

fig.update_layout(
    barmode='stack',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    title='Top 20',
)
fig.show()

In [None]:
# Draw one line per top-20 country showing its PM2.5 value for each year.
x = np.arange(2010, 2018)
year_clms = [str(year) for year in x]

data = []
# Walk the rows directly instead of re-filtering top20 by name for every
# country. This also avoids calling float() on a one-element Series, which
# pandas deprecated in 2.1 and which fails outright on duplicated names.
for country, yVal in zip(top20['Country Name'],
                         top20[year_clms].to_numpy(dtype=float)):
    y = yVal.tolist()
    data.append(go.Scatter(
        x=x,
        y=y,
        name=country,
        mode='lines',
        line={"width": 5},
    ))

fig = go.Figure(data=data)
fig.update_layout(
    yaxis_showgrid=False,
    height=600,
    plot_bgcolor='rgba(0,0,0,1)',
    title="Top 20 Yearly Change",
)
# Faint white vertical grid lines so the years stay readable on the black
# plot background.
fig.update_xaxes(
    gridcolor='rgba(255,255,255,0.4)',
    gridwidth=1,
)
fig.show()

## Change in exposure relative to 2010

In [None]:
def offset(row):
    """Return a copy of *row* with every 2010-2017 value rebased on 2010.

    Args:
        row: A pandas Series carrying the string labels '2010' ... '2017',
            for example one row handed over by
            ``DataFrame.apply(offset, axis=1)``.

    Returns:
        A new Series in which the row's '2010' value has been subtracted
        from each year entry, so '2010' itself becomes 0. Entries under
        other labels are carried over unchanged, and the input row is not
        modified.
    """
    year_clms = [str(year) for year in np.arange(2010, 2018)]
    baseline = row['2010']
    # Work on a copy: pandas does not support functions that mutate the
    # object DataFrame.apply passes in.
    rebased = row.copy()
    rebased[year_clms] = rebased[year_clms] - baseline
    return rebased
    
# Rebase each row on its 2010 value, then keep the ten rows whose rebased
# 2017 value is largest.
off_df = df.apply(offset, axis=1)
top10_off = (
    off_df
    .sort_values(by='2017', ascending=False)
    .head(10)
)
top10_off

In [None]:
# Draw one line per row of top10_off showing its value for each year.
x = np.arange(2010, 2018)
year_clms = [str(year) for year in x]

data = []
countries = top10_off['Country Name']
# Walk the rows directly instead of re-filtering top10_off by name for every
# country. This also avoids calling float() on a one-element Series, which
# pandas deprecated in 2.1 and which fails outright on duplicated names.
for country, yVal in zip(countries,
                         top10_off[year_clms].to_numpy(dtype=float)):
    y = yVal.tolist()
    data.append(go.Scatter(
        x=x,
        y=y,
        name=country,
        mode="lines",
        line={
            "width": 5
        },
    ))

fig = go.Figure(data=data)
fig.update_layout(
    yaxis_showgrid=False,
    plot_bgcolor='black',
    title='Increment Analysis',
)
# Semi-transparent white vertical grid lines on the black plot background.
fig.update_xaxes(
    gridcolor="rgba(255,255,255,0.3)",
    gridwidth=3,
)
fig.show()
