In [13]:
import numpy as np
import pandas as pd
import plotly.express as px
import pycountry
from sklearn.datasets import make_moons
from dash import Dash, html, dcc
import plotly.graph_objects as go
from dash.dependencies import Input,Output
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [14]:
# Create the Dash application; the layout and every @app.callback below attach to it.
app = Dash()
# Keep a module-level handle on the app's underlying server object.
# NOTE(review): nothing in this notebook reads `server`; presumably it is exposed
# for an external WSGI host to import — confirm before removing.
server = app.server

# Preprocessing

In [15]:
# Read the prepared indicator table; malformed CSV rows are skipped rather than raising.
df_raw = pd.read_csv("data/final.csv", sep=",", on_bad_lines="skip")
print(df_raw.shape)

# Keep only rows where every column is populated; `df` is the frame the rest of
# the notebook works with.
df = df_raw.dropna()
print(df.shape)

df.head()

(5830, 15)
(2920, 15)


Unnamed: 0,Country Code,Country Name,Region,IncomeGroup,Year,Imports of goods and services (% of GDP),Exports of goods and services (% of GDP),"Foreign direct investment, net inflows (BoP, current US$)",GDP (current US$),GDP per capita (current US$),Gross savings (% of GDP),"Inflation, consumer prices (annual %)","Total reserves (includes gold, current US$)","Unemployment, total (% of total labor force) (modeled ILO estimate)",GDP growth (annual %)
88,AGO,Angola,Sub-Saharan Africa,Lower middle income,2000,62.861278,89.685831,878620000.0,9129595000.0,556.884244,39.207877,324.996872,1198212000.0,4.163,3.054624
89,AGO,Angola,Sub-Saharan Africa,Lower middle income,2001,74.946851,75.388944,2145470000.0,8936064000.0,527.463202,14.480222,152.561023,731866200.0,4.119,4.205999
90,AGO,Angola,Sub-Saharan Africa,Lower middle income,2002,48.216596,57.084996,1743504000.0,15285590000.0,872.657772,28.876816,108.897436,375546000.0,4.043,13.665689
91,AGO,Angola,Sub-Saharan Africa,Lower middle income,2003,49.579891,54.321339,3576972000.0,17812700000.0,982.805601,26.055461,98.224144,634199400.0,4.043,2.99
92,AGO,Angola,Sub-Saharan Africa,Lower middle income,2004,45.199589,58.380358,2197228000.0,23552050000.0,1254.695563,33.557122,43.542107,1379588000.0,3.972,10.95


In [16]:
# Floating-point columns only: these are the indicators offered as selectable features.
df_float = df.select_dtypes(include="float")

# Option lists for the dropdown and radio components

In [17]:
def _as_options(values):
    """Turn an iterable of values into Dash option dicts whose label equals the value."""
    return [{"label": item, "value": item} for item in values]


# Countries, alphabetically.
country_list = _as_options(sorted(df["Country Name"].unique()))

# One entry per floating-point indicator column.
feature_list = _as_options(df_float.columns)

# Every calendar year between the first and last year present in the data.
min_year = df['Year'].min()
max_year = df['Year'].max()
year_list = _as_options(range(min_year, max_year + 1))

# Regions, ordered by how many rows each one has (value_counts order).
region_list = _as_options(df["Region"].value_counts().index)

# Income groups are plain strings, with an "All" choice in front.
incomegroup_list = ["All", *df["IncomeGroup"].value_counts().index]

# Axis choices for the statistics section; y_list is the same list object as x_list.
x_list = _as_options(df_float.columns)
y_list = x_list

# HTML Layout renderer

In [18]:
# --- Inline styles shared by several components ------------------------------
_CENTER_TEXT = {"textAlign": "center"}
_FLEX_CENTER = {
    "display": "flex",
    "flex-wrap": "wrap",
    "justify-content": "center",
    "align-items": "center",
}
_FLEX_CENTER_FULL = {"position": "relative", **_FLEX_CENTER, "width": "100%"}


def _centered(width):
    """Inline style for a block element of the given CSS width, centred horizontally."""
    return {"display": "block", "margin-left": "auto", "margin-right": "auto", "width": width}


def _captioned_dropdown(caption, width, **props):
    """Fixed-width Div holding a caption Span above a centred dcc.Dropdown."""
    return html.Div(
        [html.Span(caption), dcc.Dropdown(style=_centered(width), **props)],
        style={"textAlign": "center", "width": width},
    )


# --- Section 1: per-country comparison ---------------------------------------
_countries_panel = html.Div(
    [
        html.Div("Country: ", style=_CENTER_TEXT),
        dcc.Dropdown(
            id="country_dropdown",
            multi=True,
            style=_centered("300px"),
            options=country_list,
            value=['Vietnam'],  # default selection
        ),
        html.Br(),
        html.Div("Feature to compare: ", style=_CENTER_TEXT),
        dcc.Dropdown(
            id="feature_dropdown",
            style=_centered("300px"),
            options=feature_list,
            value=feature_list[0]['value'],
        ),
        html.Br(),
        # Year range pickers, side by side.
        html.Div(
            [
                _captioned_dropdown('From Year:', "150px",
                                    id='from_year', value=min_year, options=year_list),
                _captioned_dropdown('To Year:', "150px",
                                    id='to_year', value=max_year, options=year_list),
            ],
            style=_FLEX_CENTER_FULL,
        ),
        html.Div(
            [
                dcc.Graph(id="bar_chart", style={"width": "500px", "height": "80%"}),
                dcc.Graph(id="pie_chart"),
            ],
            style=_FLEX_CENTER,
        ),
        html.Div(
            [
                dcc.Graph(id="line_chart", style={"width": "650px"}),
                dcc.Graph(id="linear_chart", style={"width": "650px"}),
            ],
            style={"position": "relative", "width": "100%", **_FLEX_CENTER},
        ),
    ]
)

# --- Section 2: per-region map -----------------------------------------------
_regions_panel = html.Div(
    [
        html.H1("Regions Economic in Glance", style=_CENTER_TEXT),
        html.Div("Region: ", style=_CENTER_TEXT),
        dcc.Dropdown(
            id="region_dropdown",
            style=_centered("300px"),
            options=region_list,
            value='North America',
        ),
        html.Br(),
        html.Div("Feature to view: ", style=_CENTER_TEXT),
        dcc.Dropdown(
            id="geo_feature_dropdown",
            style=_centered("300px"),
            options=feature_list,
            value=feature_list[0]['value'],
        ),
        html.Br(),
        html.Div("Group of Income: ", style=_CENTER_TEXT),
        dcc.RadioItems(
            id='incomegroup_radio',
            options=incomegroup_list,
            value="All",
            inline=True,
            style=_CENTER_TEXT,
        ),
        dcc.Graph(id="geo_graph", style={"width": "800px", "height": "600px"}),
    ]
)

# --- Section 3: statistics / machine-learning view ---------------------------
_learning_panel = html.Div(
    [
        html.H1("Statistic and Machine Learning View", style=_CENTER_TEXT),
        html.Div(
            [
                _captioned_dropdown('X value:', "250px",
                                    id='x_dropdown', value=x_list[0]['value'], options=x_list),
                _captioned_dropdown('Y value:', "250px",
                                    id='y_dropdown', value=y_list[1]['value'], options=y_list),
                _captioned_dropdown('Label:', "150px",
                                    id='label_dropdown', value="IncomeGroup",
                                    options=[{"label": "IncomeGroup", "value": "IncomeGroup"}]),
            ],
            style=_FLEX_CENTER_FULL,
        ),
        dcc.Graph(id="knn_graph"),
        html.P("Select number of neighbors:"),
        dcc.Slider(
            id='slider-neighbors',
            min=5, max=20, step=1, value=12,
            marks={tick: str(tick) for tick in range(5, 21, 5)},
        ),
    ]
)

# --- Page: the three sections stacked in a centred column --------------------
app.layout = html.Div(
    [
        html.H1("Countries Economic in Glance", style=_CENTER_TEXT),
        _countries_panel,
        html.Br(),
        _regions_panel,
        html.Br(),
        _learning_panel,
    ],
    style={"position": "relative", "width": "100%", "display": "flex", "flex-direction": "column",
           "justify-content": "center", "align-items": "center", "overflow": "hidden"},
)

# Interaction between components: one control updating another

In [19]:
@app.callback(
    Output('to_year', 'options'),
    Output('to_year', 'value'),
    Input('from_year', 'value')
)
def year_change(from_year):
    """Restrict the "To Year" dropdown to years after the chosen "From Year".

    Args:
        from_year: Value of the ``from_year`` dropdown, or ``None`` when the
            user has cleared it.

    Returns:
        A ``(options, value)`` pair for the ``to_year`` dropdown: the list of
        selectable years and the last of them as the new selection.
    """
    every_year = range(min_year, max_year + 1)

    if from_year is None:
        # Dropdown cleared: comparing against None would raise, so offer the full range.
        later_years = list(every_year)
    else:
        later_years = [year for year in every_year if year > from_year]

    if not later_years:
        # from_year is already the last year in the data; without this fallback
        # the [-1] lookup below raised IndexError.
        later_years = [int(max_year)]

    return later_years, later_years[-1]


@app.callback(
    Output('incomegroup_radio', 'options'),
    Output('incomegroup_radio', 'value'),
    Input('region_dropdown', 'value')
)
def region_vs_groupincome(region_dropdown):
    """Offer only the income groups that occur in the selected region.

    Returns the radio options ("All" followed by the region's income groups in
    value_counts order) and resets the selection to "All".
    """
    in_region = df["Region"] == region_dropdown
    groups_present = df.loc[in_region, "IncomeGroup"].value_counts().index
    return ["All", *groups_present], "All"

# Interaction for Countries Economic

In [20]:
# Callback
@app.callback(
    Output('bar_chart', 'figure'),
    Output('line_chart', 'figure'),
    Output('pie_chart', 'figure'),
    Output('linear_chart', 'figure'),  # outputs refreshed by this callback
    Input('country_dropdown', 'value'),
    Input('feature_dropdown', 'value'),
    Input('from_year', 'value'),
    Input('to_year', 'value')  # inputs whose change triggers this callback
)
# the order of parameters must match the order of the Inputs above
def update_charts(country_dropdown, feature_dropdown, from_year, to_year):
    """Rebuild the four country-comparison figures.

    Args:
        country_dropdown: List of selected country names. An empty or cleared
            selection falls back to ``['Vietnam']`` for every chart.
        feature_dropdown: Name of the indicator column to plot. When cleared
            (``None``) the update is skipped and the current figures stay.
        from_year: First year (inclusive) for the line and regression charts;
            ``None`` means the earliest year in the data.
        to_year: Last year (inclusive) for the line and regression charts;
            ``None`` means the latest year in the data.

    Returns:
        ``(fig_bar, fig_line, fig_pie, fig_lr)``:
        the per-country maximum as a bar chart, the yearly values as a line
        chart, the per-country mean as a pie chart, and a scatter of the yearly
        values with a fitted linear trend per country. The bar and pie charts
        use all years of the selected countries; only the line and regression
        charts honour the year range.

    Raises:
        PreventUpdate: if no feature is selected.
    """
    from dash.exceptions import PreventUpdate

    if feature_dropdown is None:
        raise PreventUpdate

    # Apply the Vietnam fallback to the country list itself so the data subset
    # and every chart agree on which countries are shown.
    countries = list(country_dropdown) if country_dropdown else ['Vietnam']
    # A cleared year dropdown arrives as None; int(None) would raise.
    first_year = int(min_year if from_year is None else from_year)
    last_year = int(max_year if to_year is None else to_year)

    f_df = df[df['Country Name'].isin(countries)]

    # ---- Bar chart: maximum of the feature per country (all years) ----------
    peaks = f_df.groupby('Country Name')[feature_dropdown].max().reindex(countries)
    bar_data = {'Country Name': countries, feature_dropdown: peaks.tolist()}
    fig_bar = px.bar(bar_data,
                     x='Country Name',
                     y=feature_dropdown,
                     title=f'Maximum of {feature_dropdown}')

    # ---- Yearly values inside the requested range ---------------------------
    in_range = f_df[f_df['Year'].between(first_year, last_year)]
    # Sorted: unique() keeps row order, which zig-zags the line when the
    # selected countries cover different year spans.
    years = np.sort(in_range['Year'].unique()).astype(np.int64)
    # One Series per country, indexed by year, NaN where the country has no row.
    # Computed once and reused by both the line and the regression chart.
    per_country = {
        country: (in_range.loc[in_range['Country Name'] == country]
                  .groupby('Year')[feature_dropdown].max()
                  .reindex(years))
        for country in countries
    }

    # ---- Line chart ---------------------------------------------------------
    line_data = {'Years': years}
    for country, series in per_country.items():
        line_data[country] = series.to_numpy(dtype=np.float64)
    newdf = pd.DataFrame(line_data)
    fig_line = px.line(newdf,
                       x='Years',
                       y=countries,
                       color_discrete_sequence=px.colors.qualitative.Safe,
                       title=f'{feature_dropdown} between {first_year} and {last_year}')
    fig_line.update_traces(line=dict(width=5))
    fig_line.update_xaxes(title_text='Years')
    fig_line.update_yaxes(title_text=feature_dropdown)

    # ---- Pie chart: mean of the feature per country (all years) -------------
    g_df = f_df.groupby("Country Name", as_index=False)[feature_dropdown].mean()
    fig_pie = px.pie(g_df,
                     values=feature_dropdown,
                     names='Country Name',
                     title=f"Total portion of countries in {feature_dropdown}")

    # ---- Linear regression chart --------------------------------------------
    palette = px.colors.qualitative.Safe
    traces = []
    for position, country in enumerate(countries):
        # Cycle through the palette so selections longer than the palette are
        # still drawn (zip with a sliced palette silently dropped them).
        color = palette[position % len(palette)]
        observed = per_country[country].dropna()
        x = observed.index.to_numpy(dtype=np.int64)
        # float64: float32 cannot represent GDP-scale values exactly.
        y = observed.to_numpy(dtype=np.float64)

        traces.append(go.Scatter(x=x,
                                 y=y,
                                 mode='markers',
                                 name=country,
                                 showlegend=True,
                                 marker=dict(color=color, size=10)))

        if x.size < 2:
            # No trend can be fitted through fewer than two observations, and
            # fitting on an empty array raises.
            continue

        lr = LinearRegression()
        lr.fit(x.reshape(-1, 1), y)
        x_line = np.linspace(x.min(), x.max(), 100).reshape(-1, 1)
        y_line = lr.predict(x_line)
        traces.append(go.Scatter(x=x_line.reshape(-1),
                                 y=y_line,
                                 mode='lines',
                                 name=f'{country} linear',
                                 showlegend=True,
                                 line=dict(color=color, width=2)))

    layout = go.Layout(
        title=f'Linear Regression on {feature_dropdown}',
        xaxis=dict(title='Years'),
        yaxis=dict(title=feature_dropdown)
    )
    fig_lr = go.Figure(data=traces, layout=layout)
    return fig_bar, fig_line, fig_pie, fig_lr

# Interaction for Region Economic

In [21]:
@app.callback(
    Output("geo_graph", "figure"),
    Input("region_dropdown", "value"),
    Input("geo_feature_dropdown", "value"),
    Input("incomegroup_radio", "value")
)
def display_choropleth(region_dropdown, geo_feature_dropdown, incomegroup_radio):
    """Draw a world map coloured by the per-country mean of one indicator.

    Args:
        region_dropdown: Region whose countries are shown.
        geo_feature_dropdown: Name of the indicator column to average.
        incomegroup_radio: Income group to restrict to; ``"All"`` (or ``None``)
            keeps every income group of the region.

    Returns:
        A ``go.Figure`` containing one choropleth trace.

    Raises:
        PreventUpdate: if the region or the feature dropdown has been cleared.
    """
    from dash.exceptions import PreventUpdate

    if region_dropdown is None or geo_feature_dropdown is None:
        raise PreventUpdate

    selected = df['Region'] == region_dropdown
    if incomegroup_radio not in (None, 'All'):
        selected = selected & (df['IncomeGroup'] == incomegroup_radio)

    by_country = df.loc[selected].groupby('Country Code')
    averages = by_country[geo_feature_dropdown].mean()
    display_names = by_country['Country Name'].first()

    # Locate countries by their three-letter code directly. The previous
    # code -> pycountry name -> 'country names' round trip raised
    # AttributeError for any code pycountry does not know (get() returns None),
    # and pycountry's official names need not match the spellings Plotly accepts.
    choropleth_trace = go.Choropleth(
        locations=averages.index.to_numpy(),
        z=averages.to_numpy(),
        text=display_names.to_numpy(),  # dataset's own country name on hover
        locationmode='ISO-3',
        colorscale='Viridis',
        colorbar_title='Unit(s)'
    )

    # Create the layout
    layout = go.Layout(
        title=f'The average distribution of {geo_feature_dropdown} in {region_dropdown}'
    )

    # Create the figure
    return go.Figure(data=[choropleth_trace], layout=layout)

In [22]:
# Income groups in value_counts order, each mapped to its position in that order.
labels = df["IncomeGroup"].value_counts().index.tolist()
encode = {group: code for code, group in enumerate(labels)}
encode

{'High income': 0,
 'Lower middle income': 1,
 'Upper middle income': 2,
 'Low income': 3}

In [23]:
@app.callback(
    Output("knn_graph", "figure"),
    Input("slider-neighbors", "value"))
def train_and_display_model(k):
    """Fit a k-nearest-neighbours classifier and plot its probability surface.

    Args:
        k: Number of neighbours, taken from the slider.

    Returns:
        The figure produced by ``build_figure``: the sample points plus a
        contour of the predicted probability of class 1 over a grid.
    """
    # replace with your own data source
    points, classes = make_moons(noise=0.3, random_state=0)

    # Grid covering the samples, flattened to one (x, y) row per grid node.
    xrange, yrange = build_range(points, classes)
    grid_x, grid_y = np.meshgrid(xrange, yrange)
    grid_nodes = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    knn = KNeighborsClassifier(k, weights='uniform').fit(points, classes)
    class_one_prob = knn.predict_proba(grid_nodes)[:, 1].reshape(grid_x.shape)

    return build_figure(points, classes, class_one_prob, xrange, yrange)


# ############ HELPER FUNCTIONS ############
def build_range(X, y, mesh_size=.02, margin=.25):
    """
    Build the two 1-D coordinate axes used for the meshgrid.

    Each axis spans one of the first two columns of X, widened by `margin` on
    both sides and sampled every `mesh_size`. `y` is accepted but not used.
    Returns (xrange, yrange).
    """
    first, second = X[:, 0], X[:, 1]

    xrange = np.arange(first.min() - margin, first.max() + margin, mesh_size)
    yrange = np.arange(second.min() - margin, second.max() + margin, mesh_size)
    return xrange, yrange


def build_figure(X, y, Z, xrange, yrange):
    """
    Plot the samples, split into train/test and by class, over a contour of Z.

    The split is a fixed 75/25 train_test_split (random_state=0) with the class
    labels converted to strings. Four marker traces are added in the order
    Train 0, Train 1, Test 0, Test 1, followed by the contour trace.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y.astype(str), test_size=0.25, random_state=0)

    # (legend prefix, points, labels, marker-symbol suffix) per split
    partitions = (
        ('Train', X_train, y_train, ''),
        ('Test', X_test, y_test, '-dot'),
    )
    # (class label, base marker symbol) per class
    class_symbols = (('0', 'square'), ('1', 'circle'))

    fig = go.Figure()
    for split, points, targets, suffix in partitions:
        for label, symbol in class_symbols:
            members = targets == label
            fig.add_trace(
                go.Scatter(
                    x=points[members, 0], y=points[members, 1],
                    name=f'{split}, y={label}',
                    mode='markers', marker_symbol=symbol + suffix
                )
            )

    # Applied before the contour is added, so only the marker traces are restyled.
    fig.update_traces(
        marker_size=12, marker_line_width=1.5,
        marker_color="lightyellow"
    )

    fig.add_trace(
        go.Contour(
            x=xrange, y=yrange, z=Z,
            showscale=False, colorscale='RdBu',
            opacity=0.4, name='Score', hoverinfo='skip'
        )
    )

    return fig

In [24]:
# Start the development server (blocks this cell while the app is running).
# `app.run` is the current Dash entry point; `app.run_server` is the legacy
# alias and is no longer available in Dash 3.
# The reloader is switched off — presumably because it cannot restart a
# notebook kernel; confirm before enabling it.
# Pass host=... / port=... to serve somewhere other than http://127.0.0.1:8050/.
app.run(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: on
