In [8]:
import dash
import dash_html_components as html
import dash_bootstrap_components as dbc
from jupyter_dash import JupyterDash
import dash_core_components as dcc
from dash.dependencies import Output, Input
import pandas as pd
from pathlib import Path
import os
import plotly.graph_objects as go

# Locate the "data" folder that sits beside the notebook's working directory.
path = Path.cwd()
base_dir = path.parent
data_dir = os.path.join(base_dir, "data")

# Load the DAG run records used by the dashboard below.
dv_runs = pd.read_csv(
    os.path.join(data_dir, "dv_runs.csv"),
    delimiter=',',
    quotechar='"',
    header='infer',
)


# app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])
# Notebook-hosted Dash application styled with the Bootstrap "Cosmo" theme.
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.COSMO])

# Page skeleton: headings, an empty placeholder div, then three tabs.
app.layout = html.Div(
    [
        html.H1(
            'Gourmand Database',
            style={'color': 'orange', 'fontSize': '50px'},
        ),
        html.H2('Businesses'),
        html.Br(),
        html.Div(id='chain_output'),
        dbc.Tabs(
            [
                # Tab 1: static bullet list of summary labels.
                dbc.Tab(
                    [
                        html.Ul(
                            [
                                html.Li('Counts: '),
                                html.Li('Unique: '),
                                html.Li('Top Chain: '),
                                html.Li('Highest Review Count: '),
                                # Column width/offset differ between the lg and sm breakpoints.
                                dbc.Col(
                                    html.Li('Computer screen <--> phone screen'),
                                    lg={'size': 3, 'offset': 1},
                                    sm={'size': 7, 'offset': 5},
                                ),
                            ]
                        ),
                    ],
                    label='Data at a Glance',
                ),
                # Tab 2: links to the project repository and the data source.
                dbc.Tab(
                    [
                        html.Ul(
                            [
                                html.Br(),
                                html.Li(
                                    [
                                        'Github Repo: ',
                                        html.A(
                                            'gourmand-data-pipelines',
                                            href='https://github.com/raindata5/gourmand-data-pipelines',
                                        ),
                                    ]
                                ),
                                html.Li(
                                    [
                                        'Source: ',
                                        html.A(
                                            'Yelp',
                                            href='https://www.yelp.com/developers/documentation/v3',
                                        ),
                                    ]
                                ),
                            ]
                        ),
                    ],
                    label='Background Information',
                ),
                # Tab 3: two dropdown filters feeding the runtime bar chart.
                dbc.Tab(
                    [
                        html.Br(),
                        dcc.Dropdown(
                            id='dag_state_dropdown_menu',
                            options=[
                                {'label': dag_state.title(), 'value': dag_state}
                                for dag_state in dv_runs['dag_state'].unique()
                            ],
                        ),
                        dcc.Dropdown(
                            id='date_dropdown_menu',
                            options=[
                                {'label': run_date, 'value': run_date}
                                for run_date in sorted(dv_runs['execution_date'].unique())
                            ],
                        ),
                        dcc.Graph(id='runtime_chart'),
                    ],
                    label='Airflow Task Graph',
                ),
            ]
        ),
        # Unused row sketch kept from the original cell:
        #     ,dbc.Row(
        #             [
        #                 dbc.Col('BusinessID',width=1 ),
        #                 dbc.Col('BusinessName', width =2)
        #                 ]
        #             )
    ]
)

@app.callback(
    Output('runtime_chart', 'figure'),
    Input('date_dropdown_menu', 'value'),
    Input('dag_state_dropdown_menu', 'value'),
)
def plot_by_state_day(date, state):
    """Build the runtime bar chart for the current dropdown selections.

    Parameters
    ----------
    date
        Value of the date dropdown; a falsy value means "do not filter by date".
    state
        Value of the DAG-state dropdown; a falsy value means "do not filter
        by state".

    Returns
    -------
    plotly.graph_objects.Figure
        Bars of ``runtime_seconds`` per ``dag_id`` for the selected rows of
        ``dv_runs``, ordered by descending runtime.
    """
    selected = dv_runs
    title_date, title_state = 'all days', 'all'

    # Each filter is applied only when its dropdown has a value.
    if state:
        selected = selected.loc[selected['dag_state'] == state]
        title_state = state
    if date:
        selected = selected.loc[selected['execution_date'] == date]
        title_date = date

    ranked = selected.sort_values(['runtime_seconds'], ascending=False)

    fig = go.Figure()
    fig.add_bar(x=ranked['dag_id'], y=ranked['runtime_seconds'])
    fig.layout.title = f'airflow tasks runtimes for {title_date} and for {title_state} attempts'
    fig.layout.yaxis.title = 'runtime (seconds)'
    fig.layout.template = "ggplot2"
    return fig


# if __name__ == '__main__':
# app.run_server(mode='jupyterlab')

# Start the app on port 8050 using JupyterDash's 'jupyterlab' display mode.
app.run_server(port=8050, mode='jupyterlab')

In [19]:
# Number of recorded runs per DAG, shaded with the 'cividis' colormap.
dv_runs['dag_id'].value_counts().to_frame().style.background_gradient(cmap='cividis')

Unnamed: 0,dag_id
apache_airflow_perf,25
elt_validation,22
gourmand_data_pipeline,11
simple_dag,4


In [21]:
# Pipe-delimited business/category extract from the shared data directory.
bus_cat_dataframe = pd.read_csv(
    os.path.join(data_dir, 'bus_cat.csv'),
    sep='|',
    low_memory=True,
)

In [23]:
# Row count per value of the CountryName column.
bus_cat_dataframe['CountryName'].value_counts()

US    120224
VI      6101
GB       111
CA        68
MX        11
AR         8
DE         3
IE         1
Name: CountryName, dtype: int64

In [44]:
from unicodedata import lookup
import numpy as np
def country_flag(letters):
    """Return the flag emoji for a two-letter country code.

    The flag is the concatenation of the two Unicode "regional indicator
    symbol" characters matching the first two characters of the code.

    Parameters
    ----------
    letters : str or missing value
        Country code such as ``'US'``. Leading and trailing whitespace is
        ignored and only the first two remaining characters are used.

    Returns
    -------
    str or float
        The two-character flag string, or ``np.nan`` when ``letters`` is not
        a string (for example ``None`` or a float NaN) or has fewer than two
        characters once stripped.

    Raises
    ------
    KeyError
        Propagated from ``unicodedata.lookup`` when one of the two characters
        has no regional indicator symbol.
    """
    # A float NaN is truthy and not subscriptable, so a plain truthiness
    # check lets it through and indexing then fails; test the type instead.
    if not isinstance(letters, str):
        return np.nan

    # Strip the code itself: padding such as ' US' would otherwise put a
    # blank into the character name and make the lookup fail.
    code = letters.strip()
    if len(code) < 2:
        return np.nan

    return ''.join(
        lookup(f'REGIONAL INDICATOR SYMBOL LETTER {symbol}') for symbol in code[:2]
    )


In [45]:
# Derive a flag column by applying country_flag to every CountryName value.
bus_cat_dataframe['flag'] = bus_cat_dataframe['CountryName'].map(country_flag)

In [47]:
# Preview the first five rows, including the new flag column.
bus_cat_dataframe.head(n=5)

Unnamed: 0,BusinessKey,BusinessName,ChainName,PaymentLevelName,Longitude,Latitude,BusinessCategoryName,CityName,CountyName,StateName,CountryName,flag
0,23,eastern-shore-street-eats-daphne,Eastern Shore Street Eats,Unknown,-87.868675,30.614799,American (Traditional),Daphne,Baldwin County,Alabama,US,🇺🇸
1,23,eastern-shore-street-eats-daphne,Eastern Shore Street Eats,Unknown,-87.868675,30.614799,Food Trucks,Daphne,Baldwin County,Alabama,US,🇺🇸
2,33,chris-hot-dogs-montgomery-2,Chris' Hot Dogs,Very Low,-86.306631,32.377287,Hot Dogs,Montgomery,Autauga County,Alabama,US,🇺🇸
3,122,benjas-thai-and-sushi-restaurant-mobile,Benja's Thai & Sushi Restaurant,Low,-88.190163,30.662022,Sushi Bars,Mobile,Mobile County,Alabama,US,🇺🇸
4,122,benjas-thai-and-sushi-restaurant-mobile,Benja's Thai & Sushi Restaurant,Low,-88.190163,30.662022,Thai,Mobile,Mobile County,Alabama,US,🇺🇸


In [50]:

# Row count per value of the flag column.
bus_cat_dataframe['flag'].value_counts()

🇺🇸    120224
🇻🇮      6101
🇬🇧       111
🇨🇦        68
🇲🇽        11
🇦🇷         8
🇩🇪         3
🇮🇪         1
Name: flag, dtype: int64

In [69]:
import plotly.graph_objects as go


In [None]:
# Create an empty plotly figure and bind it to `fig`.
fig = go.Figure()


In [None]:
# Add two scatter traces to the figure; the last call's return value is the cell output.
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 2, 3]))
fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[4, 5, 2, 3]))


In [None]:
# Set the figure and axis titles in one call; the trailing semicolon keeps
# the cell from echoing the returned figure.
fig.update_layout(
    title='Plotly Graph Title',
    xaxis_title='xaxis title',
    yaxis_title='yaxis title',
);

In [None]:
# Write the figure to an HTML file, passing a config that sets the
# image-button format to 'svg'.
fig.write_html(
    'html_plot.html',
    config=dict(toImageButtonOptions=dict(format='svg')),
)

In [None]:
# Write the figure to 'image_file.svg' at 850x600.
fig.write_image('image_file.svg', width=850, height=600)

In [None]:
# Display the figure; requires `fig` from the earlier cell to exist.
fig.show()

In [19]:
# Show the figure with the 'json' renderer; `fig` must already be defined in the kernel.
fig.show(renderer='json')

NameError: name 'fig' is not defined

In [34]:
# Group rows by index label and keep the first value of each listed column.
dv_runs.groupby([dv_runs.index]).agg(
    dict.fromkeys(
        ['dag_id', 'execution_date', 'dag_state', 'runtime_seconds'],
        'first',
    )
)

Unnamed: 0,dag_id,execution_date,dag_state,runtime_seconds
0,apache_airflow_perf,2021-12-24,failed,419.6012
1,elt_validation,2021-12-20,success,196.4781
2,elt_validation,2022-01-11,success,27.7292
3,elt_validation,2022-01-09,success,50.7414
4,elt_validation,2021-12-25,success,941.7842
...,...,...,...,...
57,elt_validation,2021-12-18,failed,67.1007
58,apache_airflow_perf,2022-01-14,success,323.2926
59,apache_airflow_perf,2022-01-11,success,141.5506
60,elt_validation,2021-12-16,failed,158.6561


In [88]:
# import dash
# import dash_html_components as html
# import dash_bootstrap_components as dbc
# from jupyter_dash import JupyterDash
# import dash_core_components as dcc
# from dash.dependencies import Output, Input
# import pandas as pd
# from pathlib import Path
# import os
# path = Path(os.getcwd())
# base_dir = path.parent
# data_dir = os.path.join(base_dir, "data")
# data_dir

# Reload the DAG run records from the data directory set up earlier in the notebook.
dv_runs = pd.read_csv(
    os.path.join(data_dir, "dv_runs.csv"),
    delimiter=',',
    quotechar='"',
    header='infer',
)



# app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])
# Minimal version of the dashboard: two dropdown filters above the runtime chart.
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.COSMO])

app.layout = html.Div(
    [
        dcc.Dropdown(
            id='dag_state_dropdown_menu',
            options=[
                {'label': dag_state.title(), 'value': dag_state}
                for dag_state in dv_runs['dag_state'].unique()
            ],
        ),
        dcc.Dropdown(
            id='date_dropdown_menu',
            options=[
                {'label': run_date, 'value': run_date}
                for run_date in sorted(dv_runs['execution_date'].unique())
            ],
        ),
        dcc.Graph(id='runtime_chart'),
    ]
)
@app.callback(
    Output('runtime_chart', 'figure'),
    Input('date_dropdown_menu', 'value'),
    Input('dag_state_dropdown_menu', 'value'),
)
def plot_by_state_day(date, state):
    """Build the runtime bar chart for the current dropdown selections.

    Parameters
    ----------
    date
        Value of the date dropdown; a falsy value means "do not filter by date".
    state
        Value of the DAG-state dropdown; a falsy value means "do not filter
        by state".

    Returns
    -------
    plotly.graph_objects.Figure
        Bars of ``runtime_seconds`` per ``dag_id`` for the selected rows of
        ``dv_runs``, ordered by descending runtime.
    """
    selected = dv_runs
    title_date, title_state = 'all days', 'all'

    # Each filter is applied only when its dropdown has a value.
    if state:
        selected = selected.loc[selected['dag_state'] == state]
        title_state = state
    if date:
        selected = selected.loc[selected['execution_date'] == date]
        title_date = date

    ranked = selected.sort_values(['runtime_seconds'], ascending=False)

    fig = go.Figure()
    fig.add_bar(x=ranked['dag_id'], y=ranked['runtime_seconds'])
    fig.layout.title = f'airflow tasks runtimes for {title_date} and for {title_state} attempts'
    fig.layout.yaxis.title = 'runtime (seconds)'
    return fig
    
# Start the app on port 8050 in 'jupyterlab' display mode with debug enabled.
app.run_server(port=8050, mode='jupyterlab', debug=True)