In [226]:
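# Optional one-time setup: uncomment to install the plotting dependencies into the kernel's environment.
# %pip install -q altair vega-datasets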

In [227]:
import pandas as pd
import altair as alt 

# Lift Altair's default cap on the number of rows a chart may embed.
alt.data_transformers.disable_max_rows()

# Columns of the baseline export that the cells below rely on.
columns = ['egtm', 'reportts', 'acnum', 'pos', 'dep', 'test_pred', 'train_pred']

# Read the export, order the rows by report timestamp, keep only the columns
# listed above and add a display label built from the registration number.
dataset = (
    pd.read_csv('./viz-baseline.csv', parse_dates=['reportts'])
    .sort_values('reportts')
    .loc[:, columns]
    .assign(actype=lambda frame: 'A321-neo ' + frame['acnum'])
)

In [229]:
# Radio buttons that pick the aircraft; the selection projects onto `acnum`
# and starts on VQ-BDU.
acnum_radio = alt.binding_radio(
    name="Воздушное судно",
    options=['VQ-BGU', 'VQ-BDU'],
    labels=['A321-neo VQ-BGU', 'A321-neo VQ-BDU'],
)
acnum_select = alt.selection_point(value="VQ-BDU", bind=acnum_radio, fields=['acnum'])

# Radio buttons that pick the engine position; the selection projects onto
# `pos` and starts on engine 1.
engine_radio = alt.binding_radio(
    name="Позиция двигателя",
    options=[1, 2],
    labels=['Двигатель №1', 'Двигатель №2'],
)
engine_select = alt.selection_point(value=1, bind=engine_radio, fields=['pos'])

# Opacity rule: fully opaque for rows matching both selections, invisible otherwise.
aceng_condition = alt.condition(acnum_select & engine_select, alt.value(1), alt.value(0))

# Legend of the timeline series: untitled, vertical, drawn in a white box with
# a gray border at fixed pixel coordinates (orient='none' + legendX/legendY).
leg = alt.Legend(
    title="",
    direction='vertical',
    orient='none',
    legendX=930,
    legendY=30,
    padding=5,
    fillColor='white',
    strokeColor='gray',
)

# Display names of the three plotted series, in the order:
# measured EGT margin, prediction on the test split, prediction on the train split.
cols = [
    'EGT Margin (настоящее значение)',
    'Prediction (тестовая выборка)',
    'Prediction (обучающая выборка)',
]

# Rename the raw columns to their display names, then reshape to long format:
# one row per (reportts, pos, acnum, variable) with the number in `value`.
data = (
    dataset
    .rename(columns=dict(zip(['egtm', 'test_pred', 'train_pred'], cols)))
    .melt(id_vars=['reportts', 'pos', 'acnum'], value_vars=cols)
)

# Horizontal interval selection; it is used below as the x-domain of the
# detail view.
brush = alt.selection_interval(encodings=['x'])

# Point selection that follows the mouse and snaps to the nearest report
# timestamp; an empty selection matches nothing.
nearest = alt.selection_point(
    fields=['reportts'],
    on='mouseover',
    nearest=True,
    empty=False,
)

# Smoothed line per series for the currently selected aircraft and engine.
timeline = (
    alt.Chart(data.copy())
    .properties(width=900, height=400)
    .transform_filter(acnum_select & engine_select)
    .mark_line(interpolate="basis")
    .encode(
        x=alt.X('reportts:T').title(""),
        y=alt.Y('value').title("").scale(zero=False),
        opacity=aceng_condition,
        color=alt.Color('variable', legend=leg),
    )
    .add_params(engine_select, acnum_select)
)

# Fully transparent points whose only job is to host the `nearest` selection
# and the tooltip.
selectors = (
    alt.Chart(data)
    .properties(width=900, height=400)
    .mark_point()
    .transform_filter(acnum_select & engine_select)
    .encode(
        x=alt.X('reportts:T').scale(domain=brush),
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip('reportts', title='Дата'),
            alt.Tooltip('value', title='EGT Margin'),
        ],
    )
    .add_params(nearest, engine_select, acnum_select)
)

# Point markers derived from the timeline, visible only at the hovered timestamp.
points = timeline.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0)),
)

# Vertical gray rule at the hovered timestamp.
rules = (
    alt.Chart(data)
    .properties(width=900, height=400)
    .transform_filter(acnum_select & engine_select)
    .mark_rule(color='gray')
    .encode(x=alt.X('reportts:T').scale(domain=brush))
    .transform_filter(nearest)
)


# Detail view: the timeline with its x-domain driven by the brush.
upper = (
    timeline
    .encode(x=alt.X('reportts:T', scale=alt.Scale(domain=brush)))
    .properties(title="Модель EGT Margin (запас температуры EGT)")
)

# Overview strip: a 30 px tall copy of the timeline that owns the brush.
lower = (
    timeline
    .encode(x=alt.X('reportts:T', title='Время записи отчёта'))
    .properties(height=30)
    .add_params(brush)
)

# Layer the detail view with its hover helpers and stack the overview strip underneath.
egtm_timeline = alt.vconcat(alt.layer(upper, selectors, points, rules), lower)


In [230]:
# Hard-coded RMSE per aircraft and engine position, one record per row.
# NOTE(review): the values are literals — presumably copied from an offline
# evaluation; confirm they are still current. Two of them are identical.
quality = pd.DataFrame(
    [
        ('VQ-BGU', 1, 2.721113),
        ('VQ-BGU', 2, 5.1704643),
        ('VQ-BDU', 1, 4.5466463510),
        ('VQ-BDU', 2, 2.721113),
    ],
    columns=['acnum', 'pos', 'rmse'],
)

# Bar chart of the RMSE per engine position, with one column facet per aircraft.
# The two radio-bound selections are attached to the chart but it does not
# filter or style by them.
qual_bar = (
    alt.Chart(quality)
    .mark_bar()
    .encode(
        x=alt.X('pos:N').title("№ двиг."),
        y=alt.Y('rmse:Q').title("RMSE (в градусах)"),
        column=alt.Column('acnum:N').title(""),
    )
    .add_params(engine_select, acnum_select)
    .properties(title="Средняя ошибка модели (°C)")
)

In [231]:
# Month bucket label in the form "<year> <month>" without zero padding
# (e.g. "2019 1"). The annotation condition in the chart cell compares against
# this exact format, so the label is kept as is.
dataset['yearmonth'] = dataset['reportts'].dt.year.astype(str) + ' ' + dataset['reportts'].dt.month.astype(str)

# Per month, aircraft and engine: mean EGT margin and number of records
# (the count of `dep` values, exposed as 'Number of takeoffs').
grouped = (
    dataset
    .rename(columns={'dep': 'Number of takeoffs'})
    .groupby(['yearmonth', 'acnum', 'pos'])
    .agg({'egtm': 'mean', 'Number of takeoffs': 'count'})
    .reset_index()
)

# Change of the mean EGT margin relative to the previous month of the SAME
# aircraft/engine. A plain `grouped.egtm.diff()` subtracts the preceding row,
# which after the groupby is a different engine or aircraft, and the string
# labels sort lexicographically ("2019 10" before "2019 2"). Months are
# therefore ordered by their earliest report timestamp and the difference is
# taken inside each (acnum, pos) group; the result is aligned back on the index,
# so the row order of `grouped` is unchanged.
# NOTE(review): month-over-month change per engine is the presumed intent — confirm.
grouped['egtm_diff'] = (
    grouped
    .assign(month_start=grouped['yearmonth'].map(dataset.groupby('yearmonth')['reportts'].min()))
    .sort_values(['acnum', 'pos', 'month_start'])
    .groupby(['acnum', 'pos'])['egtm']
    .diff()
)


In [None]:
# Chronological order of the "<year> <month>" labels. They are plain strings,
# so the default ascending sort of a nominal axis would put "2019 10" before
# "2019 2" and the line would connect months out of order. The same explicit
# order is passed to every layer so the shared x scale stays consistent.
month_order = sorted(
    grouped['yearmonth'].unique(),
    key=lambda label: tuple(int(part) for part in label.split()),
)

# Shared base: monthly aggregates of the selected aircraft and engine.
base = (
    alt.Chart(grouped, width=500, title="Зависимость EGT margin от интенсивности полётов")
    .transform_filter(acnum_select & engine_select)
    .add_params(engine_select)
    .add_params(acnum_select)
)

# Bars: number of records per month.
bar = base.mark_bar().encode(
    y=alt.Y('Number of takeoffs', axis=alt.Axis(title="Число перелётов в месяц")),
    x=alt.X("yearmonth:N", sort=month_order, axis=alt.Axis(title="Год / месяц")),
)

# Orange line: mean EGT margin per month.
lines = base.mark_line().encode(
    y=alt.Y('egtm', axis=alt.Axis(title="Mean EGT Margin")),
    x=alt.X("yearmonth:N", sort=month_order, axis=alt.Axis(title="Год / месяц")),
    color=alt.ColorValue("#ff9500"),
)

# Text label for the line; it is drawn at every point but only made visible
# on the "2019 1" bucket.
text_annotations = base.mark_text(
    align='left',
    baseline='middle',
    dx=10,
    dy=12,
    fontSize=13,
).encode(
    y=alt.Y('egtm', axis=alt.Axis(title="")),
    x=alt.X('yearmonth:N', sort=month_order),
    text=alt.value('Mean EGT Margin'),
    color=alt.ColorValue("#d47c00"),
    opacity=alt.condition('datum.yearmonth == "2019 1"', alt.value(1), alt.value(0)),
)

# Each layer gets its own y scale: record counts and EGT margin have different ranges.
egtm_bars = alt.layer(bar, lines, text_annotations).resolve_scale(
    y='independent'
)

In [233]:
# Airport reference table, downloaded from datahub.io on every run.
airports = pd.read_csv('https://datahub.io/core/airport-codes/r/airport-codes.csv')

airports_coord = airports[['gps_code', 'coordinates', 'name', 'municipality', 'iata_code']]
# Attach airport details to every record by matching `dep` against the GPS
# code; the inner join drops records whose `dep` has no match.
airports_coord = dataset.merge(airports_coord, 'inner', left_on='dep', right_on='gps_code')
# Split the "a, b" coordinate string at its first comma and store both parts as
# floats. Previously they stayed strings (the second with a leading space) even
# though the map encodes them as quantitative fields.
# NOTE(review): this assumes the file lists longitude first, then latitude —
# confirm against the current airport-codes export.
airports_coord[['long', 'lat']] = (
    airports_coord['coordinates']
    .str.split(',', n=1, expand=True)
    .astype(float)
)

In [234]:
# Imported under an alias so the melted `data` frame built for the timeline is
# not overwritten by the vega_datasets loader.
from vega_datasets import data as vega_data
world = vega_data.world_110m.url

# One projection shared by every layer. The basemap used translate=[70, 380]
# while the circle and label layers did not, so the layers could not share a
# projection and the markers were positioned with a different offset than the
# map underneath them.
map_projection = dict(type='mercator', scale=210, translate=[70, 380])

# Departures of the selected aircraft/engine, counted per airport.
base = alt.Chart(airports_coord, width=600, height=400) \
  .transform_filter(acnum_select & engine_select) \
  .add_params(engine_select) \
  .add_params(acnum_select) \
  .transform_aggregate( dep_count='count(dep)', groupby=['dep', 'lat', 'long', 'municipality', 'iata_code']) \
  .transform_calculate(
    dep_size='20 + datum.dep_count', 
    label="datum.iata_code + '(' + datum.dep_count + ')'", 
    descr="datum.municipality + ' / ' + datum.iata_code + ' (' + datum.dep_count + ' takeoffs)'", 
  )

# Country outlines. Named `world_map` so the builtin `map` is not shadowed.
world_map = alt.Chart(alt.topo_feature(world, 'countries'), width=600, height=400) \
  .properties(title="Карта частоты вылетов") \
  .mark_geoshape(fill='#ade9ff', stroke='#706545', opacity=0.9, strokeWidth=0.5) \
  .project(**map_projection)

# Size legend at fixed pixel coordinates.
# NOTE(review): the listed values are `dep_size` values (count + 20), while the
# legend title says it shows the number of departures.
dep_legend = alt.Legend(title='Число вылетов', 
  values=[20, 100, 200, 300],
  orient='none',
  direction='horizontal',
  legendX=950, legendY=560,
  fillColor='white',
  padding=4
)

# One circle per airport, sized by dep_size; airports with 40 departures or
# fewer are drawn half-transparent.
points = base \
  .mark_circle().encode(
    latitude='lat:Q',
    longitude='long:Q',
    size=alt.Size('dep_size:Q', legend=dep_legend, scale=alt.Scale(range=[20, 700])),
    tooltip='descr:N',
    color=alt.ColorValue('#4c52fc'),
    opacity=alt.condition('datum.dep_count > 40', alt.value(1), alt.value(0.5)),
  ).project(**map_projection)

# "IATA(count)" labels above the circles, shown only for airports with more
# than 40 departures.
map_labels = base.mark_text(dy=-15) \
  .encode(
    latitude='lat:Q',
    longitude='long:Q',
    text='label:N',
    opacity=alt.condition('datum.dep_count > 40', alt.value(1), alt.value(0))
  ).project(**map_projection)

choropleth = alt.layer(world_map, points, map_labels)

In [235]:
# Two-row dashboard: model quality and EGT timeline on top, monthly activity
# and departure map underneath.
dash = alt.vconcat(
    alt.hconcat(qual_bar, egtm_timeline),
    alt.hconcat(egtm_bars, choropleth),
)

In [237]:
# dash.save("dashboard_2.html")