# 可視化

`./scatter_preprocess.ipynb`で作成した：

- `all_res.csv`
- `umamusume.csv`

を利用して，Plotlyで散布図を描く．

## 環境構築

In [1]:
# Notebook initial setup
%matplotlib inline
# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'

import warnings
# NOTE(review): this silences every warning category for the whole session;
# narrow the filter if deprecation or dtype warnings should stay visible.
warnings.filterwarnings('ignore')

In [18]:
import os
import pandas as pd

In [3]:
# plotly関連
!pip install plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go



In [4]:
# Directory the input CSV files are read from.
DIR_IN = '../data/scatter_preprocess'
# Directory the generated HTML figure is written to.
DIR_OUT = '../data/scatter_plot'

In [5]:
# File names of the two CSV files created by ./scatter_preprocess.ipynb.
FN_ALL = 'all_res.csv'
FN_UM = 'umamusume.csv'

In [6]:
# Values of the `distance_class` column, listed in subplot order.
DISTANCE_CLASSES = ['short', 'mile', 'intermediate', 'long']

In [7]:
# Names of the index columns.
# NOTE(review): the grouping comments below are inferred from the names only,
# and nothing in the visible part of this notebook reads INDICES - confirm.
INDICES = [
    # surface
    'turf_index', 'dart_index',
    # distance class
    'short_index', 'mile_index', 'intermediate_index', 'long_index',
    # presumably running style
    'nige_index', 'senko_index', 'sashi_index', 'oikomi_index',
]

In [8]:
# Titles shown above the subplots. They are matched to DISTANCE_CLASSES by
# position, so the two lists must keep the same order.
SUBPLOT_TITLES = [
    '短距離（1,600m未満）',
    'マイル（1,600m～2,000m未満）',
    '中距離（2,000m～2,500m未満）',
    '長距離（2,500m以上）',
]

## 可視化

In [9]:
def make_hover_text(
        horse_name, horse_age,
        race_name, turf, dart, date, distance,
        seconds_total, seconds_3f,
        speed_total, speed_3f,
        arrival_order, prize):
    """Build the HTML label shown when hovering over one result marker.

    ``turf`` and ``dart`` are truthiness flags selecting the surface label;
    ``turf`` wins when both are set and the label is empty when neither is.
    ``speed_total`` and ``speed_3f`` are formatted with 4 significant digits;
    every other value is interpolated as-is.

    Returns the label as a single string with ``<br>`` line breaks.
    """
    race_type = '芝' if turf else ('ダート' if dart else '')

    label_lines = [
        f'<b>{horse_name}</b> ({horse_age}歳) <br><br>',
        f'レース：{race_name} ({date}, {race_type}, {distance}m)<br>',
        f'タイム： {seconds_total} 秒 (上り: {seconds_3f} 秒) <br>',
        f'平均速度: {speed_total:.4} km/h （上り: {speed_3f:.4} km/h） <br>',
        f'着順：{arrival_order}<br>',
        f'賞金：{prize}万円',
        # Empty last element: the text ends with a newline plus indentation.
        '',
    ]
    # Continuation lines keep a four-space indent after each newline.
    return '\n    '.join(label_lines)

In [10]:
# Add the column used for hover labels
def add_hover_text_to_df(df):
    """Return a copy of ``df`` with an added ``hover_text`` column.

    Each row's label is produced by ``make_hover_text`` from the columns
    listed in ``hover_cols``, passed positionally in that order. The input
    frame is not modified.

    Raises:
        KeyError: if any of the required columns is missing from ``df``.
    """
    # Order matters: it must match make_hover_text's positional parameters.
    hover_cols = [
        'horse_name', 'horse_age',
        'race_name', 'turf', 'dart', 'date', 'distance',
        'seconds_total', 'seconds_3f',
        'speed_total', 'speed_3f',
        'arrival_order', 'prize',
    ]
    df_new = df.copy()
    # Iterate plain tuples instead of DataFrame.apply(axis=1): apply builds a
    # Series object for every row, which is needlessly slow on a large frame.
    df_new['hover_text'] = [
        make_hover_text(*row)
        for row in df_new[hover_cols].itertuples(index=False, name=None)
    ]
    return df_new

In [11]:
def get_min_and_max_of_col(df, col):
    """Return padded (lower, upper) bounds for column ``col`` of ``df``.

    The lower bound is the column minimum scaled by 0.9 and the upper bound
    is the column maximum scaled by 1.1, returned as a 2-tuple.
    """
    values = df[col]
    lower = values.min() * 0.9
    upper = values.max() * 1.1
    return lower, upper

In [12]:
def make_df_for_plot(df, dc, color_col, asc):
    """Build the frame plotted in one distance-class panel.

    Keeps the rows whose ``distance_class`` equals ``dc``, sorts them by
    ``color_col`` (ascending when ``asc`` is true) and returns the result
    with a fresh 0..n-1 index. ``df`` itself is left untouched.
    """
    in_class = df['distance_class'] == dc
    subset = df.loc[in_class].reset_index(drop=True)
    return subset.sort_values(
        by=color_col, ascending=asc, ignore_index=True)

In [13]:
def add_scatter_trace_to_fig(
        fig, x, y, color, text, name, i,
        opacity=1., symbol='circle', size=10,
        hover=True, linecolor='White'):
    """Add one marker-only scatter trace to subplot number ``i`` of ``fig``.

    ``i`` is a 0-based panel number mapped onto a two-column grid, filled row
    by row. Marker colours come from ``color`` and use the figure's shared
    ``coloraxis``. When ``hover`` is false the trace is excluded from hover
    handling; otherwise ``text`` is shown as the hover label.
    """
    # Convert the 0-based panel number to 0-based (row, column) offsets.
    row_offset, col_offset = divmod(i, 2)

    if hover:
        hoverinfo, hovertemplate = 'text', '%{text}'
    else:
        hoverinfo, hovertemplate = 'skip', None

    marker = {
        'color': color,
        'coloraxis': 'coloraxis',
        'line': {'color': linecolor, 'width': 1},
    }
    trace = go.Scatter(
        x=x,
        y=y,
        mode='markers',
        marker_symbol=symbol,
        marker_size=size,
        opacity=opacity,
        hoverinfo=hoverinfo,
        marker=marker,
        text=text,
        hovertemplate=hovertemplate,
        name=name,
    )
    # Subplot rows and columns are 1-based.
    fig.add_trace(trace, row=row_offset + 1, col=col_offset + 1)

In [14]:
def update_colorbar_of_fig(fig, color_title):
    """Hide the legend of ``fig`` and set its colour-bar title.

    ``color_title`` becomes the title of the colour bar attached to the
    figure's ``coloraxis``. The figure is updated in place.
    """
    layout_changes = {
        'showlegend': False,
        'coloraxis_colorbar': {'title': color_title},
    }
    fig.update_layout(**layout_changes)

In [15]:
def update_axis_ranges_of_fig(fig, x_min, x_max, y_min, y_max):
    """Set the displayed x and y ranges of ``fig`` in place.

    The x axes are set to ``[x_min, x_max]`` and the y axes to
    ``[y_min, y_max]`` through ``update_xaxes`` / ``update_yaxes``.
    """
    x_range = [x_min, x_max]
    y_range = [y_min, y_max]
    fig.update_xaxes(range=x_range)
    fig.update_yaxes(range=y_range)

In [16]:
def update_axis_titles_of_fig(fig, x_title, y_title):
    """Set the x and y axis titles of ``fig`` in place.

    ``x_title`` is passed to ``update_xaxes`` and ``y_title`` to
    ``update_yaxes`` as ``title_text``.
    """
    x_settings = {'title_text': x_title}
    y_settings = {'title_text': y_title}
    fig.update_xaxes(**x_settings)
    fig.update_yaxes(**y_settings)

In [21]:
def subplots_scatter_by_distance_class(
        df, color_col, color_title, asc=True):
    """Draw a 2x2 grid of scatter plots, one panel per distance class.

    Each panel plots ``speed_total`` (x) against ``speed_3f`` (y) for the
    rows of one entry of ``DISTANCE_CLASSES``, coloured by ``color_col`` and
    labelled on hover with the ``hover_text`` column. ``color_title`` is the
    colour-bar title. ``asc`` sets the sort direction of ``color_col`` within
    each panel (presumably to control which markers are drawn last - confirm).

    Returns the assembled figure.
    """
    fig = make_subplots(
        rows=2, cols=2, subplot_titles=SUBPLOT_TITLES)

    # Axis limits are taken from the whole frame, not per panel, so every
    # panel ends up with the same x and y ranges.
    x_bounds = get_min_and_max_of_col(df, 'speed_total')
    y_bounds = get_min_and_max_of_col(df, 'speed_3f')

    for panel_no, distance_class in enumerate(DISTANCE_CLASSES):
        panel_df = make_df_for_plot(df, distance_class, color_col, asc)
        add_scatter_trace_to_fig(
            fig,
            x=panel_df['speed_total'],
            y=panel_df['speed_3f'],
            color=panel_df[color_col],
            text=panel_df['hover_text'],
            name=distance_class,
            i=panel_no)

    update_colorbar_of_fig(fig, color_title)
    update_axis_ranges_of_fig(
        fig,
        x_min=x_bounds[0], x_max=x_bounds[1],
        y_min=y_bounds[0], y_max=y_bounds[1])
    update_axis_titles_of_fig(
        fig,
        x_title='レース全体の平均速度[km/h]',
        y_title='上り3ハロンの平均速度[km/h]')
    return fig

### 全レース結果の散布図（`scatter_all.html`）

In [19]:
# Load the race-results CSV (FN_ALL) from the input directory.
df_all = pd.read_csv(os.path.join(DIR_IN, FN_ALL))

In [20]:
# Add the column used for hover display
df_all = add_hover_text_to_df(df_all)

In [22]:
# Preview the first rows, transposed so each column is shown as a row.
df_all.head().T

Unnamed: 0,0,1,2,3,4
race_id,198605020811,198605020811,198605020811,198605020811,198605020811
date,1986-05-11,1986-05-11,1986-05-11,1986-05-11,1986-05-11
place,東京,東京,東京,東京,東京
race_name,第36回安田記念(G1),第36回安田記念(G1),第36回安田記念(G1),第36回安田記念(G1),第36回安田記念(G1)
distance,1600,1600,1600,1600,1600
dart,False,False,False,False,False
dart_cond,,,,,
turf,True,True,True,True,True
turf_cond,良,良,良,良,良
steeple,False,False,False,False,False


In [23]:
# Make sure the output directory exists so the export does not fail when
# ../data/scatter_plot has not been created yet.
os.makedirs(DIR_OUT, exist_ok=True)
# Scatter of all race results, coloured by prize money.
fig = subplots_scatter_by_distance_class(df_all, 'prize', '獲得賞金')
# Build the path with os.path.join, as is done for the input CSV.
fig.write_html(os.path.join(DIR_OUT, 'scatter_all.html'))