# DataFrameの準備　機械学習の定番　アヤメのデータセット

In [None]:
from sklearn import datasets
import pandas as pd
 
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# One column per measured feature, plus a 'target' column holding the species
# name looked up from the integer class codes in iris.target.
df_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    target=iris.target_names[iris.target]
)

# Trailing expression: the notebook renders the first rows of the frame.
df_iris.head()

# 何はともあれ、DataFrame.describe()

In [None]:
# Summary statistics for df_iris. Left as the cell's trailing expression so
# the notebook renders the resulting table.
df_iris.describe()

# pandas_profiling.ProfileReport(DataFrame)

In [None]:
import pandas_profiling as pdp  # pip install pandas-profiling

# Build a pandas-profiling report for df_iris. Left as the cell's trailing
# expression so the notebook renders the report.
pdp.ProfileReport(df_iris)

# pixiedust   display(DataFrame)

In [None]:
import pixiedust  # pip install pixiedust

# Show df_iris through display().
# NOTE(review): display is not defined in the visible code; presumably the
# pixiedust import above provides it in the notebook namespace -- confirm.
display(df_iris)

# plotly.express

In [None]:
import plotly.express as px

# Petal length vs. petal width, one colour per species.
fig = px.scatter(
    data_frame=df_iris,
    x='petal length (cm)',
    y='petal width (cm)',
    color='target',
)

# Trailing expression: the notebook renders the figure.
fig

In [None]:
# Pairwise scatter plots of the four measurements, one colour per species.
fig = px.scatter_matrix(
    data_frame=df_iris,
    dimensions=[
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ],
    color='target',
    width=800,
    height=700,
)

# Trailing expression: the notebook renders the figure.
fig

# pixiedust   display(DataFrame)　緯度・経度

In [None]:
'''
ＧＴＦＳ（静的データ）の取得
'''

import pandas as pd

# Read each GTFS static table from the local gtfs/ directory and discard the
# columns that contain no values at all (how='all' along the column axis).
df_trips = pd.read_csv('gtfs/trips.txt').dropna(how='all', axis=1)

df_stop_times = pd.read_csv('gtfs/stop_times.txt').dropna(how='all', axis=1)

df_stops = pd.read_csv('gtfs/stops.txt').dropna(how='all', axis=1)

df_routes = pd.read_csv('gtfs/routes.txt').dropna(how='all', axis=1)

df_shapes = pd.read_csv('gtfs/shapes.txt').dropna(how='all', axis=1)

'''
ＧＴＦＳ（静的データ）の処理
'''

# Attach the route attributes to every trip, then keep one row per trip_id.
df_route = pd.merge(df_trips, df_routes, on='route_id')
df_bus = df_route.drop_duplicates(subset='trip_id')

# Stop-time rows joined with the attributes of the stop they refer to.
df_bus_stop = pd.merge(df_stop_times, df_stops, on='stop_id')

# Distinct shape ids, in order of first appearance.
sr_shape_id = df_shapes.shape_id.unique()

# All route geometries: one row per shape_id, with 'shapes' holding a tuple of
# (shape_pt_lon, shape_pt_lat) pairs in file order.
# The rows are collected in a single groupby pass and the frame is built once.
# The previous per-row DataFrame.append() call was removed in pandas 2.0 and
# copied the whole frame on every iteration. sort=False keeps the groups in
# order of first appearance, i.e. the same order as sr_shape_id.
shape_rows = [
    (shape_id, tuple(zip(df_shape.shape_pt_lon, df_shape.shape_pt_lat)))
    for shape_id, df_shape in df_shapes.groupby('shape_id', sort=False)
]
df_all_routes = pd.DataFrame(shape_rows, columns=['shape_id', 'shapes'])

'''
ＧＴＦＳ-RT（動的データ）の取得
'''

from google.transit import gtfs_realtime_pb2 # pip install --upgrade gtfs-realtime-bindings
import requests
from retry import retry

# Column names of the DataFrame returned by get_realtime_data(). The order
# must match the order in which that function lists the per-vehicle values:
# trip id, vehicle id, timestamp, current stop sequence, current status,
# latitude, longitude.
COLS = ['trip_id',
        'vehicle',
        'timestamp',
        'stop_sq',
        'status',
        'latitude',
        'longitude']

@retry(tries=3, delay=2, backoff=2)
def get_realtime_data():
    """Fetch the GTFS-Realtime vehicle feed and return it as a DataFrame.

    Downloads the protobuf feed, parses it into a FeedMessage and collects
    one row per feed entity that carries a ``vehicle`` field.

    Returns:
        pandas.DataFrame with the columns listed in ``COLS``; empty (but with
        those columns) when the feed has no vehicle entities.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status, once the retries are exhausted.
        Any exception raised while parsing the protobuf payload, likewise
            after the retries are exhausted.
    """
    # A timeout is required for the retry decorator to be useful: without it
    # a stalled connection blocks forever and never raises.
    response = requests.get('http://opendata.sagabus.info/vehicle.pb', timeout=10)
    # Fail (and therefore retry) on HTTP errors instead of trying to parse an
    # error page as protobuf.
    response.raise_for_status()

    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.content)

    # Collect plain tuples and build the frame once; DataFrame.append() was
    # removed in pandas 2.0 and copied the frame on every call.
    rows = [
        (
            entity.vehicle.trip.trip_id,
            entity.vehicle.vehicle.id,
            entity.vehicle.timestamp,
            entity.vehicle.current_stop_sequence,
            entity.vehicle.current_status,
            entity.vehicle.position.latitude,
            entity.vehicle.position.longitude,
        )
        for entity in feed.entity
        if entity.HasField('vehicle')
    ]
    return pd.DataFrame(rows, columns=COLS)  # buses currently in service

# Snapshot of the vehicles currently reported by the realtime feed.
df_vehicle = get_realtime_data()

# Join the live vehicle rows with the static per-trip data on trip_id; df_bus
# is rebound to the merged result (buses currently in service).
df_bus = df_vehicle.merge(df_bus, on='trip_id')


In [None]:
import pixiedust

# Show df_bus (which carries latitude/longitude columns) through display().
# NOTE(review): display is not defined in the visible code; presumably the
# pixiedust import above provides it in the notebook namespace -- confirm.
display(df_bus)

# ご清聴ありがとうございました

## 本日の資料：https://github.com/malo21st/DAD4D200620