<!--BOOK_INFORMATION-->
<img style="float: right; width: 100px" src="https://raw.github.com/pyomeca/design/master/logo/logo_cropped.svg?sanitize=true">

# Pyosim in the cloud :cloud:
## with [pyomeca](https://github.com/pyomeca/pyomeca)
Romain Martinez (martinez.staps@gmail.com | [GitHub](https://github.com/romainmartinez))


<!--NAVIGATION-->
< [Get the data (include cleaning)](01.00-Get-the-data.ipynb) | [Contents](Index.ipynb) |

# Get Catapult by period data

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from src.data.get_catapult_by_period import GetCatapultPeriod
from src.data.tools import export_data, read_dtypes_json

In [2]:
# Directory holding the raw Catapult exports; its "by_period" subfolder is the input used below.
RAW_DATA_PATH = Path("../data/raw/catapult/")
# Directory that receives the processed "catapult_by_period.csv" file.
PROCESSED_DATA_PATH = Path("../data/processed/")

## Import

In [3]:
# IPython introspection: show the class source to check the constructor arguments used in the next cell.
??GetCatapultPeriod

[0;31mInit signature:[0m [0mGetCatapultPeriod[0m[0;34m([0m[0minput_dir[0m[0;34m,[0m [0mby[0m[0;34m,[0m [0moutput_filename[0m[0;34m,[0m [0mblacklist[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m      <no docstring>
[0;31mSource:[0m        
[0;32mclass[0m [0mGetCatapultPeriod[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__init__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0minput_dir[0m[0;34m,[0m [0mby[0m[0;34m,[0m [0moutput_filename[0m[0;34m,[0m [0mblacklist[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m"""[0m
[0;34m        Parameters[0m
[0;34m        ----------[0m
[0;34m        input_dir : Path[0m
[0;34m            Data directory containing csv files[0m
[0;34m        by : str[0m
[0;34m            return data by session (mean of period) or by period[0m
[0;34m        output_filename : Path[0m
[0;34m            filename where to export data (support feather & csv extension)[0m
[0;34m        black

In [4]:
# Keep the per-period rows; the `session` rows (mean of all periods) are dropped.
by = "period"

# Any field whose name contains one of these substrings is dropped.
blacklist = ["hr", "heart", "time", "trimp"]

output_filename = PROCESSED_DATA_PATH / "catapult_by_period.csv"
GetCatapultPeriod(
    input_dir=RAW_DATA_PATH / "by_period",
    by=by,
    output_filename=output_filename,
    blacklist=blacklist,
);

memory usage:
	39.99 MB
cols dropped:
	['hr_0-50%_mins', 'hr_50-65%_mins', 'hr_65-75%_mins', 'hr_75-85%_mins', 'hr_85-90%_mins', 'hr_>90%_mins', 'minimum_heart_rate', 'mean_heart_rate', 'maximum_heart_rate', 'start_time', 'end_time', 'trimp', 'trimp/min', 'trimp/pl']
cols converted to total seconds:
	['total_duration', 'player_load_band_1_total_duration', 'player_load_band_2_total_duration', 'player_load_band_3_total_duration', 'player_load_band_4_total_duration', 'player_load_band_5_total_duration', 'player_load_band_6_total_duration', 'player_load_band_7_total_duration', 'player_load_band_8_total_duration', 'total_work_bout_duration', 'total_work_duration']
optimized memory usage:
	6.51 MB
`catapult_by_period` writtten in: `/home/romain/Documents/codes/rocket/data/processed`


## Cleaning

In [5]:
# Reload the exported CSV with the dtypes looked up by `read_dtypes_json`;
# `date` is parsed explicitly as a datetime column.
column_dtypes = read_dtypes_json(output_filename)
by_period = pd.read_csv(output_filename, dtype=column_dtypes, parse_dates=["date"])

you may want to convert ['date'] to datetime


### Fix strings

In [6]:
# Spelling variants of the period labels mapped to a single label each.
# NOTE(review): "Quarter 4" maps to "OT" while "EXTRA" maps to "OVERTIME" —
# confirm that two distinct labels are intended.
periods = {
    "1ST": "1 ST",
    " 1 ST": "1 ST",
    "2 nd": "2 ND",
    "Quarter 3": "3 RD",
    "Quarter 4": "OT",
    "EXTRA": "OVERTIME",
}
# Position abbreviations spelled out; left and right wings share one label.
positions = {"D": "Defenseman", "RW": "Winger", "LW": "Winger", "C": "Center"}


def _last_first(full_name):
    """Reformat ``"First Last"`` as ``"LAST, First"``.

    Only the first two space-separated tokens are used.
    """
    tokens = full_name.split(" ")
    return f"{tokens[1].upper()}, {tokens[0]}"


by_period = by_period.assign(
    player_name=by_period["player_name"].apply(_last_first),
    position_name=by_period["position_name"].replace(positions),
    period_name=by_period["period_name"].replace(periods),
)

### Fix assignment mistakes

In [7]:
# This player's position is overridden to "Center".
by_period.loc[
    by_period["player_name"] == "MCCARRON, Michael", "position_name"
] = "Center"

# The category ", Trevor" has an empty last name (presumably the raw export
# only contained the first name — TODO confirm); relabel it with the full name.
# The renamed Series is assigned back to the frame instead of using
# `inplace=True`: that argument was deprecated in pandas 1.3 and removed in
# 2.0, and mutating a Series pulled out of the frame is not guaranteed to
# update the frame itself.
by_period["player_name"] = by_period["player_name"].cat.rename_categories(
    {", Trevor": "OWENS, Trevor"}
)

In [9]:
def replace_session_type(l):
    """Relabel the session type of matching rows of ``by_period`` in place.

    Parameters
    ----------
    l : sequence
        ``[date, current_session_type, new_session_type]``. Every row of the
        module-level ``by_period`` whose ``date`` equals ``l[0]`` and whose
        ``session_type`` equals ``l[1]`` gets ``session_type`` set to ``l[2]``.
    """
    target_date, current_type, new_type = l[0], l[1], l[2]
    on_date = by_period["date"] == target_date
    has_type = by_period["session_type"] == current_type
    by_period.loc[on_date & has_type, "session_type"] = new_type


# Session-type corrections; each entry is
# [date, recorded session type, corrected session type].
r = [
    ["2018-04-11", "SKILL", "PRACTICE"],
    ["2017-10-07", "MORNING", "HEALTHY"],
    ["2017-12-01", "MORNING", "OPT"],
    ["2018-02-09", "MORNING", "OPT"],
    ["2018-02-23", "MORNING", "HEALTHY"],
    ["2018-03-02", "MORNING", "HEALTHY"],
    ["2018-03-07", "MORNING", "OPT"],
    ["2018-04-13", "MORNING", "OPT"],
]


# Apply the corrections one after the other; `by_period` is edited in place.
for correction in r:
    replace_session_type(correction)

In [10]:
def drop_rows(l):
    """Return ``by_period`` without the rows matching one player/session/date.

    Parameters
    ----------
    l : sequence
        ``[player_name, session_type, date]``. A row is removed only when all
        three values match.

    Returns
    -------
    A filtered copy of the module-level ``by_period``; the frame itself is
    not modified, so the caller has to rebind the result.
    """
    player, session, day = l[0], l[1], l[2]
    to_drop = (
        (by_period["player_name"] == player)
        & (by_period["session_type"] == session)
        & (by_period["date"] == day)
    )
    return by_period[~to_drop]


# Rows to discard; each entry is [player name, session type, date].
d = [
    ["TAORMINA, Matt", "GAME", "2017-10-06"],
    ["BROLL, David", "GAME", "2017-10-13"],
    ["BAUN, Kyle", "GAME", "2017-10-20"],
    ["BAUN, Kyle", "GAME", "2017-10-21"],
    ["BROLL, David", "GAME", "2017-10-28"],
    ["JERABEK, Jakub", "GAME", "2017-11-11"],
    ["PARISI, Thomas", "GAME", "2017-11-11"],
    ["AUDETTE, Daniel", "GAME", "2017-11-17"],
    ["GELINAS, Eric", "GAME", "2017-11-29"],
    ["AUDETTE, Daniel", "GAME", "2017-12-01"],
    ["LERNOUT, Brett", "GAME", "2017-12-02"],
    ["BROLL, David", "GAME", "2017-12-02"],
    ["TERRY, Chris", "GAME", "2017-12-09"],
    ["BOURQUE, Simon", "GAME", "2017-12-09"],
    ["BROLL, David", "GAME", "2018-01-13"],
    ["GREGOIRE, Jeremy", "GAME", "2018-01-19"],
    ["TERRY, Chris", "GAME", "2018-01-27"],
    ["VEILLEUX, Yannick", "GAME", "2018-01-27"],
    ["AUDETTE, Daniel", "GAME", "2018-02-10"],
    ["EBBING, Thomas", "GAME", "2018-02-10"],
    ["AUDETTE, Daniel", "GAME", "2018-02-23"],
    ["BROLL, David", "GAME", "2018-02-23"],
    ["PARISI, Thomas", "GAME", "2018-02-23"],
    ["BROLL, David", "GAME", "2018-03-02"],
    ["GELINAS, Eric", "GAME", "2018-03-16"],
    ["AUSTIN, Johnny", "GAME", "2018-03-28"],
    ["EISENSCHMID, Markus", "GAME", "2018-03-28"],
    ["GREGOIRE, Jeremy", "GAME", "2018-03-31"],
    ["VEILLEUX, Yannick", "GAME", "2018-04-04"],
    ["EBBING, Thomas", "GAME", "2018-04-14"],
    ["WAKED, Antoine", "GAME", "2018-04-14"],
    ["PARISI, Thomas", "GAME", "2018-02-03"],
    ["PARISI, Thomas", "GAME", "2018-02-07"],
    ["PARISI, Thomas", "GAME", "2018-02-09"],
    ["PARISI, Thomas", "GAME", "2018-02-10"],
]

# `drop_rows` reads the module-level `by_period`, so the filtered frame is
# rebound after every entry for the next call to see the previous removals.
for entry in d:
    by_period = drop_rows(entry)

## Export data

In [11]:
# Export the cleaned frame to `output_filename`, the same path that
# GetCatapultPeriod wrote to and that the cleaning section read from.
export_data(by_period, output_filename, optimize_dtypes=True)

memory usage:
	8.71 MB
optimized memory usage:
	6.49 MB
`catapult_by_period` writtten in: `/home/romain/Documents/codes/rocket/data/processed`


<!--NAVIGATION-->
< [Get the data (include cleaning)](01.00-Get-the-data.ipynb) | [Contents](Index.ipynb) |