# **Injury prediction based on journal data from the Dutch national men’s volleyball team**

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Apply matplotlib's built-in 'fivethirtyeight' style sheet to figures drawn after this point.
plt.style.use('fivethirtyeight')

**Data Processing**

1. Player Jumps Dataset
(Height of all performed jumps)

In [36]:
# Load the jump records; the export is semicolon-delimited.
jump_data = pd.read_csv('./data/Jumps.csv', delimiter=';')
print(f"Shape of Players Jump dataset : {jump_data.shape}")

# Parse the day-month-year strings in 'Date' into datetime64 values.
jump_data = jump_data.assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y')
)

jump_data.head()

Shape of Players Jump dataset : (3633, 3)


Unnamed: 0,PlayerID,Date,HeightInCm
0,4,2018-05-10,70
1,4,2018-05-10,73
2,4,2018-05-10,55
3,4,2018-05-10,55
4,4,2018-05-10,56


2. Player Training Dataset (Information of volleyball-specific sessions)

In [14]:
# Load the volleyball-specific training sessions from the Excel workbook.
training_data = pd.read_excel(io='./data/PlayerTrainingData.xlsx')

# Report (rows, columns), then preview the first five records.
print(f"Shape of Players Training dataset : {training_data.shape}")
training_data.head(n=5)

Shape of Players Training dataset : (106, 4)


Unnamed: 0,TrainingID,PlayerID,RPE,Duration
0,34,4,3.0,0 days 01:14:33.980000000
1,35,4,5.0,0 days 03:00:00.000000000
2,36,4,6.0,0 days 02:25:00.000000000
3,37,4,4.0,0 days 01:07:48.390000000
4,38,4,7.0,0 days 01:38:54.370000000


3. Player Strength Training Dataset (Information of strength training) \
(reps = repetitions, prct = percentage of personal maximum weight)

In [32]:
# Load the strength-training log (semicolon-delimited) and, in the same step,
# turn 'Prct' from comma-decimal text (Dutch notation) into floats.
strength_data = pd.read_csv('./data/StrengthTraining.csv', sep=';').assign(
    Prct=lambda frame: frame['Prct'].str.replace(',', '.').astype(float)
)

In [19]:
# Report the (rows, columns) shape of the strength-training frame.
print(f"Shape of Players Strength Training dataset : {strength_data.shape}")

# Preview the first rows.
strength_data.head()

Shape of Players Strength Training dataset : (255, 7)


Unnamed: 0,PlayerID,Date,Focus,Exercise,Reps,Prct,Weight
0,4,23-7-2018,Maximaalkracht,Fullbody,4.0,0.9,50
1,4,23-7-2018,Maximaalkracht,Upper,12.0,0.85,19
2,4,23-7-2018,Maximaalkracht,Upper,12.0,0.85,186
3,4,23-7-2018,Maximaalkracht,Lower,3.0,0.93,105
4,4,23-7-2018,Maximaalkracht,Lower,4.0,0.9,100


4. Player Wellness Dataset 
    1) 	Answers to wellness questions on a 10-point Likert scale ranging from 1 (very bad) to 10 (excellent)  
    2) 	Answers to OSTRC questions

In [30]:
# Load the wellness questionnaire answers (semicolon-delimited) and parse the
# day-month-year strings in 'Date' into datetime64 values.
wellness_data = pd.read_csv('./data/Wellness.csv', sep=';').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y')
)

In [29]:
# Report the (rows, columns) shape of the wellness frame, then preview the first rows.
print(f"Shape of Players Wellness dataset : {wellness_data.shape}")
wellness_data.head()

Shape of Players Wellness dataset : (85, 14)


Unnamed: 0,PlayerID,Date,Wellness,Mood,Recovered,Muscle Soreness,Sick/healthy,Injury,Sleep quality,Hours of sleep,Difficultparticipating,Reducedtraining,Affectedperformance,Symptomscomplaints
0,4,2018-05-10,47,7,7,7,10,10,6,8.0,0,0,0,0
1,4,2018-05-11,46,7,6,6,10,10,7,9.0,0,0,0,0
2,4,2018-05-12,46,7,6,6,10,10,7,9.0,1,0,1,1
3,4,2018-05-13,48,7,7,7,10,10,7,8.0,0,0,0,0
4,4,2018-05-14,42,6,5,5,10,10,6,8.0,1,1,1,1


5. Player Exercise Training Dataset \
    (Information about the exercises in volleyball-specific sessions)

In [45]:
# Load the per-exercise records of the volleyball sessions (semicolon-delimited)
# and parse the day-month-year strings in 'Date' into datetime64 values.
exercise_data = pd.read_csv('./data/ExerciseTrainingData.csv', sep=';').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y')
)
exercise_data.head()

Unnamed: 0,Date,PlayerID,DateTime,DateEndTime,DateStartTime,Exercise #,ExerciseID,TrainingID,TrainingSubtype,TrainingType,Duration,Duration_s,Duration_m
0,2018-05-11,4,"00:00,0","32:16,3","28:15,8",Exercise 3,11.0,34.0,Complex technique,Technique,0 days 00:04:00.440000000,240.0,4.0
1,2018-05-11,4,"00:00,0","54:59,8","32:16,9",Exercise 4,40.0,34.0,Team pass,Complex 1,0 days 00:22:42.890000000,1362.0,22.0
2,2018-05-11,4,"00:00,0","20:39,1","05:55,4",Exercise 1,82.0,34.0,Warming up & Cooling down,Physical,0 days 00:14:43.670000000,883.0,14.0
3,2018-05-11,4,"00:00,0","28:15,2","20:39,6",Exercise 2,2.0,34.0,Complex technique,Technique,0 days 00:07:35.590000000,455.0,7.0
4,2018-05-11,4,"00:00,0","10:44,7","30:44,7",Exercise 1,82.0,35.0,Warming up & Cooling down,Physical,0 days 00:40:00.000000000,2400.0,40.0


In [52]:
# Convert the clock-style columns (e.g. "32:16,3") to datetimes. The raw values use a
# comma as the decimal separator, so it is swapped for a dot before parsing with
# '%M:%S.%f'; the parsed values carry the placeholder date 1900-01-01.
# Columns that are already datetime are skipped, so this cell can be re-run safely
# (calling .str on an already-converted column would raise).
# NOTE(review): the format holds minutes and seconds only, so any hour component is
# lost (some rows show an end time earlier than the start time) - confirm with the data owner.
for column in ['DateTime', 'DateEndTime', 'DateStartTime']:
    if pd.api.types.is_datetime64_any_dtype(exercise_data[column]):
        continue
    exercise_data[column] = pd.to_datetime(
        exercise_data[column].str.replace(',', '.', regex=False),
        format='%M:%S.%f',
    )

In [53]:
# Preview the first rows of the exercise frame again.
exercise_data.head()

Unnamed: 0,Date,PlayerID,DateTime,DateEndTime,DateStartTime,Exercise #,ExerciseID,TrainingID,TrainingSubtype,TrainingType,Duration,Duration_s,Duration_m
0,2018-05-11,4,1900-01-01,1900-01-01 00:32:16.300,1900-01-01 00:28:15.800,Exercise 3,11.0,34.0,Complex technique,Technique,0 days 00:04:00.440000000,240.0,4.0
1,2018-05-11,4,1900-01-01,1900-01-01 00:54:59.800,1900-01-01 00:32:16.900,Exercise 4,40.0,34.0,Team pass,Complex 1,0 days 00:22:42.890000000,1362.0,22.0
2,2018-05-11,4,1900-01-01,1900-01-01 00:20:39.100,1900-01-01 00:05:55.400,Exercise 1,82.0,34.0,Warming up & Cooling down,Physical,0 days 00:14:43.670000000,883.0,14.0
3,2018-05-11,4,1900-01-01,1900-01-01 00:28:15.200,1900-01-01 00:20:39.600,Exercise 2,2.0,34.0,Complex technique,Technique,0 days 00:07:35.590000000,455.0,7.0
4,2018-05-11,4,1900-01-01,1900-01-01 00:10:44.700,1900-01-01 00:30:44.700,Exercise 1,82.0,35.0,Warming up & Cooling down,Physical,0 days 00:40:00.000000000,2400.0,40.0


In [54]:
# List the dtype of every column in the exercise frame.
exercise_data.dtypes

Date               datetime64[ns]
PlayerID                    int64
DateTime           datetime64[ns]
DateEndTime        datetime64[ns]
DateStartTime      datetime64[ns]
Exercise #                 object
ExerciseID                float64
TrainingID                float64
TrainingSubtype            object
TrainingType               object
Duration                   object
Duration_s                float64
Duration_m                float64
dtype: object