# Kaggle CareerCon 2019 Training Data EDA
Nam D. Nguyen

[CareerCon 2019 Competition Page](https://www.kaggle.com/c/career-con-2019)

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import os

# Initialise plotly's offline notebook mode once, right after the imports.
# Per the plotly docs, connected=True loads plotly.js from a CDN instead of
# embedding the library in the notebook (smaller file, needs internet access).
init_notebook_mode(connected=True)

## Explore X Training Data

In [2]:
# Load the X training data (sensor measurements) from the raw data folder.
# The path is relative to the notebook's working directory.
train = pd.read_csv('../data/raw/X_train.csv')

In [3]:
# Preview the first rows to see which columns are present and what the values look like.
train.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003385,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,0_4,0,4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [4]:
# (number of rows, number of columns) of the training frame.
train.shape

(487680, 13)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
train.describe()

Unnamed: 0,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
count,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0
mean,1904.5,63.5,-0.01805,0.075062,0.012458,-0.003804,0.000178,0.008338,-0.019184,0.129281,2.886468,-9.364886
std,1099.853353,36.949327,0.685696,0.708226,0.105972,0.104299,0.117764,0.088677,0.229153,1.8706,2.140067,2.845341
min,0.0,0.0,-0.9891,-0.98965,-0.16283,-0.15662,-2.371,-0.92786,-1.2688,-36.067,-121.49,-75.386
25%,952.0,31.75,-0.70512,-0.68898,-0.089466,-0.10606,-0.040752,-0.033191,-0.090743,-0.530833,1.9579,-10.193
50%,1904.5,63.5,-0.10596,0.237855,0.031949,-0.018704,8.4e-05,0.005412,-0.005335,0.12498,2.8796,-9.3653
75%,2857.0,95.25,0.651803,0.80955,0.12287,0.097215,0.040527,0.048068,0.064604,0.792263,3.7988,-8.5227
max,3809.0,127.0,0.9891,0.98898,0.15571,0.15477,2.2822,1.0791,1.3873,36.797,73.008,65.839


In [6]:
# List every column with its dtype and non-null count to check for missing values.
# `show_counts` replaces the `null_counts` keyword, which was deprecated in
# pandas 1.2 and removed in pandas 2.0 (where it raises a TypeError).
# Requires pandas >= 1.2.
train.info(show_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 487680 entries, 0 to 487679
Data columns (total 13 columns):
row_id                   487680 non-null object
series_id                487680 non-null int64
measurement_number       487680 non-null int64
orientation_X            487680 non-null float64
orientation_Y            487680 non-null float64
orientation_Z            487680 non-null float64
orientation_W            487680 non-null float64
angular_velocity_X       487680 non-null float64
angular_velocity_Y       487680 non-null float64
angular_velocity_Z       487680 non-null float64
linear_acceleration_X    487680 non-null float64
linear_acceleration_Y    487680 non-null float64
linear_acceleration_Z    487680 non-null float64
dtypes: float64(10), int64(2), object(1)
memory usage: 48.4+ MB


Look at the pairwise correlation between features after dropping the `row_id` and `series_id` identifier columns (`measurement_number` is kept).

In [29]:
# Drop the identifier columns by name rather than by position, so the selection
# does not silently change if the CSV's column order does. This keeps the same
# columns as before (measurement_number and the ten sensor channels).
corr = train.drop(columns=['row_id', 'series_id']).corr()

# Colour the whole matrix on one shared scale (axis=None) and show three
# significant digits. `.format('{:.3g}')` gives the same rendering as the old
# `Styler.set_precision(3)`, which was deprecated in pandas 1.3 and removed in 2.0.
corr.style.background_gradient(cmap='coolwarm', axis=None).format('{:.3g}')

Unnamed: 0,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
measurement_number,1.0,-0.000336,0.000604,0.000607,-0.000334,-0.000334,0.000963,0.00111,-0.00221,-0.00233,-0.0011
orientation_X,-0.000336,1.0,-0.186,-0.134,0.998,0.0015,0.047,-0.0585,-0.00255,-0.0096,-0.0021
orientation_Y,0.000604,-0.186,1.0,0.998,-0.226,0.000572,0.0211,-0.0257,-0.00388,0.012,0.0025
orientation_Z,0.000607,-0.134,0.998,1.0,-0.176,0.000456,0.0232,-0.0282,-0.00418,0.00976,0.00188
orientation_W,-0.000334,0.998,-0.226,-0.176,1.0,0.00153,0.0466,-0.0581,-0.00291,-0.00882,-0.00191
angular_velocity_X,-0.000334,0.0015,0.000572,0.000456,0.00153,1.0,-0.0862,-0.0162,-0.00257,-0.0194,-0.0347
angular_velocity_Y,0.000963,0.047,0.0211,0.0232,0.0466,-0.0862,1.0,-0.762,-0.0273,0.0155,0.00442
angular_velocity_Z,0.00111,-0.0585,-0.0257,-0.0282,-0.0581,-0.0162,-0.762,1.0,0.029,0.00055,-0.00122
linear_acceleration_X,-0.00221,-0.00255,-0.00388,-0.00418,-0.00291,-0.00257,-0.0273,0.029,1.0,0.0529,0.0165
linear_acceleration_Y,-0.00233,-0.0096,0.012,0.00976,-0.00882,-0.0194,0.0155,0.00055,0.0529,1.0,0.384


There is a strong positive correlation between the Y and Z orientations (0.998) and between the X and W orientations (0.998), and a strong negative correlation between the Y and Z angular velocities (-0.762).