In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found,
# one path per line, in the order os.walk yields them.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Assume we have a table with *bikeid*, *starttime*, *stoptime* grouped by *bikeid* and sorted by *starttime*, *stoptime* ascending.

Let $i$ be a row number such that $bikeid(i)=bikeid(i+1)$.

As a bike cannot be on more than one trip at any given moment,
$$t_{start}(i)<t_{stop}(i)<t_{start}(i+1)<t_{stop}(i+1)$$
without loss of generality.

Define:
* trip time duration $\Delta t_{trip}(i)$ as
$$\Delta t_{trip}(i)=t_{stop}(i)-t_{start}(i)$$
* idle time duration $\Delta t_{idle}(i+1)$ as
$$\Delta t_{idle}(i+1)=t_{start}(i+1)-t_{stop}(i)$$

In [1]:
# Read the trip-data CSV into the `bike` DataFrame and preview the first rows.
TRIPDATA_CSV = '../input/citibike-system-data/201306-citibike-tripdata.csv'
bike = pd.read_csv(TRIPDATA_CSV)
bike.head()

We sort by *bikeid*, then by *starttime* and *stoptime* (ascending), so that the trips of each bike are contiguous and in chronological order.

In [1]:
# Parse the timestamp columns BEFORE sorting. Sorting the raw string columns
# orders them lexicographically, which matches chronological order only for
# zero-padded ISO-style text; sorting real datetimes is always chronological.
# `assign` builds a new frame, so re-running this cell is idempotent.
bike = (
    bike
    .assign(
        starttime=pd.to_datetime(bike['starttime']),
        stoptime=pd.to_datetime(bike['stoptime']),
    )
    .sort_values(['bikeid', 'starttime', 'stoptime'])
)
bike.head()

We observe that

$$\Delta t_{idle}(i+1)=t_{start}(i+1)-t_{stop}(i)=t_{start}(i+1)-t_{start}(i)+t_{start}(i)-t_{stop}(i)=t_{start}(i+1)-t_{start}(i)-(t_{stop}(i)-t_{start}(i))\Leftrightarrow$$
$$\Delta t_{idle}(i+1)=t_{start}(i+1)-t_{start}(i)-\Delta t_{trip}(i)$$

Thus, we can calculate trip time duration and idle time duration as follows.

In [1]:
# Trip duration: stop minus start of the same row.
bike['triptime'] = bike['stoptime'] - bike['starttime']

# Idle duration: start of this trip minus stop of the PREVIOUS trip of the
# SAME bike. This is algebraically the same as
#   starttime.diff() - triptime.shift(1)
# but the shift is done inside each bikeid group, so the first trip of a bike
# is never compared with the last trip of a different bike. Those first trips
# have no predecessor and get NaT.
# Relies on the rows already being ordered by starttime within each bikeid.
previous_stop = bike.groupby('bikeid', sort=False)['stoptime'].shift(periods=1)
bike['idletime'] = bike['starttime'] - previous_stop
bike.head()

We convert both durations to whole seconds (integers) for computational reasons.

In [1]:
# Convert the timedelta columns to whole seconds stored as int64.
NS_PER_SECOND = 10 ** 9

# Rows without a defined idle time hold NaT. Casting NaT straight to int64
# produces the int64-minimum sentinel, so treat "no idle time" as zero first.
idle_filled = bike['idletime'].fillna(pd.Timedelta(0))

# Pin the unit to nanoseconds before the integer cast so the division by
# NS_PER_SECOND stays correct even if the columns carry a coarser resolution.
trip_ns = bike['triptime'].values.astype('timedelta64[ns]').astype(np.int64)
idle_ns = idle_filled.values.astype('timedelta64[ns]').astype(np.int64)

bike['tstrip'] = trip_ns // NS_PER_SECOND
bike['tsidle'] = idle_ns // NS_PER_SECOND
bike.head()

In [1]:
# Floor the idle time at zero: every negative value becomes 0, all other
# values are left untouched. `clip` returns a new Series, which avoids
# assigning through a boolean mask on a Series pulled out of the frame
# (the chained-assignment pattern that triggers SettingWithCopyWarning).
tsidle = bike['tsidle'].clip(lower=0)
bike['tsidle'] = tsidle
bike.head()

In [1]:
# Write the enriched trip table to the working directory as CSV, leaving out
# the DataFrame index. `output` is another name for the same `bike` frame.
output = bike
output.to_csv(
    'CitiBike System Data Bikes.csv',
    index=False,
)