In [1]:
import pandas as pd
import numpy as np

### Read the file VeloV.sample.csv, including its empty (NaN) last column

In [2]:
# Column labels for the header-less file; 'nan' names the empty last column.
columns = [
    'ID',
    'time-stamp',
    'hour',
    'day-of-week',
    'available-bike-stands',
    'available-bikes',
    'nan',
]
# The file is ';'-separated and has no header row, so the labels are supplied here.
df = pd.read_csv('./VeloV.sample.csv', sep=';', header=None, names=columns)
df.head()

Unnamed: 0,ID,time-stamp,hour,day-of-week,available-bike-stands,available-bikes,nan
0,10113,635304352870000000,23,Friday,20,1,
1,10113,635304358900000000,23,Friday,20,1,
2,10113,635304364930000000,23,Friday,20,1,
3,10113,635304370970000000,23,Friday,20,1,
4,10113,635304377010000000,23,Friday,20,1,


### Drop NaN column

In [3]:
# Remove the placeholder 'nan' column; drop() returns a new frame, rebound to df.
df = df.drop(columns=['nan'])

In [4]:
# Preview the first five rows to confirm the 'nan' column is gone.
df.head(n=5)

Unnamed: 0,ID,time-stamp,hour,day-of-week,available-bike-stands,available-bikes
0,10113,635304352870000000,23,Friday,20,1
1,10113,635304358900000000,23,Friday,20,1
2,10113,635304364930000000,23,Friday,20,1
3,10113,635304370970000000,23,Friday,20,1
4,10113,635304377010000000,23,Friday,20,1


### Reformat Time-Stamp
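The code below tests the conversion on a single example time-stamp, 63530435287000000: the first raw value (635304352870000000) divided by 10, i.e. microseconds elapsed since 0001-01-01 00:00 UTC.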

In [5]:
from datetime import datetime, timedelta, timezone

# Reference instant the time-stamps are counted from: 0001-01-01 00:00 UTC.
epoch = datetime(year=1, month=1, day=1, tzinfo=timezone.utc)

# Example value, expressed in microseconds elapsed since `epoch`.
cookie_microseconds_since_epoch = 63530435287000000

# Shift the reference instant by that many microseconds.
cookie_datetime = timedelta(microseconds=cookie_microseconds_since_epoch) + epoch

# Show the result as text (same rendering as str()).
f"{cookie_datetime}"

'2014-03-14 23:08:07+00:00'

### Apply to the whole data set

In [6]:
# Raw values are tenths of a microsecond counted from `epoch`, so dividing by 10
# gives microseconds. Floor division (`//`) keeps the arithmetic in exact
# integers: true division (`/`) would produce a float, and values this large
# (~6e16) lie beyond 2**53, where floats can no longer represent every integer,
# so the converted time could be off by several microseconds.
df['time-stamp'] = df['time-stamp'].map(
    lambda x: epoch + timedelta(microseconds=int(x) // 10)
)

In [7]:
# Preview the first five rows to check the converted 'time-stamp' column.
df.head(n=5)

Unnamed: 0,ID,time-stamp,hour,day-of-week,available-bike-stands,available-bikes
0,10113,2014-03-14 23:08:07+00:00,23,Friday,20,1
1,10113,2014-03-14 23:18:10+00:00,23,Friday,20,1
2,10113,2014-03-14 23:28:13+00:00,23,Friday,20,1
3,10113,2014-03-14 23:38:17+00:00,23,Friday,20,1
4,10113,2014-03-14 23:48:21+00:00,23,Friday,20,1


#### Access the attributes of Time-Stamp
See the pandas `Timestamp` documentation: https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.Timestamp.html

In [8]:
# Hour component of the time-stamp in the row labelled 1 (single label lookup).
df.loc[1, 'time-stamp'].hour

23

In [9]:
# Minute component of the time-stamp in the row labelled 1 (single label lookup).
df.loc[1, 'time-stamp'].minute

18

In [10]:
# Second component of the time-stamp in the row labelled 1 (single label lookup).
df.loc[1, 'time-stamp'].second

10

### Save the reformatted file

In [11]:
# Write the reformatted frame as a ';'-separated CSV with a header row and
# without the row index.
df.to_csv(
    './VeloVformatted.sample.csv',
    sep=";",
    header=True,
    index=False,
)

You can apply the same steps to 'VeloV.csv'.