In [281]:
import pandas as pd

import numpy as np

import matplotlib
import matplotlib.pylab as plt
%matplotlib inline

import seaborn as sns

In [282]:
# Load the raw training set from the CSV at ./data/train.csv (path is relative
# to the notebook's working directory).
raw_train_data_file_path = './data/train.csv'
raw_train_data = pd.read_csv(raw_train_data_file_path)

In [283]:
# Load the raw test set from the CSV at ./data/test.csv (path is relative
# to the notebook's working directory).
raw_test_data_file_path = './data/test.csv'
raw_test_data = pd.read_csv(raw_test_data_file_path)

## Creating new features

In [284]:
def create_date_features(data):
    """Derive calendar and time-of-day features from the 'Dates' column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Dates' column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        Frame sharing the index of ``data`` with four columns:
        'Year', 'Month', 'DayOfMonth' (calendar components) and 'DayTime',
        the fraction of the day elapsed at minute resolution (seconds are
        ignored), i.e. a value in [0, 1).
    """
    dates = pd.to_datetime(data['Dates'])

    # Use the vectorised ``.dt`` accessor instead of one Python-level lambda
    # call per row; the values are the same but it is much faster on large
    # frames.
    minutes_since_midnight = dates.dt.hour * 60 + dates.dt.minute
    minutes_per_day = float(60 * 24)

    return pd.DataFrame({
        'Year': dates.dt.year,
        'Month': dates.dt.month,
        'DayOfMonth': dates.dt.day,
        'DayTime': minutes_since_midnight / minutes_per_day,
    })

## Adding new features

In [285]:
def add_new_features(data):
    """Return a copy of ``data`` with 'Dates' replaced by derived date features.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'Dates' column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every column of ``data`` except 'Dates',
        followed by the columns produced by ``create_date_features``.
    """
    # Date features. The helper defined in this notebook is named
    # ``create_date_features``; the previous call to ``get_date_features``
    # raised NameError on a fresh kernel.
    date_features = create_date_features(data)

    # ``drop`` without ``inplace`` returns a new frame, so the caller's data
    # is left untouched and no explicit copy is needed.
    data_without_dates = data.drop('Dates', axis=1)

    return pd.concat([data_without_dates, date_features], axis=1)

## Writing new data to csv

In [286]:
import os.path
def write_data_to_file(data, filepath, features=['']):
    """Save ``data`` as CSV, tagging the file name with the feature names.

    The entries of ``features`` are joined with '+' and inserted, after a
    leading '+', between the base name and the extension of ``filepath``.
    For example ``'./out/train.csv'`` with ``features=['date']`` is written
    to ``'./out/train+date.csv'``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to save; its index is not written.
    filepath : str
        Target path before the feature tag is inserted.
    features : list of str, optional
        Feature-set names used for the tag. The default ``['']`` yields a
        bare '+' suffix. The list is only read, never mutated.

    Raises
    ------
    OSError
        If the output directory or file cannot be created.
    """
    file_name, extension = os.path.splitext(filepath)
    file_path_with_features = file_name + '+' + '+'.join(features) + extension

    # Create the output directory on demand so a fresh checkout can run the
    # notebook top to bottom without a manual mkdir.
    output_dir = os.path.dirname(file_path_with_features)
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Hand pandas the path rather than a text handle opened here: pandas asks
    # for file objects to be opened with newline='' and otherwise line endings
    # can be translated twice. Letting pandas open the file avoids that.
    data.to_csv(file_path_with_features, index=False)

## Transforming train and test data

In [287]:
# Add the derived features to the training set and save the result.
# The features=['date'] tag makes write_data_to_file save to
# ./data_with_new_features/train+date.csv rather than the path passed here.
new_train_data = add_new_features(raw_train_data)
write_data_to_file(new_train_data, './data_with_new_features/train.csv', features=['date'])

In [288]:
# Add the derived features to the test set and save the result.
# The features=['date'] tag makes write_data_to_file save to
# ./data_with_new_features/test+date.csv rather than the path passed here.
new_test_data = add_new_features(raw_test_data)
write_data_to_file(new_test_data, './data_with_new_features/test.csv', features=['date'])