In [162]:
%matplotlib inline

import pandas as pd

from sklearn.model_selection import StratifiedShuffleSplit
from datetime import timedelta

## Prepare data

Divide the timestamp into two features, **weekday** and **seconds**, and transform **action** into a numeric feature (1 for `on`, 0 otherwise).

In [163]:
def get_time_in_seconds(time):
    """Return the offset of ``time`` from midnight in whole seconds.

    Only ``time.hour`` and ``time.minute`` are read, so any seconds or
    sub-second part of ``time`` does not contribute to the result.
    """
    since_midnight = timedelta(minutes=60 * time.hour + time.minute)
    return int(since_midnight.total_seconds())

def prepare_data(data):
    """Build the model-ready frame from a raw socket log.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw log with a ``timestamp`` column (Unix epoch seconds) and an
        ``action`` column holding the string ``'on'`` or another value.

    Returns
    -------
    pandas.DataFrame
        New frame sharing ``data``'s index with three integer columns:
        ``action`` (1 where the raw value is ``'on'``, else 0),
        ``weekday`` (0 = Monday ... 6 = Sunday) and ``seconds`` (offset from
        midnight, built from hour and minute only, like
        ``get_time_in_seconds``).

    Notes
    -----
    ``data`` itself is not modified, so the function can be called again on
    the same frame and gives the same result.
    """
    # Work from the argument, never from a notebook-level global.
    moments = pd.to_datetime(data['timestamp'], unit='s', origin='unix')

    prepared = pd.DataFrame(index=data.index)
    prepared['action'] = (data['action'] == 'on').astype('int64')
    prepared['weekday'] = moments.dt.weekday.astype('int64')
    # Minute resolution on purpose: the seconds component is discarded.
    prepared['seconds'] = (moments.dt.hour * 3600 + moments.dt.minute * 60).astype('int64')
    return prepared

## Read data into a DataFrame

In [164]:
# Load the raw socket log; the CSV's 'index' column becomes the row index.
df = pd.read_csv('data/socket_log.csv', index_col='index')
# Derive the columns used for modelling: action, weekday, seconds.
socket_log = prepare_data(df)
# Preview the first rows of the prepared frame.
socket_log.head()

Unnamed: 0_level_0,action,weekday,seconds
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,0,26520
1,0,0,29160
2,1,0,61260
3,0,0,82740
4,1,1,26520


## Create test and training set

In [165]:
# One shuffled 80/20 split with a fixed seed so the partition is reproducible.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# Stratify on weekday so both sets keep the same weekday proportions.
# n_splits=1 means the generator yields exactly one (train, test) pair.
train_index, test_index = next(split.split(socket_log, socket_log['weekday']))

# split() yields positional row numbers, not index labels, so select with
# .iloc; .loc would only be correct while the index happens to be 0..n-1.
strat_train_set = socket_log.iloc[train_index]
strat_test_set = socket_log.iloc[test_index]

## Extract labels from features

In [166]:
# Labels: an independent copy of the 'action' column of the training set.
y = strat_train_set['action'].copy()
# Features: every remaining training column once 'action' is removed.
X = strat_train_set.drop(columns=['action'])

## Train a DecisionTreeClassifier on the training set

In [167]:
from sklearn.tree import DecisionTreeClassifier

# Fix the seed so the fitted tree is the same on every run.
socket_clf = DecisionTreeClassifier(random_state=42)
socket_clf.fit(X, y)

# Query with a frame that has the training columns (weekday, seconds), so the
# sample carries the same feature names the model was fitted with.
sample = pd.DataFrame([[3, 26520]], columns=X.columns)
socket_clf.predict(sample)

array([1])