In [1]:
from os import path

import numpy
import pandas
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from sklearn.model_selection import LeaveOneOut, StratifiedKFold

ImportError: No module named 'pandas'

In [None]:
def get_data(datadir, expdir, expnum):
    '''Loads every trial CSV of every experiment into a data frame.

    Files are expected at ``<datadir>/<experiment>/<experiment>-<n>.csv``
    for ``n`` in ``1..expnum``.

    :param datadir
        The data directory containing the experiment directories.
    :param expdir
        A list of experiment directory names.
    :param expnum
        The number of trials per experiment.
    :return
        A list of data frames, ordered by experiment and then by trial number.
    :raises AssertionError
        If datadir or expdir is empty, or expnum is not positive.
    '''
    assert datadir, 'datadir must name a path'
    assert expdir, 'expdir cannot be empty'
    assert expnum > 0, 'expnum must be greater than zero'
    dfs = []
    for ed in expdir:
        for n in range(1, expnum + 1):
            # Build the path from the current experiment name, not from the
            # whole list of experiments.
            filename = '{}-{}.csv'.format(ed, n)
            filepath = path.join(datadir, ed, filename)
            dfs.append(pandas.read_csv(filepath))
    return dfs


def convert_to_input(dfs):
    '''Converts a list of data frames to a pair of lists of inputs and targets.

    The last two columns of each data frame are treated as the target columns
    and all preceding columns as sensor readings. The column layout is taken
    from the first data frame and applied to every data frame.

    :param dfs
        A list of data frames, where each data frame contains sensor readings from the PowerDue and PowerSense.
    :return
        A pair of lists (inputs, targets), where
        - each input in inputs is an array of the sensor readings for one experiment.
        - each target in targets is a vector of length two, where
          - [1, 0] represents a target where the PowerDue and mobile phone share context.
          - [0, 1] represents a target where the PowerDue and mobile phone do not share context.
    :raises AssertionError
        If dfs is empty.
    '''
    assert dfs, 'dfs cannot be empty'
    values = []
    targets = []
    cols = dfs[0].columns
    value_cols = cols[:-2]
    target_cols = cols[-2:]
    for df in dfs:
        value = df[value_cols].values
        # The target is read from the first row only. Select that row by
        # position; `df[0, target_cols]` would be a column lookup by tuple key.
        target = df[target_cols].iloc[0].values
        values.append(value)
        targets.append(target)
    return values, targets

In [None]:
# Location of the cleaned experiment data and the layout of the experiments.
datadir = 'data/clean'
experiment_dirs = ['exp1', 'exp2', 'exp3']
experiment_trials = 5

# Load every trial of every experiment; keep the first frame for inspection.
dfs = get_data(datadir, experiment_dirs, experiment_trials)
df1 = dfs[0]

In [None]:
# Number of input columns for the neural net model: every column except the
# last two, which convert_to_input treats as the target columns.
num_cols = df1.shape[1] - 2

# Look at the column names
print(df1.columns)

In [None]:
# Preview the first five rows of the first trial
df1.head(n=5)

In [None]:
inputs, targets = convert_to_input(dfs)

# Step by the number of trials per experiment so that the first trial of each
# experiment is shown (dfs is ordered by experiment, then by trial).
for i in range(0, len(inputs), experiment_trials):
    value, target = inputs[i], targets[i]
    print('The first row of the trial experiment %d' % i)
    print(value[0, :])
    print('The target for the trial experiment %d' % i)
    print(target)
    print()

In [None]:
def create_model():
    '''Creates an uncompiled neural net model with a given architecture.

    The model is a stack of seven Conv2D + MaxPooling2D pairs whose filter
    count doubles from 2 to 128, followed by Flatten, a 500-unit dense layer,
    dropout of 0.2 and a two-way softmax output.

    Reads the notebook-level variable ``num_cols`` for the input width.

    :return
        An uncompiled neural net model.
    '''
    model = Sequential()
    # NOTE(review): Keras documents Conv2D input as (rows, cols, channels);
    # (None, num_cols) looks one dimension short -- confirm the intended shape.
    model.add(Conv2D(filters=2, kernel_size=2, padding='same', activation='relu',
                     input_shape=(None, num_cols)))
    model.add(MaxPooling2D(pool_size=2))
    # Remaining convolution blocks: same kernel, padding and activation,
    # with the filter count doubling each time.
    for filters in (4, 8, 16, 32, 64, 128):
        model.add(Conv2D(filters=filters, kernel_size=2, padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
    # NOTE(review): Flatten followed by Dense presumably needs fully defined
    # spatial dimensions, which a None row count does not provide; seven
    # halvings also presumably need at least 128 rows and columns. The
    # imported GlobalAveragePooling2D may have been intended here -- TODO confirm.
    model.add(Flatten())
    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='softmax'))
    return model

In [None]:
# Train and evaluate the neural net with leave-one-out cross validation.
seed = 7
numpy.random.seed(seed)

# Stack the per-trial lists into arrays so they can be indexed with the index
# arrays produced by the splitter (a plain Python list cannot be).
# Assumes every trial has the same number of rows -- TODO confirm.
input_array = numpy.asarray(inputs)
target_array = numpy.asarray(targets)

# Leave-one-out cross validation: each trial is held out exactly once.
# StratifiedKFold cannot be used here: it rejects one-hot (2-D) targets and
# rejects n_splits larger than the size of every class.
kfold = LeaveOneOut()
scores = []
for train, test in kfold.split(input_array):
    model = create_model()
    # Compile model
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    train_inputs = input_array[train]
    train_targets = target_array[train]
    print('train_targets:', train_targets)
    model.fit(train_inputs, train_targets, epochs=20, batch_size=10, verbose=0)
    test_input = input_array[test]
    test_target = target_array[test]
    print('test_target:', test_target)
    score = model.evaluate(test_input, test_target, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))
    # Record which trial was held out together with its accuracy in percent.
    score_data = (test, score[1]*100)
    scores.append(score_data)


In [None]:
print('Scores:', scores)