In [1]:
from os import listdir
from os.path import isfile, join
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [2]:
DATASET_DIR = "HMP_Dataset"
TRAINING_SETS = np.array(['Use_telephone', 'Standup_chair', 'Walk', 'Climb_stairs', 'Sitdown_chair', 'Brush_teeth', 'Comb_hair', 'Eat_soup', 'Pour_water', 'Descend_stairs', 'Eat_meat', 'Drink_glass', 'Getup_bed', 'Liedown_bed'])

all_data = {}

print("Reading data from files...")

def initial_setup():
    for dataset in TRAINING_SETS:
        all_data[dataset] = []
        for f in listdir(join(DATASET_DIR, dataset)):
            all_data[dataset].append(np.array(np.genfromtxt(join(DATASET_DIR, dataset, f), usecols=(0, 1, 2))))
        all_data[dataset] = np.array(all_data[dataset])

print("All done!")

Reading data from files...
All done!


In [3]:
def blockshaped(arr, nrows, ncols):
    h, w = arr.shape
    return (arr.reshape(h//nrows, nrows, -1, ncols)
               .swapaxes(1,2)
               .reshape(-1, nrows, ncols))

In [4]:
def chunkify(chunk_me):
    dif = chunk_me.shape[0]%32
    chunk_me = chunk_me[dif:,:]
    chunk_me = blockshaped(chunk_me, 32, 3)
    return chunk_me

In [5]:
initial_setup()
chunkified_data = all_data
for k in TRAINING_SETS:
    for i in range(len(all_data[k])):
        chunkified_data[k][i] = chunkify(all_data[k][i])

In [6]:
flattened_data = chunkified_data
flat_stuff = {}
huge_matrix = []
for k in TRAINING_SETS:
    flat_stuff[k] = []
    for j in range(len(flattened_data[k])):
        for i in range(len(flattened_data[k][j])):
            flat_stuff[k].append(flattened_data[k][j][i].reshape(96))
            huge_matrix.append(flattened_data[k][j][i].reshape(96))
    flat_stuff[k] = np.asarray(flat_stuff[k])
huge_matrix = np.asarray(huge_matrix)

In [7]:
for i in TRAINING_SETS:
    print(flat_stuff[i].shape)

(470, 96)
(745, 96)
(2835, 96)
(1210, 96)
(727, 96)
(926, 96)
(722, 96)
(207, 96)
(1254, 96)
(460, 96)
(974, 96)
(1288, 96)
(1381, 96)
(345, 96)


In [8]:
print(huge_matrix)
print(huge_matrix.shape)

[[36. 42. 50. ... 38. 37. 50.]
 [37. 40. 49. ... 21. 33. 53.]
 [21. 31. 53. ... 32. 44. 42.]
 ...
 [35. 41. 50. ... 34. 39. 51.]
 [34. 40. 51. ... 35. 38. 53.]
 [35. 39. 51. ... 34. 38. 52.]]
(13544, 96)


In [9]:
trainingMushrooms, testingMushrooms = train_test_split(huge_matrix, test_size=.2, train_size=.8, shuffle=False)

dependent = np.array(trainingMushrooms)
independent = np.array(trainingMushrooms)[:,2:]

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
clf.fit(independent, dependent)

indTest = np.array(testingMushrooms)[:,2:]

test1 = np.array(testingMushrooms)
test2 = clf.predict(indTest)

In [15]:
test1

array([[41., 38., 50., ..., 45., 33., 50.],
       [44., 35., 50., ..., 45., 30., 50.],
       [45., 30., 49., ..., 44., 29., 50.],
       ...,
       [35., 41., 50., ..., 34., 39., 51.],
       [34., 40., 51., ..., 35., 38., 53.],
       [35., 39., 51., ..., 34., 38., 52.]])

In [16]:
test2

array([[33., 45., 49., ..., 34., 43., 50.],
       [33., 45., 50., ..., 34., 38., 50.],
       [35., 45., 49., ..., 34., 43., 50.],
       ...,
       [33., 38., 52., ..., 33., 38., 52.],
       [33., 38., 52., ..., 33., 38., 52.],
       [33., 38., 52., ..., 33., 38., 52.]])