In [56]:
from sklearn.linear_model import MultiTaskLasso
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer

width = 12
height = 7
plt.rcParams["figure.figsize"] = (width, height)

import pandas as pd
import numpy as np
import tflscripts
from sklearn.preprocessing import Imputer
from sklearn import tree
import pickle

configuration = tflscripts.read_configuration()
df = pd.read_pickle('results.p')

In [8]:
feature_expansion = {
    'All': {
        'Microphone': 1,
        'Accelerometer': 1,
        'Environmental': 1,
        'EMI & motion': 1
    },
    'Microphone': {
        'Microphone': 1,
        'Accelerometer': 0,
        'Magnetometer': 0,
        'Environmental': 0,
        'EMI & motion': 0
    },
    'Accelerometer & magnetometer': {
        'Microphone': 0,
        'Accelerometer': 1,
        'Magnetometer': 1,
        'Environmental': 0,
        'EMI & motion': 0
    },
    'Environmental': {
        'Microphone': 0,
        'Accelerometer': 0,
        'Magnetometer': 0,
        'Environmental': 1,
        'EMI & motion': 0
    },
    'EMI & motion': {
        'Microphone': 0,
        'Accelerometer': 0,
        'Magnetometer': 0,
        'Environmental': 0,
        'EMI & motion': 1
    },
    'Microphone & accelerometer': {
        'Microphone': 1,
        'Accelerometer': 1,
        'Magnetometer': 0,
        'Environmental': 0,
        'EMI & motion': 1
    }
}

pd.DataFrame([pd.Series(feature_expansion['All']) for i in range(10)])

Unnamed: 0,Accelerometer,EMI & motion,Environmental,Microphone
0,1,1,1,1
1,1,1,1,1
2,1,1,1,1
3,1,1,1,1
4,1,1,1,1
5,1,1,1,1
6,1,1,1,1
7,1,1,1,1
8,1,1,1,1
9,1,1,1,1


In [18]:
for i, result in df.iterrows():
    result

pd.Series({'1': 122}).append(pd.Series({'2': 244}))

1    122
2    244
dtype: int64

In [66]:
def vectorize_column(df, column_name):
    rows = [dict(r.iteritems()) for _, r in df[[column_name]].iterrows()]
    vectorizer = DictVectorizer()
    arr = vectorizer.fit_transform(rows).toarray()
    arr_df = pd.DataFrame(arr)
#     arr_df.columns = [column_name + str(i) for i in range(len(arr[0]))]
    arr_df.columns = vectorizer.get_feature_names()
    return arr_df

df = df.loc[df.accuracy_negative > 0.95]
df = df.loc[df.type_of_transfer != 'Across spaces']
df = df.reset_index(drop=True)
type_of_transfer_df = vectorize_column(df, 'type_of_transfer')
classifier_df = vectorize_column(df, 'classifier')
features_df = vectorize_column(df, 'features_name')
activities_df = vectorize_column(df, 'label')
activities_df[activities_df == 0.0] = np.nan
activities_df = pd.DataFrame([row * df.accuracy_positive[i] for i, row in activities_df.iterrows()])
vectorized_df = pd.concat([type_of_transfer_df, classifier_df, features_df, activities_df], axis=1)

x_columns = vectorized_df.filter(regex='type_of_transfer|classifier|features_name').columns
y_columns = vectorized_df.filter(regex='label').columns

vectorized_df = vectorized_df.groupby(x_columns.values.tolist(), axis=0)[y_columns.values.tolist()].mean()
vectorized_df = vectorized_df.reset_index()

In [67]:
x = vectorized_df[x_columns]
y = vectorized_df[y_columns]
x = Imputer().fit_transform(x)
y = Imputer().fit_transform(y)
lasso = MultiTaskLasso()
coef_multi_task_lasso_ = lasso.fit(x, y).coef_

In [68]:
coef_multi_task_lasso_

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., 

In [69]:
x.shape, y.shape

((90, 14), (90, 17))