In [4]:
import pandas as pd
from matplotlib import pyplot
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample
from xgboost import XGBClassifier
from xgboost import plot_importance

In [5]:
# Read the dataset and discard the two columns ("Unnamed: 0", "X1") that the
# experiments below do not use.
df = pd.read_csv("aw_fb_data.csv").drop(columns=["Unnamed: 0", "X1"])

# Replace the device names with integer codes. LabelEncoder numbers the sorted
# class labels from 0; `le` is kept so the codes can be mapped back later.
le = preprocessing.LabelEncoder()
le.fit(df["device"])
df["device"] = le.transform(df["device"])

In [86]:
# Per-device subsets: keep one device's rows, drop the now-constant `device`
# column, then shuffle reproducibly (frac=1 returns every row in random order).
# NOTE: the shuffled frames keep their original index labels, so they must be
# indexed positionally (.iloc) when used with fold indices.
# NOTE(review): presumably code 1 is the Fitbit and code 0 the Apple Watch —
# confirm against `le.classes_`.
fitbit_df = (
    df.loc[df["device"] == 1]
    .drop(columns=["device"])
    .sample(frac=1, random_state=42)
)
aw_df = (
    df.loc[df["device"] == 0]
    .drop(columns=["device"])
    .sample(frac=1, random_state=42)
)

# Empty table that the hyperparameter sweeps below fill with one row per setting.
results = pd.DataFrame(
    columns=[
        "Algorithm",
        "n_estimators",
        "eta",
        "max_depth",
        "n_neighbors",
        "AUC",
    ]
)

## Experiments:

### Apple Watch Data Separately, Fitbit Data Separately, All Together:

1. XGBoost:
    * n_estimators: 100, 200, 500, 1000
    * eta: 0.01, 0.1, 0.3
    * max_depth: 2, 3, 4, 5, 6
2. Decision Tree:
    * max_depth: 2, 5, 7, 10, 15, 20
3. Random Forest:
    * n_estimators: 100, 200, 500, 1000
    * max_depth: 2, 3, 4, 5, 6
4. K-Nearest Neighbors:
    * n_neighbors: 3, 5, 7, 11
    

In [87]:
#DECISION TREE
# Hyperparameter sweep: for every maximum depth, report the mean one-vs-rest
# ROC AUC of a decision tree over 10 stratified folds of the combined `df`,
# and record one row per setting in `results`.
max_depth = [2, 5, 7, 10, 15, 20]

# Stratification target. The label is selected by name everywhere below so the
# column used for stratifying and the column used for training cannot diverge.
target = df.loc[:, "activity"]

# With an integer seed the splitter yields the same folds on every split()
# call, so one instance is shared by all settings.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

rows = []
for depth in max_depth:
    model = DecisionTreeClassifier(random_state = 42, max_depth = depth)
    aucs = []
    for train_index, test_index in skf.split(df, target):
        # split() yields positional indices, so .iloc is the correct indexer;
        # .loc only works while the frame has a default 0..n-1 index.
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]
        model.fit(train.drop(columns=["activity"]), train["activity"])
        predictions = model.predict_proba(test.drop(columns=["activity"]))
        aucs.append(roc_auc_score(test["activity"], predictions, multi_class="ovr"))
    auc = sum(aucs)/len(aucs)  # kept numeric so the results table can be sorted/aggregated
    print("Average: " + str(auc))
    # Plain scalars (not one-element lists) so each cell holds a value, not a list.
    rows.append({"Algorithm": "Decision Tree", "n_estimators": 0, "eta": 0, "max_depth": depth, "n_neighbors": 0, "AUC": auc})

# DataFrame.append was removed in pandas 2.0; build the new rows once and
# concatenate them onto the shared results table.
results = pd.concat([results, pd.DataFrame(rows)], ignore_index=True)


Average: 0.6245557253662671
Average: 0.7867763036119788
Average: 0.8364830279779388
Average: 0.8772386185203415
Average: 0.8819062487835302
Average: 0.8714341058042567


In [88]:
#RANDOM FOREST
# Hyperparameter sweep: for every (n_estimators, max_depth) pair, report the
# mean one-vs-rest ROC AUC of a random forest over 10 stratified folds of the
# combined `df`, and record one row per setting in `results`.
n_estimators = [100, 200, 500, 1000]
max_depth =  [2, 3, 4, 5, 6]

# Stratification target. The label is selected by name everywhere below so the
# column used for stratifying and the column used for training cannot diverge.
target = df.loc[:, "activity"]

# With an integer seed the splitter yields the same folds on every split()
# call, so one instance is shared by all settings.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

rows = []
for estimators in n_estimators:
    for depth in max_depth:
        model = RandomForestClassifier(random_state = 42, n_estimators = estimators, max_depth = depth)
        aucs = []
        for train_index, test_index in skf.split(df, target):
            # split() yields positional indices, so .iloc is the correct
            # indexer; .loc only works while the frame has a default index.
            train = df.iloc[train_index, :]
            test = df.iloc[test_index, :]
            model.fit(train.drop(columns=["activity"]), train["activity"])
            predictions = model.predict_proba(test.drop(columns=["activity"]))
            aucs.append(roc_auc_score(test["activity"], predictions, multi_class="ovr"))
        auc = sum(aucs)/len(aucs)  # kept numeric so the results table can be sorted/aggregated
        print("Average: " + str(auc))
        # Plain scalars (not one-element lists) so each cell holds a value, not a list.
        rows.append({"Algorithm": "Random Forest", "n_estimators": estimators, "eta": 0, "max_depth": depth, "n_neighbors": 0, "AUC": auc})

# DataFrame.append was removed in pandas 2.0; build the new rows once and
# concatenate them onto the shared results table.
results = pd.concat([results, pd.DataFrame(rows)], ignore_index=True)


Average: 0.7590830631538752
Average: 0.8039899133100814
Average: 0.8379839908908824
Average: 0.8686687564776294
Average: 0.8947641068238333
Average: 0.7698801524004049
Average: 0.8083258640439
Average: 0.8438508257273089
Average: 0.8722846095923366
Average: 0.8979422480065071
Average: 0.7695939274113586
Average: 0.8098194019662077
Average: 0.8444208793191545
Average: 0.8724146148617369
Average: 0.8976786919605482
Average: 0.7708151196471851
Average: 0.8103949614910289
Average: 0.8443775716529599
Average: 0.8729186744257419
Average: 0.8981006723806939


In [89]:
#KNN
# Hyperparameter sweep: for every neighbourhood size, report the mean
# one-vs-rest ROC AUC of a k-nearest-neighbours classifier over 10 stratified
# folds of the combined `df`, and record one row per setting in `results`.
n_neighbors = [3, 5, 7, 11]

# Stratification target. The label is selected by name everywhere below so the
# column used for stratifying and the column used for training cannot diverge.
target = df.loc[:, "activity"]

# With an integer seed the splitter yields the same folds on every split()
# call, so one instance is shared by all settings.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

rows = []
for neighbors in n_neighbors:
    model = KNeighborsClassifier(n_neighbors = neighbors)
    aucs = []
    for train_index, test_index in skf.split(df, target):
        # split() yields positional indices, so .iloc is the correct indexer;
        # .loc only works while the frame has a default 0..n-1 index.
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]
        model.fit(train.drop(columns=["activity"]), train["activity"])
        predictions = model.predict_proba(test.drop(columns=["activity"]))
        aucs.append(roc_auc_score(test["activity"], predictions, multi_class="ovr"))
    auc = sum(aucs)/len(aucs)  # kept numeric so the results table can be sorted/aggregated
    print("Average: " + str(auc))
    # Plain scalars (not one-element lists) so each cell holds a value, not a list.
    rows.append({"Algorithm": "KNN", "n_estimators": 0, "eta": 0, "max_depth": 0, "n_neighbors": neighbors, "AUC": auc})

# DataFrame.append was removed in pandas 2.0; build the new rows once and
# concatenate them onto the shared results table.
results = pd.concat([results, pd.DataFrame(rows)], ignore_index=True)


Average: 0.8932573470758989
Average: 0.9064852820656256
Average: 0.9064657147921643
Average: 0.9016311310612508


In [91]:
#XGBoost
# Hyperparameter sweep: for every (eta, n_estimators, max_depth) combination,
# report the mean one-vs-rest ROC AUC of an XGBoost classifier over 10
# stratified folds of the combined `df`, and record one row per setting in
# `results`.
n_estimators = [100, 200, 500, 1000]
eta = [0.01, 0.1, 0.3]
max_depth = [2, 3, 4, 5, 6]

# Stratification target, selected by name.
target = df.loc[:, "activity"]

# Current XGBoost releases require class labels to be the integers
# 0..n_classes-1, so train and score on an encoded copy of the target.
# This is a no-op if `activity` already holds such integers; the encoder
# numbers the sorted labels, so the one-vs-rest AUC is unaffected.
encoded_target = preprocessing.LabelEncoder().fit_transform(target)

# With an integer seed the splitter yields the same folds on every split()
# call, so one instance is shared by all settings.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

rows = []
# The loop variable must not reuse the name `eta`: rebinding the grid list to a
# float made the cell fail when it was run a second time.
for eta_value in eta:
    for estimators in n_estimators:
        for depth in max_depth:
            model = XGBClassifier(random_state = 42, n_estimators = estimators, eta = eta_value, max_depth = depth)
            aucs = []
            for train_index, test_index in skf.split(df, target):
                # split() yields positional indices, so .iloc is the correct
                # indexer; .loc only works while the frame has a default index.
                train = df.iloc[train_index, :]
                test = df.iloc[test_index, :]
                model.fit(train.drop(columns=["activity"]), encoded_target[train_index])
                predictions = model.predict_proba(test.drop(columns=["activity"]))
                aucs.append(roc_auc_score(encoded_target[test_index], predictions, multi_class="ovr"))
            auc = sum(aucs)/len(aucs)  # kept numeric so the results table can be sorted/aggregated
            print("Average: " + str(auc))
            # Plain scalars (not one-element lists) so each cell holds a value, not a list.
            rows.append({"Algorithm": "XGBoost", "n_estimators": estimators, "eta": eta_value, "max_depth": depth, "n_neighbors": 0, "AUC": auc})

# DataFrame.append was removed in pandas 2.0; build the new rows once and
# concatenate them onto the shared results table. If this long-running cell is
# interrupted, the settings finished so far are still available in `rows`.
results = pd.concat([results, pd.DataFrame(rows)], ignore_index=True)








































Average: 0.8003993741360047




































Average: 0.8469175519869264




































Average: 0.8818692770809179




































Average: 0.9091423005353849




































Average: 0.9257063578230115




































Average: 0.8246277946885222




































Average: 0.8684325417660013




































Average: 0.9021465410564458




































Average: 0.9266601022920739




































Average: 0.9412816972065634




































Average: 0.8653998053535178




































Average: 0.9079682992783832




































Average: 0.9349239250611279




































Average: 0.9531771230860635




































Average: 0.9634480196842811




































Average: 0.8955364374554096




































Average: 0.9326564311937557




































Average: 0.9552374983148267




































Average: 0.9680215876070358




































Average: 0.9743471373504298




































Average: 0.8967306922354498




































Average: 0.9338542937706624




































Average: 0.9561846466895169




































Average: 0.968420968184167




































Average: 0.9751649282030332




































Average: 0.9214499672521462




































Average: 0.9538582102885516




































Average: 0.9695280928884713




































Average: 0.9778832851925984




































Average: 0.9814749903244827




































Average: 0.9461586415230803




































Average: 0.9704500289744565




































Average: 0.9795951608283868




































Average: 0.9834977596253873




































Average: 0.9845210918097754




































Average: 0.9601166407502209




































Average: 0.9774148429643029




































Average: 0.982205290186663




































Average: 0.9846452519177286




































Average: 0.9850809055898528




































Average: 0.9339904375228439




































Average: 0.9621183535937821




































Average: 0.9756482465480965




































Average: 0.98103183887714




































Average: 0.9833237293045689




































Average: 0.9509387999764727




































Average: 0.9731771851395015




































Average: 0.9812705567194577




































Average: 0.9837823869726318




































Average: 0.9850155206615032




































Average: 0.9662842631797677




































Average: 0.9796813096790894




































Average: 0.9836256780183849




































Average: 0.984742231352201




































Average: 0.9852840747275913




































Average: 0.9721192179638807




































Average: 0.9816654782078246




































Average: 0.984022538165442




































Average: 0.9847824795855938




































Average: 0.9852962409201724


In [None]:
# Feature importance of the most recently fitted XGBoost model.
# The original call passed `rf`, which is not defined anywhere in this
# notebook (the cell had never been executed). `model` is the classifier left
# behind by the XGBoost sweep, i.e. the last setting fitted on the last fold.
# NOTE(review): run this right after the XGBoost cell; if another sweep cell
# runs in between, `model` is no longer an XGBoost estimator.
plot_importance(model)
pyplot.show()

In [92]:
# Persist the table of sweep results (one row per evaluated setting) to disk.
results.to_csv(path_or_buf="df_results.csv")