In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

In [44]:
import time

In [3]:
# Per-kid means of the sensor data (per the original note; the CSV schema is not visible here — TODO confirm).
# NOTE(review): `data` is not referenced by any later cell visible in this export.
data = pd.read_csv("../spsdata/new.csv")

In [4]:
# Per-kid means of the sensor data plus age and gender (per the original note — TODO confirm schema).
# Passed to split() further down, which requires "ID" and "label" columns.
data2 = pd.read_csv("../spsdata/test.csv")

In [5]:
# All rows (not averaged per kid) of the sensor data (per the original note — TODO confirm schema).
# Passed to split() further down, which requires "ID" and "label" columns.
data3 = pd.read_csv("../spsdata/new_all.csv")

In [6]:
# All rows (not averaged per kid) of the sensor data plus age and gender (per the original note — TODO confirm schema).
# NOTE(review): `data4` only appears in a commented-out call below; no active visible cell uses it.
data4 = pd.read_csv("../spsdata/test_all.csv")

In [7]:
# Split per kid: all rows sharing an ID go to the same side of the train/test split.

def split(data, ratio, random_state=None):
    """Split ``data`` into train and test sets without splitting any ID.

    All rows sharing an ``ID`` value end up on the same side of the split,
    so no ID contributes rows to both the training and the test set.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``ID`` column (group key) and a ``label`` column
        (target). Every other column is returned as a feature.
    ratio : float
        Fraction of the unique IDs assigned to the *test* set; the number of
        test IDs is ``int(n_ids * ratio)``.
    random_state : int or None, optional
        Seed for a private ``numpy.random.RandomState`` used to shuffle the
        IDs. ``None`` (the default) draws from NumPy's global random state,
        which is what this function always did before the parameter existed.

    Returns
    -------
    xtrain, xtest : pandas.DataFrame
        Feature columns (everything except ``ID`` and ``label``).
    ytrain : pandas.Series
        Training labels, positionally aligned with ``xtrain``.
    ytest : pandas.Series
        Test labels indexed by ``ID``, positionally aligned with ``xtest``.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Shuffle the IDs themselves rather than a list of per-ID frames.
    ids = list(data["ID"].unique())
    rng.shuffle(ids)

    n_test = int(len(ids) * ratio)
    test_ids, train_ids = ids[:n_test], ids[n_test:]

    # One pass over the frame instead of one boolean scan per ID.
    by_id = {key: rows for key, rows in data.groupby("ID", sort=False)}

    def _gather(id_list):
        # Rows stay grouped by ID, in shuffled-ID order. IDs that groupby
        # dropped (NaN) contribute no rows, as an empty selection would.
        parts = [by_id[i] for i in id_list if i in by_id]
        if not parts:
            # pd.concat([]) raises; return an empty frame that keeps the
            # columns and dtypes of the input.
            return data.iloc[0:0].reset_index(drop=True)
        # DataFrame.append was removed in pandas 2.0 and grew the frame
        # quadratically; a single concat also preserves column dtypes.
        return pd.concat(parts, ignore_index=True)

    train = _gather(train_ids)
    test = _gather(test_ids)

    xtrain = train.drop(columns=["ID", "label"])
    ytrain = train["label"]
    xtest = test.drop(columns=["ID", "label"])
    ytest = test.set_index("ID")["label"]
    return xtrain, xtest, ytrain, ytest

#print(split(data4, 0.3))

In [86]:
def linear_svm(data, ratio=0.2):
    """Fit a linear SVM on one random per-ID split and score it row by row.

    ``data`` is handed to ``split`` together with ``ratio`` (the share of IDs
    held out for testing). A ``LinearSVC`` is trained on the training part and
    its predictions on the held-out rows are compared with the true labels.

    Returns the row-level accuracy as a percentage (0-100).
    """
    features_train, features_test, target_train, target_test = split(data, ratio)

    model = LinearSVC(random_state=0, tol=1e-5, dual=False)
    model.fit(features_train, target_train)

    predicted = model.predict(features_test)
    accuracy = accuracy_score(target_test, predicted)
    return accuracy * 100

In [87]:
def linear_svm_complex(data, ratio=0.2, out_path="../spsdata/hype.csv"):
    """Fit a linear SVM on one random per-ID split and score it per ID.

    Row-level predictions on the held-out rows are averaged per ``ID`` and
    rounded to the nearest integer; that rounded value is compared with the
    per-ID mean of the true labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed to ``split``; needs ``ID`` and ``label`` columns.
    ratio : float, optional
        Share of the IDs held out for testing (forwarded to ``split``).
    out_path : str or None, optional
        CSV file that receives the row-level results (columns ``ID``,
        ``label``, ``predict``). Defaults to the path this function has always
        written to; pass ``None`` to skip writing.

    Returns
    -------
    float
        Per-ID accuracy as a percentage (0-100).
    """
    xtrain, xtest, ytrain, ytest = split(data, ratio)

    clf = LinearSVC(random_state=0, tol=1e-5, dual=False)
    clf.fit(xtrain, ytrain)

    # ytest is a Series named "label" indexed by "ID"; reset_index() yields the
    # same ID/label table the old CSV write-then-read round trip produced,
    # without touching the disk.
    results = ytest.reset_index()
    # The CSV round trip used to re-infer numeric dtypes; keep that guarantee.
    results["label"] = pd.to_numeric(results["label"])
    results["predict"] = clf.predict(xtest)

    if out_path is not None:
        results.to_csv(out_path, header=True, index=False)

    # One grouped pass instead of two boolean scans of the frame per ID.
    per_id = results.groupby("ID", sort=False)[["label", "predict"]].mean()
    true_labels = per_id["label"].tolist()
    # round() on the mean prediction acts as a vote over an ID's rows
    # (meaningful for integer-coded labels such as 0/1).
    voted = [round(mean_pred) for mean_pred in per_id["predict"].tolist()]

    acc = accuracy_score(true_labels, voted)
    return acc * 100

In [88]:
# Single per-ID evaluation on data2, with 30 % of the IDs held out for testing.
print(linear_svm_complex(data2, 0.3))

60.71428571428571


In [92]:
# Run the row-level linear SVM n times, each on a fresh random split,
# and print the average accuracy.

def linear_run(data, ratio, n=10):
    """Repeat ``linear_svm`` ``n`` times and report the mean accuracy.

    Each repetition calls ``linear_svm(data, ratio)``. The average of the
    returned percentages is printed, and a sentence stating the elapsed
    wall-clock time (rounded to two decimals) is returned as a string.
    """
    running_sum = 0
    began = time.time()
    for _ in range(n):
        running_sum = running_sum + linear_svm(data, ratio)
    mean_pct = running_sum / float(n)
    print("Average accuracy is {} %".format(mean_pct))
    seconds = round((time.time() - began), 2)
    return "This took " + str(seconds) + " seconds."

In [93]:
# Run the per-ID (aggregated) linear SVM n times, each on a fresh random split,
# and print every individual score plus the average accuracy.

def linear_run_complex(data, ratio, n=10):
    """Repeat ``linear_svm_complex`` ``n`` times and report the mean accuracy.

    Each repetition calls ``linear_svm_complex(data, ratio)`` and prints its
    score rounded to two decimals. After the loop the average percentage is
    printed, and a sentence stating the elapsed wall-clock time (rounded to
    two decimals) is returned as a string.
    """
    running_sum = 0
    began = time.time()
    for _ in range(n):
        pct = linear_svm_complex(data, ratio)
        print("{} %".format(round(pct, 2)))
        running_sum = running_sum + pct
    mean_pct = running_sum / float(n)
    print("Average accuracy is {} %".format(mean_pct))
    seconds = round((time.time() - began), 2)
    return "This took " + str(seconds) + " seconds."

In [94]:
# Average row-level accuracy over the default 10 random splits of data3 (30 % of the IDs held out each time).
print(linear_run(data3, 0.3))

Average accuracy is 49.536140734476604 %
This took 79.88 seconds.


In [74]:
# Per-ID accuracy over the default 10 random splits of data3 (30 % of the IDs held out each time).
# NOTE(review): this cell's execution count (74) is lower than that of the linear_run_complex
# definition (93), so the recorded output may predate the current code — re-run to confirm.
print(linear_run_complex(data3, 0.3))

53.57 %
39.29 %
39.29 %
50.0 %
60.71 %
42.86 %
39.29 %
57.14 %
42.86 %
35.71 %
Average accuracy is 46.07142857142857 %
This took 161.52 seconds.


In [78]:
# Average row-level accuracy over 500 random splits of data2 (30 % of the IDs held out each time).
# NOTE(review): this cell's execution count (78) is lower than that of the linear_run
# definition (92), so the recorded output may predate the current code — re-run to confirm.
print(linear_run(data2, 0.3, n=500))

Average accuracy is 57.45714285714281 %
This took 34.64 seconds.
