In [1]:
import os, subprocess, json, time, pickle, winwifi_api
from itertools import islice
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, train_test_split
from collections import Counter

In [4]:
# All data sets and pickled models live under the directory the kernel was
# started in.
folder_path = os.getcwd()

# Paths are built with os.path.join rather than hand-written literals such as
# "\data\\quality": "\d" and "\m" are invalid escape sequences (a warning today,
# an error in future Python versions) and hard-code the Windows separator.
data_quality = os.path.join(folder_path, "data", "quality")
data_rssi = os.path.join(folder_path, "data", "rssi")
data_all = os.path.join(folder_path, "data", "all")
model_qual = os.path.join(folder_path, "model.pkl")
model_rssi = os.path.join(folder_path, "model2.pkl")

# Defaults picked up by the helper functions below (quality-based model).
model_file = model_qual
data_path = data_quality

def parse_output(output):
    """Extract access points from ``netsh wlan show networks mode=bssid`` text.

    The parser is line based:

    * a line starting with ``SSID`` sets the current network name (everything
      after the third whitespace-separated token; an empty name becomes the
      string ``'None'``),
    * a line starting with ``BSSID`` sets the current BSSID (everything after
      the first colon),
    * the line directly after a ``BSSID`` line is read as the signal quality:
      the text after the first colon, with ``%`` removed, converted to ``int``.

    Args:
        output: The command output as a single string.

    Returns:
        A list of ``{"ssid": ..., "bssid": ..., "quality": ...}`` dicts, one per
        BSSID whose following line held a parsable integer.
    """
    ssid = None
    bssid = None
    bssid_line = -100  # sentinel: no BSSID line seen yet
    results = []
    for num, line in enumerate(output.split("\n")):
        line = line.strip()
        if line.startswith("SSID"):
            ssid = " ".join(line.split()[3:]).strip()
            if ssid == '':
                ssid = 'None'
        elif line.startswith("BSSID"):
            bssid = ":".join(line.split(":")[1:]).strip()
            bssid_line = num
        elif num == bssid_line + 1:
            raw_quality = ":".join(line.split(":")[1:]).strip().replace("%", "")
            try:
                quality = int(raw_quality)
            except ValueError:
                # Truncated or unexpected output: the line after the BSSID is
                # not a percentage. Skip this access point instead of aborting
                # the whole scan.
                continue
            results.append({"ssid": ssid, "bssid": bssid, "quality": quality})
    return results

def make_str(output):
    """Return ``output`` as text.

    Objects with a ``decode`` method (e.g. ``bytes``) are decoded as UTF-8 with
    undecodable bytes dropped. Anything without ``decode`` (already a ``str``,
    or ``None``) is returned unchanged.

    Because decoding uses ``errors='ignore'`` it can never raise
    ``UnicodeDecodeError``, so the former UTF-16 fallback branch was
    unreachable and has been removed; results are unchanged.
    """
    try:
        return output.decode("utf8", errors='ignore')
    except AttributeError:
        # No .decode(): not a bytes-like object, hand it back as is.
        return output

def get_sample():
    """Scan for Wi-Fi networks with ``netsh`` and return one quality sample.

    Runs ``netsh wlan show networks mode=bssid`` through the shell, converts
    its stdout to text with ``make_str`` and parses it with ``parse_output``.

    Returns:
        A dict mapping ``"<ssid> <bssid>"`` to the parsed quality value.
    """
    process = subprocess.Popen("netsh wlan show networks mode=bssid", stdout=subprocess.PIPE, shell=True)
    raw_output = process.communicate()[0]
    sample = {}
    for access_point in parse_output(make_str(raw_output)):
        key = access_point['ssid'] + " " + access_point['bssid']
        sample[key] = access_point['quality']
    return sample
    
def get_sample2():
    """Scan with ``winwifi_api.wifi_scan()`` and return one sample dict.

    Each scan entry ``ap`` contributes the key
    ``"<ap[0]> <ap[1]> <str(ap[3])>"`` with value ``ap[4]``.
    NOTE(review): presumably ap[0]/ap[1] are SSID/BSSID and ap[4] is the RSSI;
    confirm the field order against winwifi_api.

    Returns:
        A dict built from the scan entries. An empty scan yields ``{}`` (the
        previous implementation returned a list in that case, which does not
        match the dict returned for non-empty scans).
    """
    results = winwifi_api.wifi_scan()
    # Single pass: the former outer loop rebuilt this same dict once per entry.
    return {ap[0] + " " + ap[1] + " " + str(ap[3]): ap[4] for ap in results}

def get_all():
    """Return every ``winwifi_api.wifi_scan()`` entry as a flat list row.

    Each row is ``["<entry[0]> <entry[1]>", entry[2], entry[3], entry[4],
    entry[5]]``, in scan order.
    """
    return [
        [entry[0] + " " + entry[1], entry[2], entry[3], entry[4], entry[5]]
        for entry in winwifi_api.wifi_scan()
    ]

    
def get_pipeline(clf=None):
    """Build the classification pipeline: ``DictVectorizer`` followed by ``clf``.

    Args:
        clf: Estimator used as the final pipeline step. When omitted, a new
            ``RandomForestClassifier(n_estimators=100,
            class_weight="balanced", max_features="sqrt")`` is created.

    Returns:
        The pipeline produced by ``make_pipeline``.
    """
    if clf is None:
        # Create the classifier per call: a default-argument instance is built
        # once at definition time and would be shared (and re-fitted) by every
        # pipeline. "sqrt" is what max_features="auto" meant for classifiers;
        # "auto" itself is rejected by current scikit-learn releases.
        clf = RandomForestClassifier(n_estimators=100, class_weight="balanced", max_features="sqrt")
    return make_pipeline(DictVectorizer(sparse=False), clf)

def get_train_data(folder=None):
    """Load every ``*.txt`` sample file in ``folder``.

    Each non-blank line of a file is parsed as one JSON document (one sample).
    The label of every sample is the file name without its ``.txt`` suffix.

    Args:
        folder: Directory to read. ``None`` means the current directory.

    Returns:
        ``(X, y)``: the list of parsed samples and the parallel list of labels.
        Files are visited in sorted name order so the result is deterministic.
    """
    if folder is None:
        # os.listdir(None) lists the current directory, but os.path.join(None, ...)
        # raises; normalise so both agree.
        folder = os.curdir
    X = []
    y = []
    for file_name in sorted(os.listdir(folder)):
        if not file_name.endswith(".txt"):
            continue
        # Slice the suffix off. str.rstrip(".txt") strips any trailing '.', 't'
        # and 'x' characters, so "left.txt" would become "lef".
        label = file_name[:-len(".txt")]
        with open(os.path.join(folder, file_name)) as f:
            # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads.
            data = [json.loads(line) for line in f if line.strip()]
        X.extend(data)
        y.extend([label] * len(data))
    return X, y

def get_model(model = model_file):
    """Load a pickled model from disk.

    Args:
        model: Path of the pickle file. Defaults to the module-level
            ``model_file`` as it was when this function was defined.

    Returns:
        The unpickled object.

    Raises:
        ValueError: If the file cannot be opened or cannot be unpickled. The
            underlying exception is chained as ``__cause__``.
    """
    try:
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that this notebook produced itself.
        with open(model, "rb") as f:
            return pickle.load(f)
    except OSError as exc:
        raise ValueError("Can not find model file!") from exc
    except Exception as exc:
        # Still a ValueError for callers, but no longer a bare except:
        # KeyboardInterrupt/SystemExit propagate and the real cause is kept.
        raise ValueError("Can not load model file!") from exc

def train_model(path = data_path, model = model_file):
    """Fit a fresh pipeline on the samples under ``path`` and pickle it.

    Args:
        path: Folder passed to ``get_train_data``.
        model: File the fitted pipeline is pickled to (opened with ``"wb"``).

    Raises:
        ValueError: If ``get_train_data`` returns no samples.
    """
    samples, labels = get_train_data(path)
    if not len(samples):
        raise ValueError("Can not find any trained locations!")
    pipeline = get_pipeline()
    pipeline.fit(samples, labels)
    with open(model, "wb") as model_out:
        pickle.dump(pipeline, model_out)

def learn(label, n=1):
    """Record one measurement for ``label`` and retrain the default model.

    Takes a ``get_sample()`` scan, a ``get_sample2()`` scan and a ``get_all()``
    scan. If the first one is non-empty, each is appended as a JSON line to
    ``<data_path>/<label>.txt``, ``<data_rssi>/<label>_rssi.txt`` and
    ``<data_all>/<label>_all.txt`` respectively, and the stored sample counts
    are printed. ``train_model()`` then runs with its defaults, whether or not
    the measurement succeeded.

    Args:
        label: Location name used for the sample files.
        n: Currently unused; kept so existing calls remain valid.
    """
    label_path = os.path.join(data_path, label + ".txt")
    label_path2 = os.path.join(data_rssi, label + "_rssi.txt")
    label_path3 = os.path.join(data_all, label + "_all.txt")

    try:
        new_sample = get_sample()
        new_rssi = get_sample2()
        new_all = get_all()
        print("Number of APs in range: ", len(new_rssi))
        if new_sample:
            write_data(label_path, new_sample)
            write_data(label_path2, new_rssi)
            write_data(label_path3, new_all)
            print("Done, number of old measurement of", locations(data_path, label))
            print("Done, number of rssi measurement of", locations(data_rssi, label + "_rssi"))
    except Exception as exc:
        # Best effort: a failed measurement must not stop a measurement series.
        # Catching Exception (not a bare except) lets Ctrl+C interrupt the run,
        # and the cause is printed instead of being discarded.
        print("Something go wrong.", repr(exc))
    train_model()
    
def write_data(label_path, data):
    """Append ``data`` to ``label_path`` as one JSON document on its own line."""
    with open(label_path, "a") as out_file:
        print(json.dumps(data), file=out_file)

def write_data2(label_path, data):
    """Append ``data``, formatted with ``%s``, to ``label_path`` as one line."""
    with open(label_path, "a") as out_file:
        text_line = '%s\n' % data
        out_file.write(text_line)

def locations(path=None, loc=None):
    """Report how many stored samples each label has under ``path``.

    Args:
        path: Folder passed to ``get_train_data``.
        loc: Optional label. When truthy, only that label is looked up.

    Returns:
        ``"<label>: <count>"`` when ``loc`` is given and present; otherwise
        ``None``. Without ``loc`` every ``"<label>: <count>"`` line is printed.

    Raises:
        ValueError: If no labels were loaded.
    """
    _samples, labels = get_train_data(path)
    if not len(labels):
        raise ValueError("Can not find any trained locations!")

    occurrences = Counter(labels)
    if not loc:
        for name, count in occurrences.items():
            print("{}: {}".format(name, count))
        return None

    matches = ("{}: {}".format(name, count) for name, count in occurrences.items() if name == loc)
    return next(matches, None)

def print_proba():
    """Return the four most probable entries of ``predict_proba()``.

    The result is a dict ordered from highest to lowest probability.
    """
    ranked = sorted(predict_proba().items(), key=lambda pair: pair[1], reverse=True)
    top_four = islice(ranked, 4)
    return dict(top_four)

def predict_proba(lp=None, data_sample=None):
    """Return the class probabilities for one sample, most probable first.

    Args:
        lp: Fitted model exposing ``classes_`` and ``predict_proba``. When
            omitted, ``get_model()`` is called.
        data_sample: Sample passed to ``lp.predict_proba``. When omitted,
            ``get_sample()`` is called.

    Returns:
        A dict mapping each entry of ``lp.classes_`` to its probability in the
        first returned row, ordered from highest to lowest probability.
    """
    # Resolve defaults at call time. As default-argument expressions, get_model()
    # and get_sample() ran only once when the function was defined, so every
    # later call reused that stale model and scan (and defining the function
    # failed outright when no model file existed yet).
    if lp is None:
        lp = get_model()
    if data_sample is None:
        data_sample = get_sample()
    out = dict(zip(lp.classes_, lp.predict_proba(data_sample)[0]))
    return dict(sorted(out.items(), key=lambda item: item[1], reverse=True))

def predict():
    """Load the default model, take a fresh ``get_sample()`` scan and return
    the first element of the model's prediction for it."""
    predictions = get_model().predict(get_sample())
    return predictions[0]


def crossval(data = data_path, model = get_model, clf=None, X=None, y=None, folds=10, n=5):
    """Run ``cross_val_score`` ``n`` times and print each mean and their average.

    Args:
        data: Folder passed to ``get_train_data`` when ``X`` or ``y`` is missing.
        model: Estimator used when ``clf`` is not given. May also be a
            zero-argument callable returning the estimator (the default is the
            ``get_model`` loader).
        clf: Estimator to evaluate; takes precedence over ``model``.
        X, y: Optional samples and labels; loaded from ``data`` if either is None.
        folds: Value passed as ``cv`` to ``cross_val_score``.
        n: Number of repetitions.

    Raises:
        ValueError: If there are fewer samples than ``folds``.
    """
    if X is None or y is None:
        X, y = get_train_data(data)
    if len(X) < folds:
        raise ValueError('There are not enough samples ({}). Need at least {}.'.format(len(X), folds))
    # Identity check rather than `clf or model`: truth-testing an estimator may
    # go through its __len__, which can fail or be 0 for a valid estimator.
    if clf is None:
        clf = model
    # The default `model` is the get_model function itself, not a model; call
    # such a loader so cross_val_score receives an actual estimator.
    if callable(clf) and not hasattr(clf, "fit"):
        clf = clf()
    tot = 0
    print("KFold folds={}, running {} times".format(folds, n))
    for i in range(n):
        res = cross_val_score(clf, X, y, cv=folds).mean()
        tot += res
        print("{}/{}: {}".format(i + 1, n, res))
    print("-------- total --------")
    print(tot / n)

In [132]:
import sched, time
# Shared scheduler used to delay each measurement.
s = sched.scheduler(time.time, time.sleep)
def do_something(a, label='office_desk_filip'):
    """Schedule one ``learn(label)`` call ``a`` seconds from now and wait for it.

    Args:
        a: Delay in seconds before ``learn`` runs.
        label: Location label passed to ``learn``. Defaults to the label that
            was previously hard-coded, so existing calls behave the same.
    """
    print("Doing stuff...")
    s.enter(a, 2, learn, argument=(label,))
    # run() blocks until the scheduled call has finished.
    s.run()


In [133]:
# Sample counts per label before the measurement series.
locations(data_path)

# 60 measurements; each do_something(10) call waits 10 seconds before running learn.
for attempt in range(1, 61):
    print("measuring time:", attempt)
    do_something(10)

# Sample counts per label after the measurement series.
locations(data_path)

In [None]:
# Quality model test

In [20]:
# Print "<label>: <count>" for every label stored in the quality data folder.
locations(data_quality)

office_1_1_qual: 126
office_1_2_qual: 120
office_2_1_qual: 180
office_2_2_qual: 118
office_3_1_qual: 180
office_3_2_qual: 120
office_3_3_qual: 120


In [21]:
# Fit a pipeline on the quality samples and pickle it to model_qual.
train_model(data_quality, model_qual)

In [22]:
# Load the pickled quality model and show its class probabilities for a fresh
# netsh scan, highest first.
predict_proba(get_model(model_qual), get_sample() )

{'office_1_1_qual': 0.82,
 'office_1_2_qual': 0.12,
 'office_2_2_qual': 0.05,
 'office_3_1_qual': 0.01,
 'office_2_1_qual': 0.0,
 'office_3_2_qual': 0.0,
 'office_3_3_qual': 0.0}

In [8]:
# Cross-validate on the quality samples, using the loaded quality model as the estimator.
crossval(data_quality, get_model(model_qual))

KFold folds=10, running 5 times
1/5: 0.752147766323024
2/5: 0.7459407216494846
3/5: 0.7562714776632301
4/5: 0.7510524054982818
5/5: 0.7469072164948455
-------- total --------
0.7504639175257732


In [None]:
# RSSI model test

In [6]:
# Print "<label>: <count>" for every label stored in the RSSI data folder.
locations(data_rssi)

office_1_1_rssi: 125
office_1_2_rssi: 119
office_2_1_rssi: 179
office_2_2_rssi: 116
office_3_1_rssi: 179
office_3_2_rssi: 119
office_3_3_rssi: 120


In [11]:
# Fit a pipeline on the RSSI samples and pickle it to model_rssi.
train_model(data_rssi, model_rssi)

In [19]:
# Load the pickled RSSI model and show its class probabilities for a fresh
# winwifi_api scan (get_sample2), highest first.
predict_proba(get_model(model_rssi), get_sample2())

{'office_1_1_rssi': 0.5,
 'office_1_2_rssi': 0.2,
 'office_2_1_rssi': 0.09,
 'office_2_2_rssi': 0.09,
 'office_3_1_rssi': 0.06,
 'office_3_3_rssi': 0.06,
 'office_3_2_rssi': 0.0}

In [9]:
# Cross-validate on the RSSI samples, using the loaded RSSI model as the estimator.
crossval(data_rssi, get_model(model_rssi))

KFold folds=10, running 5 times
1/5: 0.6552521929824562
2/5: 0.6489912280701754
3/5: 0.654265350877193
4/5: 0.6604605263157894
5/5: 0.6552412280701755
-------- total --------
0.6548421052631579
