In [101]:
import os, subprocess, json, time, pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from collections import Counter

In [107]:
# Locations used by every helper below, all relative to the working directory
# the notebook kernel was started in.
# os.path.join is used instead of concatenating "\data" / "\model.pkl":
# "\d" and "\m" are invalid escape sequences (a warning on modern Python) and
# a literal backslash is only a path separator on Windows.
folder_path = os.getcwd()
data_path = os.path.join(folder_path, "data")         # one <label>.txt file per location
model_file = os.path.join(folder_path, "model.pkl")   # pickled trained pipeline

def parse_output(output):
    """Extract access points from the text of a network scan.

    The text is processed line by line (after stripping whitespace):

    * a line starting with ``SSID`` sets the current network name to
      everything from the fourth whitespace-separated token onwards
      (an empty name is stored as the string ``'None'``);
    * a line starting with ``BSSID`` sets the current BSSID to everything
      after the first colon;
    * the line directly after a ``BSSID`` line is read as the signal
      quality: the text after the first colon, with ``%`` removed, as an int.

    Args:
        output: scan output as a ``str``.

    Returns:
        A list of dicts with keys ``"ssid"``, ``"bssid"`` and ``"quality"``,
        in the order the access points appear.  A BSSID whose following line
        does not hold an integer is skipped instead of aborting the parse.
    """
    ssid = bssid = None
    bssid_line = -100  # sentinel: no BSSID line seen yet
    results = []
    for num, line in enumerate(output.split("\n")):
        line = line.strip()
        if line.startswith("SSID"):
            # Tokens 0-2 are "SSID", the index and the ":" separator.
            ssid = " ".join(line.split()[3:]).strip() or 'None'
        elif line.startswith("BSSID"):
            # The BSSID itself contains colons, so keep everything after the first.
            bssid = line.partition(":")[2].strip()
            bssid_line = num
        elif num == bssid_line + 1:
            raw_quality = line.partition(":")[2].strip().replace("%", "")
            try:
                quality = int(raw_quality)
            except ValueError:
                # Not a numeric signal line: drop this access point only.
                continue
            results.append({"ssid": ssid, "bssid": bssid, "quality": quality})
    return results

def make_str(output):
    """Return ``output`` as text.

    Objects with a ``decode`` method (e.g. ``bytes``) are decoded as UTF-8
    with undecodable bytes dropped; anything without ``decode`` (e.g. a
    ``str``) is returned unchanged.

    The previous utf16 fallback was removed: decoding with
    ``errors='ignore'`` never raises ``UnicodeDecodeError``, so that branch
    could not run.  Behaviour is unchanged.
    """
    try:
        return output.decode("utf8", errors='ignore')
    except AttributeError:
        # Already text (or not bytes-like at all): pass it through untouched.
        return output

def get_sample():
    """Scan for wireless networks and return one measurement.

    Runs ``netsh wlan show networks mode=bssid`` through the shell, parses
    its standard output with ``parse_output`` and returns a dict mapping
    ``"<ssid> <bssid>"`` to the reported quality.
    """
    command = "netsh wlan show networks mode=bssid"
    process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
    raw_output, _ = process.communicate()

    sample = {}
    for ap in parse_output(make_str(raw_output)):
        # Key combines name and hardware address of the access point.
        sample[ap['ssid'] + " " + ap['bssid']] = ap['quality']
    return sample



def get_pipeline(clf=None):
    """Build the pipeline used to learn and predict locations.

    Args:
        clf: classifier to place after the ``DictVectorizer``.  When omitted,
            a new ``RandomForestClassifier(n_estimators=100,
            class_weight="balanced")`` is created for this call.

    Returns:
        ``make_pipeline(DictVectorizer(sparse=False), clf)``.

    The default used to be an estimator instance created once at definition
    time, so every pipeline built without an argument shared (and re-fitted)
    the same classifier object.  A ``None`` sentinel gives each call its own.
    """
    if clf is None:
        clf = RandomForestClassifier(n_estimators=100, class_weight="balanced")
    return make_pipeline(DictVectorizer(sparse=False), clf)

def get_train_data(folder=None):
    """Load every recorded sample from the ``*.txt`` files in ``folder``.

    Each file holds one JSON document per line; the file name without its
    ``.txt`` extension is the label of every sample in that file.

    Args:
        folder: directory to read.  ``None`` means the current directory.

    Returns:
        ``(X, y)``: the list of decoded samples and the parallel list of
        labels.  Files are visited in sorted name order so the result does
        not depend on the order ``os.listdir`` happens to return.
    """
    if folder is None:
        # os.listdir(None) lists the current directory, but
        # os.path.join(None, ...) raises TypeError, so resolve it up front.
        folder = os.curdir
    suffix = ".txt"
    X = []
    y = []
    for file_name in sorted(os.listdir(folder)):
        if not file_name.endswith(suffix):
            continue
        # Slice the suffix off.  str.rstrip(".txt") strips any trailing run of
        # the characters '.', 't', 'x' and would turn "test.txt" into "tes".
        label = file_name[:-len(suffix)]
        with open(os.path.join(folder, file_name)) as f:
            # Blank lines are skipped; json.loads would raise on them.
            data = [json.loads(line) for line in f if line.strip()]
        X.extend(data)
        y.extend([label] * len(data))
    return X, y

def get_model():
    """Load and return the pickled pipeline stored at ``model_file``.

    Reads the module-level ``model_file`` path, the same path
    ``train_model`` writes to.

    Returns:
        The unpickled object.

    Raises:
        ValueError: if the file does not exist, or if it cannot be read or
            unpickled.  The original exception is chained as ``__cause__``.
    """
    try:
        with open(model_file, "rb") as f:
            # NOTE: pickle.load can execute arbitrary code; only load model
            # files this notebook produced itself.
            return pickle.load(f)
    except FileNotFoundError as exc:
        raise ValueError("Can not find model file!") from exc
    except Exception as exc:
        # Still a ValueError for existing callers, but no longer disguised as
        # a missing file.
        raise ValueError("Can not load model file: {!r}".format(exc)) from exc

def train_model():
    """Fit a fresh pipeline on all recorded samples and pickle it.

    Reads the training data from ``data_path``, fits the pipeline returned by
    ``get_pipeline()`` and writes it to ``model_file``.

    Raises:
        ValueError: if no samples are found.
    """
    samples, labels = get_train_data(data_path)
    if not samples:
        raise ValueError("Can not find any trained locations!")

    pipeline = get_pipeline()
    pipeline.fit(samples, labels)

    with open(model_file, "wb") as out:
        pickle.dump(pipeline, out)

def learn(label, n=1):
    """Record one measurement for ``label`` and retrain the model.

    Takes a sample with ``get_sample()``; if it is non-empty, appends it to
    ``<data_path>/<label>.txt`` and prints the sample count for that label.
    The model is then retrained on all recorded data.

    Args:
        label: location name; also the stem of the data file.
        n: accepted for backward compatibility; not used by this function.

    Sampling stays best-effort: a failure while measuring or writing is
    reported and training still runs.  It is no longer swallowed silently,
    and ``KeyboardInterrupt`` now propagates instead of being eaten.
    """
    label_path = os.path.join(data_path, label + ".txt")
    try:
        new_sample = get_sample()
        if new_sample:
            write_data(label_path, new_sample)
            print("Done, number of measurement of", locations(data_path, label))
    except Exception as exc:
        print("Could not record a sample for {!r}: {!r}".format(label, exc))
    train_model()
    
def write_data(label_path, data):
    """Append ``data`` to ``label_path`` as one JSON document on its own line.

    Args:
        label_path: file to append to; created if it does not exist.
        data: JSON-serialisable object.

    The parent directory is created when missing, so the first measurement
    works even before the data folder exists.
    """
    parent = os.path.dirname(label_path)
    if parent:
        # Guarded: os.makedirs("") raises for a bare file name.
        os.makedirs(parent, exist_ok=True)
    with open(label_path, "a") as f:
        f.write(json.dumps(data) + "\n")


def locations(path=None, loc=None):
    """Report how many samples are recorded per location.

    Args:
        path: folder passed to ``get_train_data``.
        loc: optional location label.

    Returns:
        With a truthy ``loc``: the string ``"<loc>: <count>"`` if that label
        has samples, otherwise ``None``.  Without ``loc``: ``None``, after
        printing one ``"<label>: <count>"`` line per label.

    Raises:
        ValueError: if no samples are found at all.
    """
    _, y = get_train_data(path)
    if len(y) == 0:
        raise ValueError("Can not find any trained locations!")

    occurrences = Counter(y)
    if loc:
        # Direct lookup instead of scanning every item for a matching key.
        if loc in occurrences:
            return "{}: {}".format(loc, occurrences[loc])
        return None

    for key, value in occurrences.items():
        print("{}: {}".format(key, value))

def predict_proba():
    """Print the per-class probabilities for a fresh measurement.

    Loads the stored model, takes a sample and prints a JSON object mapping
    each entry of the model's ``classes_`` to the matching value in the first
    row of ``predict_proba``.
    """
    model = get_model()
    sample = get_sample()
    probabilities = model.predict_proba(sample)[0]
    per_class = dict(zip(model.classes_, probabilities))
    print(json.dumps(per_class))

def predict():
    """Return the stored model's prediction for a fresh measurement.

    Loads the model, takes one sample and returns the first element of
    ``predict`` for it.
    """
    model = get_model()
    sample = get_sample()
    predictions = model.predict(sample)
    return predictions[0]


def crossval(clf=None, X=None, y=None, folds=10, n=5):
    """Run cross-validation ``n`` times and print each score and the mean.

    Args:
        clf: estimator to evaluate; defaults to the stored model from
            ``get_model()``.
        X, y: samples and labels; when either is ``None`` both are loaded
            from ``data_path``.
        folds: value passed as ``cv`` to ``cross_val_score``.
        n: number of repetitions.

    Raises:
        ValueError: if there are fewer samples than ``folds``.
    """
    if X is None or y is None:
        X, y = get_train_data(data_path)
    if len(X) < folds:
        raise ValueError('There are not enough samples ({}). Need at least {}.'.format(len(X), folds))
    # Compare against None rather than using `clf or ...`: truth-testing an
    # estimator calls its __len__ when it defines one, and that can raise
    # for an unfitted ensemble.
    if clf is None:
        clf = get_model()
    tot = 0
    print("KFold folds={}, running {} times".format(folds, n))
    for i in range(n):
        res = cross_val_score(clf, X, y, cv=folds).mean()
        tot += res
        print("{}/{}: {}".format(i + 1, n, res))
    print("-------- total --------")
    # Despite the banner, this is the mean score over the n runs.
    print(tot / n)

In [100]:
locations(data_path)

home_kuchyn: 30
home_loby: 30
home_loznice: 69
home_obyvak: 142
office_kancl: 81
office_loby: 25
office_vyhled: 10
office_zasedacka: 45


In [14]:
import sched, time
s = sched.scheduler(time.time, time.sleep)
def do_something(a, label='home_loby'):
    """Schedule one ``learn(label)`` call ``a`` time units ahead and wait for it.

    Args:
        a: delay passed to the scheduler ``s`` (seconds, since ``s`` is built
            on ``time.time`` / ``time.sleep``).
        label: location to record; defaults to the previously hard-coded
            ``'home_loby'`` so existing calls behave the same.

    ``s.run()`` blocks until the scheduled call has finished.
    """
    print("Doing stuff...")
    s.enter(a, 2, learn, argument=(label,))
    s.run()


In [15]:
# Print the per-location sample counts before collecting new measurements.
locations(data_path)
# Take 30 measurements. Each do_something(60) call schedules learn() 60
# seconds ahead and blocks until it has run, so this cell runs for a long time.
for i in range(30):
    print("measuring time:", i+1)
    do_something(60)

# Print the counts again to confirm the new samples were recorded.
locations(data_path)

home_kuchyn: 30
home_loznice: 69
home_obyvak: 141
office_kancl: 71
office_loby: 25
office_vyhled: 10
office_zasedacka: 45
measuring time: 1
Doing stuff...
Done, number of measurement of home_loby: 1
measuring time: 2
Doing stuff...
Done, number of measurement of home_loby: 2
measuring time: 3
Doing stuff...
Done, number of measurement of home_loby: 3
measuring time: 4
Doing stuff...
Done, number of measurement of home_loby: 4
measuring time: 5
Doing stuff...
Done, number of measurement of home_loby: 5
measuring time: 6
Doing stuff...
Done, number of measurement of home_loby: 6
measuring time: 7
Doing stuff...
Done, number of measurement of home_loby: 7
measuring time: 8
Doing stuff...
Done, number of measurement of home_loby: 8
measuring time: 9
Doing stuff...
Done, number of measurement of home_loby: 9
measuring time: 10
Doing stuff...
Done, number of measurement of home_loby: 10
measuring time: 11
Doing stuff...
Done, number of measurement of home_loby: 11
measuring time: 12
Doing st

In [90]:
learn("office_kancl")

Done, number of measurement of office_kancl: 81


In [96]:
predict_proba()

{"home_kuchyn": 0.0, "home_loby": 0.0, "home_loznice": 0.0, "home_obyvak": 0.0, "office_kancl": 0.97, "office_loby": 0.01, "office_vyhled": 0.01, "office_zasedacka": 0.01}


In [108]:
crossval()

KFold folds=10, running 5 times
1/5: 0.9908033826638478
2/5: 0.983985200845666
3/5: 0.983985200845666
4/5: 0.9908562367864693
5/5: 0.9862579281183932
-------- total --------
0.9871775898520084


In [42]:
X, y = get_train_data(data_path)

In [43]:
print(y)

['home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_kuchyn', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loby', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice', 'home_loznice',

In [44]:
# pandas was never imported anywhere in this notebook, so this cell raised
# NameError on a fresh kernel; import it here where it is first needed.
import pandas as pd

# One row per recorded sample, one column per "<ssid> <bssid>" key.
df = pd.json_normalize(X)

In [88]:
df.to_excel("output.xlsx") 

In [46]:
print(df)

     Farkpoint 70:54:25:70:7c:0c  Farkpoint 70:54:25:70:7c:0b  \
0                           56.0                         74.0   
1                           58.0                         74.0   
2                           58.0                         74.0   
3                           58.0                         74.0   
4                           58.0                         76.0   
..                           ...                          ...   
420                          NaN                          NaN   
421                          NaN                          NaN   
422                          NaN                          NaN   
423                          NaN                          NaN   
424                          NaN                          NaN   

     DIR-53 ec:ad:e0:25:ce:45  UPC1456822 38:43:7d:c8:3d:73  \
0                        66.0                          26.0   
1                        68.0                          26.0   
2                        64.0 

In [45]:
lp = get_model()
data_sample = get_sample()
print(lp.classes_)

['home_kuchyn' 'home_loby' 'home_loznice' 'home_obyvak' 'office_kancl'
 'office_loby' 'office_vyhled' 'office_zasedacka']


In [47]:
df.describe()

Unnamed: 0,Farkpoint 70:54:25:70:7c:0c,Farkpoint 70:54:25:70:7c:0b,DIR-53 ec:ad:e0:25:ce:45,UPC1456822 38:43:7d:c8:3d:73,Stark Industries 38:43:7d:ee:40:8b,DIRECT-1oC48x Series 86:25:19:26:53:cb,UPC34DD272 ac:22:05:15:aa:28,Redmi 9AT d6:c5:0d:b6:15:9d,a1m_opik 40:b0:76:4a:a8:e0,DIR-43 ee:ad:e0:35:ce:45,...,Dominio 22:4e:26:61:40:ff,si16e 44:d9:e7:3c:3a:a0,Quadient - Guest 46:d9:e7:2f:90:b9,Wonfood 12:4e:26:61:40:ff,PROFICIO 84:18:3a:38:8a:68,46:d9:e7:2f:7c:f9,66:d9:e7:2f:7c:f9,Quadient 44:d9:e7:2f:7c:f9,AndroidAP 32:ab:6a:c5:d9:d9,si20e f4:92:bf:61:94:85
count,271.0,271.0,271.0,23.0,73.0,148.0,14.0,13.0,66.0,264.0,...,1.0,1.0,72.0,1.0,1.0,36.0,1.0,47.0,84.0,35.0
mean,61.420664,81.269373,82.416974,27.478261,32.273973,56.675676,28.142857,49.538462,47.181818,92.477273,...,54.0,56.0,54.388889,50.0,50.0,44.166667,50.0,55.87234,65.833333,50.0
std,16.363469,10.127216,8.866893,1.503619,9.142787,6.492503,1.460092,4.332347,14.630746,7.097867,...,,,7.644487,,,1.0,,0.87519,3.880235,0.0
min,34.0,60.0,62.0,26.0,18.0,46.0,26.0,44.0,18.0,76.0,...,54.0,56.0,44.0,50.0,50.0,44.0,50.0,50.0,60.0,50.0
25%,52.0,74.0,82.0,26.0,30.0,52.0,28.0,46.0,34.75,87.0,...,54.0,56.0,44.0,50.0,50.0,44.0,50.0,56.0,60.0,50.0
50%,58.0,82.0,83.0,28.0,34.0,56.0,28.0,48.0,52.0,89.5,...,54.0,56.0,60.0,50.0,50.0,44.0,50.0,56.0,68.0,50.0
75%,78.0,88.0,86.0,28.0,40.0,60.0,29.5,50.0,58.0,100.0,...,54.0,56.0,60.0,50.0,50.0,44.0,50.0,56.0,68.0,50.0
max,98.0,100.0,100.0,30.0,44.0,76.0,30.0,58.0,62.0,100.0,...,54.0,56.0,60.0,50.0,50.0,50.0,50.0,56.0,70.0,50.0
