In [29]:
import csv
from binascii import hexlify

In [2]:
# Need to convert from CSV of ints.

def load_datasets(features_path='features.csv', labels_path='labels.csv'):
    """Load the training samples and their labels from two CSV files.

    Every row of the features file is a sequence of integer byte values.
    The row is packed into a ``bytearray`` and hex-encoded, so each byte
    of the sample becomes exactly two hex characters in the result.

    Args:
        features_path: CSV file with one sample per row, one int per field.
        labels_path: CSV file holding the label rows.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is a list of hex-encoded
        ``bytes`` objects, ``labels`` is the list of rows produced by
        ``csv.reader`` (each row is a list of strings).

    Raises:
        ValueError: if a feature field is not an integer in ``range(256)``.
        OSError: if either file cannot be opened.
    """
    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(features_path, 'r', newline='') as f:
        features = [
            hexlify(bytearray(int(field) for field in row))
            for row in csv.reader(f)
        ]
    # Read the labels inside a context manager as well, so the handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(labels_path, 'r', newline='') as f:
        labels = list(csv.reader(f))
    return features, labels

In [3]:
# X: hex-encoded samples (bytes); y: label rows exactly as csv.reader returned them (one list per row).
X, y = load_datasets()

In [4]:
# Flatten y: keep only the first field of each CSV label row.
# The isinstance guard makes this cell idempotent - without it, running the
# cell a second time would index into the label strings ('arm' -> 'a').
y = [arch[0] if isinstance(arch, list) else arch for arch in y]

---

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Hold out 33% of the samples for evaluation; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

---

In [7]:
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# Options shared by the stand-alone vectorizer below and the pipeline built later.
vec_opts = {
    # Count every run of 1 to 4 consecutive tokens.
    "ngram_range": (1, 4),
    "analyzer": "word",
    # Regex matching any two characters. The samples are hexlify() output
    # (two hex digits per byte), so each token corresponds to one byte.
    "token_pattern": "..",
    # Keep every term that occurs in at least one sample.
    "min_df": 1,
}

v = CountVectorizer(**vec_opts)

# Learn the n-gram vocabulary from the training split and return its count matrix.
X_train_vec = v.fit_transform(X_train)

In [11]:
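# Disabled debug snippet: meant to list the n-grams of the first training sample with their counts.
# NOTE(review): inverse_transform() returns only the non-zero terms, while .A[0] is the full dense
# row, so this zip likely pairs terms with the wrong counts - verify before re-enabling.
#for feature, freq in zip(v.inverse_transform(X_train_vec)[0], X_train_vec.A[0]):
#    print("'%s' : %s" % (feature, freq))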

In [12]:
from sklearn.feature_extraction.text import TfidfTransformer

In [13]:
# use_idf=True enables inverse-document-frequency reweighting of the raw counts.
idf_opts = {"use_idf": True}

idf = TfidfTransformer(**idf_opts)

# NOTE: X_train_idf is assigned again further down from trans_pipeline.fit_transform(X_train, y_train).
X_train_idf = idf.fit_transform(X_train_vec)

---

In [14]:
from sklearn.pipeline import Pipeline

In [15]:
# Chain the count and TF-IDF steps (built from the same option dicts as above)
# so they can be fitted and applied as a single transformer.
trans_pipeline = Pipeline([
    ('vec', CountVectorizer(**vec_opts)),
    ('idf', TfidfTransformer(**idf_opts)),
])

In [16]:
# Fit the vocabulary and IDF weights on the training split only; this replaces the earlier X_train_idf.
X_train_idf = trans_pipeline.fit_transform(X_train, y_train)

In [17]:
# transform() only: the held-out samples are projected with the pipeline fitted on the training split.
X_test_idf = trans_pipeline.transform(X_test)

---

In [18]:
from sklearn.naive_bayes import MultinomialNB

In [19]:
# First candidate: multinomial naive Bayes with default hyper-parameters.
clf1 = MultinomialNB().fit(X_train_idf, y_train)

In [20]:
# Mean accuracy on the held-out split.
clf1.score(X_test_idf, y_test)

0.9937107715716731

---

In [21]:
from sklearn.linear_model import SGDClassifier

In [22]:
# Second candidate: linear model trained with SGD. SGD shuffles the training
# data, so pin the seed (same value as the train/test split) to make the
# fitted model and its score reproducible across kernel restarts.
clf2 = SGDClassifier(random_state=42).fit(X_train_idf, y_train)

In [23]:
# Mean accuracy on the held-out split, directly comparable with the clf1 score.
clf2.score(X_test_idf, y_test)

0.9940784885563938

---

In [38]:
# Refit the transform on the complete dataset for the final model.
# NOTE(review): this refits trans_pipeline in place, so X_train_idf / X_test_idf
# (and clf1 / clf2, which were trained on them) may no longer match the
# pipeline's vocabulary once this cell has run.
X_trans = trans_pipeline.fit_transform(X, y)

In [26]:
# Final model: trained on every labelled sample. Seeded so that a rerun
# reproduces the same classifier that is later used against the server.
clf = SGDClassifier(random_state=42).fit(X_trans, y)

In [28]:
# Accuracy on the same data the model was fitted on - a sanity check, not a generalization estimate.
clf.score(X_trans, y)

0.995003807818162

---

In [30]:
from machine_server import Server

In [32]:
# Smoke test of the project-local challenge client: request one challenge and display the result.
s = Server()
s.get()

{'binary': 'EKAAhSigEIU4oBAHAAAAxCBgAMQA4ACEAKABOwAAALAQIADIJ2AAkhAgAJQQIADEIOAAxCBgABEAAABAAAAAkA==',
 'target': ['avr', 'm68k', 'mipsel', 'sh4', 'sparc', 'x86_64']}

In [45]:
# Play the challenge with a fresh client until the server reports a hash.
s = Server()

tries = 0
while not s.hash:
    s.get()

    # Encode the challenge the same way as the training data:
    # hex string -> fitted count/TF-IDF pipeline.
    sample_vec = trans_pipeline.transform([hexlify(s.binary)])
    # Deliberately not named `y`: that would overwrite the training labels
    # and silently break any re-run of the cells that fit or score on `y`.
    guess = clf.predict(sample_vec)[0]

    s.post(guess)

    s.log.info("Guess:[{: >9}]   Answer:[{: >9}]   Wins:[{: >3}]".format(guess, s.ans, s.wins))

    tries += 1

print(tries)
print(s.hash)

2021-04-16 23:18:36,472 - machine_server - INFO - Guess:[      arm]   Answer:[      arm]   Wins:[  1]
2021-04-16 23:18:36,788 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[  2]
2021-04-16 23:18:37,100 - machine_server - INFO - Guess:[   xtensa]   Answer:[   xtensa]   Wins:[  3]
2021-04-16 23:18:37,389 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[  4]
2021-04-16 23:18:37,694 - machine_server - INFO - Guess:[      avr]   Answer:[      avr]   Wins:[  5]
2021-04-16 23:18:37,991 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[  6]
2021-04-16 23:18:38,248 - machine_server - INFO - Guess:[      avr]   Answer:[      avr]   Wins:[  7]
2021-04-16 23:18:38,705 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[  8]
2021-04-16 23:18:39,022 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[  9]
2021-04-16 23:18:39,366 - machine_server - INFO - Guess:[alphaev56]   Answer:[alph

2021-04-16 23:19:01,414 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[ 82]
2021-04-16 23:19:01,715 - machine_server - INFO - Guess:[    sparc]   Answer:[    sparc]   Wins:[ 83]
2021-04-16 23:19:02,040 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[ 84]
2021-04-16 23:19:02,315 - machine_server - INFO - Guess:[      arm]   Answer:[      arm]   Wins:[ 85]
2021-04-16 23:19:02,585 - machine_server - INFO - Guess:[      arm]   Answer:[      arm]   Wins:[ 86]
2021-04-16 23:19:03,070 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[ 87]
2021-04-16 23:19:03,493 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[ 88]
2021-04-16 23:19:03,752 - machine_server - INFO - Guess:[     s390]   Answer:[     s390]   Wins:[ 89]
2021-04-16 23:19:03,994 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[ 90]
2021-04-16 23:19:04,264 - machine_server - INFO - Guess:[    sparc]   Answer:[    

2021-04-16 23:19:26,885 - machine_server - INFO - Guess:[     s390]   Answer:[     s390]   Wins:[163]
2021-04-16 23:19:27,148 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[164]
2021-04-16 23:19:27,396 - machine_server - INFO - Guess:[alphaev56]   Answer:[alphaev56]   Wins:[165]
2021-04-16 23:19:27,678 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[166]
2021-04-16 23:19:27,992 - machine_server - INFO - Guess:[      avr]   Answer:[      avr]   Wins:[167]
2021-04-16 23:19:28,259 - machine_server - INFO - Guess:[   xtensa]   Answer:[   xtensa]   Wins:[168]
2021-04-16 23:19:28,515 - machine_server - INFO - Guess:[     mips]   Answer:[     mips]   Wins:[169]
2021-04-16 23:19:28,768 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[170]
2021-04-16 23:19:29,119 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[171]
2021-04-16 23:19:29,397 - machine_server - INFO - Guess:[  powerpc]   Answer:[  po

2021-04-16 23:19:50,490 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[244]
2021-04-16 23:19:50,787 - machine_server - INFO - Guess:[    sparc]   Answer:[    sparc]   Wins:[245]
2021-04-16 23:19:51,067 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[246]
2021-04-16 23:19:51,324 - machine_server - INFO - Guess:[   mipsel]   Answer:[   mipsel]   Wins:[247]
2021-04-16 23:19:51,594 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[248]
2021-04-16 23:19:51,848 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[249]
2021-04-16 23:19:52,090 - machine_server - INFO - Guess:[    sparc]   Answer:[    sparc]   Wins:[250]
2021-04-16 23:19:52,639 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[251]
2021-04-16 23:19:52,883 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[252]
2021-04-16 23:19:53,124 - machine_server - INFO - Guess:[alphaev56]   Answer:[alph

2021-04-16 23:20:13,291 - machine_server - INFO - Guess:[     mips]   Answer:[     mips]   Wins:[325]
2021-04-16 23:20:13,508 - machine_server - INFO - Guess:[      avr]   Answer:[      avr]   Wins:[326]
2021-04-16 23:20:13,726 - machine_server - INFO - Guess:[     s390]   Answer:[     s390]   Wins:[327]
2021-04-16 23:20:14,119 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[328]
2021-04-16 23:20:14,317 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[329]
2021-04-16 23:20:14,613 - machine_server - INFO - Guess:[      arm]   Answer:[      arm]   Wins:[330]
2021-04-16 23:20:15,042 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[331]
2021-04-16 23:20:15,335 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[332]
2021-04-16 23:20:15,609 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[333]
2021-04-16 23:20:16,134 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x

2021-04-16 23:20:36,983 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[406]
2021-04-16 23:20:37,246 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[407]
2021-04-16 23:20:37,542 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[408]
2021-04-16 23:20:37,779 - machine_server - INFO - Guess:[     s390]   Answer:[     s390]   Wins:[409]
2021-04-16 23:20:38,019 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[410]
2021-04-16 23:20:38,285 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[411]
2021-04-16 23:20:38,545 - machine_server - INFO - Guess:[     m68k]   Answer:[     m68k]   Wins:[412]
2021-04-16 23:20:38,798 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[413]
2021-04-16 23:20:39,053 - machine_server - INFO - Guess:[  powerpc]   Answer:[  powerpc]   Wins:[414]
2021-04-16 23:20:39,321 - machine_server - INFO - Guess:[      arm]   Answer:[    

2021-04-16 23:20:59,462 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[487]
2021-04-16 23:20:59,754 - machine_server - INFO - Guess:[     s390]   Answer:[     s390]   Wins:[488]
2021-04-16 23:21:00,088 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[489]
2021-04-16 23:21:00,325 - machine_server - INFO - Guess:[      sh4]   Answer:[      sh4]   Wins:[490]
2021-04-16 23:21:00,584 - machine_server - INFO - Guess:[    sparc]   Answer:[    sparc]   Wins:[491]
2021-04-16 23:21:00,873 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[492]
2021-04-16 23:21:01,102 - machine_server - INFO - Guess:[     mips]   Answer:[     mips]   Wins:[493]
2021-04-16 23:21:01,730 - machine_server - INFO - Guess:[   x86_64]   Answer:[   x86_64]   Wins:[494]
2021-04-16 23:21:02,035 - machine_server - INFO - Guess:[      avr]   Answer:[      avr]   Wins:[495]
2021-04-16 23:21:02,245 - machine_server - INFO - Guess:[alphaev56]   Answer:[alph

500
Congratulations! Collect your prize at /hash


In [46]:
# Show the value that ended the loop above.
s.hash

'Congratulations! Collect your prize at /hash'

In [48]:
# Claim the prize hash for the given e-mail address.
# NOTE(review): this calls a private method of Server and hard-codes a personal
# e-mail address in the notebook - consider reading it from configuration and
# clearing the output before sharing.
s._request("/hash?email=nbsantos@gmail.com")

{'hash': '63beaa7765a444808c81b159f9a13c3e2bd407756bbfe691f7458c941bbfb89d7b2275736572223a20226e6273616e746f7340676d61696c2e636f6d227d'}