In [1]:
import pandas as pd
import numpy as np

import scipy.sparse
import sklearn.feature_extraction

import matplotlib.pylab as plt
%matplotlib inline

from tqdm import tqdm
import platform

pd.set_option("display.max_rows", 10)
pd.set_option('display.max_columns', 1100)

import os

%pylab inline
warnings.filterwarnings('ignore')

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


## User Agent representation

### User Agent as tuple

#### From Udger 

`UserAgent = {ua_family_code, ua_version, ua_class_code, device_class_code, os_family_code, os_code}`

### Load data (if needed)

In [4]:
main_data = np.load('df/main_prod_data.npy').tolist()
values_data = np.load('df/values_prod_data.npy').tolist()
order_data = np.load('df/order_prod_data.npy').tolist()

In [10]:
main_df = pd.DataFrame(main_data)

main_df

list_device_class_code = pd.DataFrame(main_data).device_class_code.value_counts().index.tolist()
list_os_family_code = pd.DataFrame(main_data).os_family_code.value_counts().index.tolist()
list_os_code = pd.DataFrame(main_data).os_code.value_counts().index.tolist()
list_ua_class_code = pd.DataFrame(main_data).ua_class_code.value_counts().index.tolist()
list_ua_family_code = pd.DataFrame(main_data).ua_family_code.value_counts().index.tolist()
list_ua_version = pd.DataFrame(main_data).ua_version.value_counts().index.tolist()

print("Device count: {}".format(len(list_device_class_code)))
print("Device platform family count: {}".format(len(list_os_family_code)))
print("Device platform count: {}".format(len(list_os_code)))
print("Device browser class count: {}".format(len(list_ua_class_code)))
print("Device browser family count: {}".format(len(list_ua_family_code)))
print("Device browser version count: {}".format(len(list_ua_version)))

Device count: 5
Device platform family count: 29
Device platform count: 98
Device browser class count: 5
Device browser family count: 129
Device browser version count: 2585


### Train Part

In [7]:
important_orders_keys_set = {
    'Upgrade-Insecure-Requests',
    'Accept', 
    'If-Modified-Since',
    'Host', 
    'Connection', 
    'User-Agent', 
    'From', 
    'Accept-Encoding' 
}

important_values_keys_set = {
    'Accept', 
    'Accept-Charset', 
    'Accept-Encoding'
}

In [8]:
orders_vectorizer = sklearn.feature_extraction.DictVectorizer(sparse=True, dtype=float)
values_vectorizer = sklearn.feature_extraction.DictVectorizer(sparse=True, dtype=float)

from lib.parsers.logParser import LogParser

l_parser = LogParser(log_folder='Logs/')
#from sklearn import preprocessing

#y = pd.DataFrame(main_data).User_Agent.fillna('NaN')
#print("UA count: {}".format(len(list_ua)))
#from sklearn import preprocessing

#y = pd.DataFrame(main_data).User_Agent.fillna('NaN')
#print("UA count: {}".format(len(list_ua)))#### OS_family_code
l_parser.reassign_orders_values(order_data, values_data)
full_sparce_dummy = l_parser.prepare_data(orders_vectorizer, values_vectorizer, important_orders_keys_set, important_values_keys_set, fit_dict=True)

100%|██████████| 178439/178439 [00:00<00:00, 504529.35it/s]
100%|██████████| 178439/178439 [00:00<00:00, 295416.17it/s]
 40%|████      | 71564/178439 [00:00<00:00, 715635.56it/s]

Sparse dummy orders shape: 
(178439, 49)


100%|██████████| 178439/178439 [00:00<00:00, 717345.55it/s]

Sparse dummy values shape: 
(178439, 291)





In [9]:
import os
from sklearn.externals import joblib

filename_order = 'cls/prod_orders_vectorizer.joblib.pkl'
_ = joblib.dump(orders_vectorizer, filename_order, compress=9)

filename_values = 'cls/prod_values_vectorizer.joblib.pkl'
_ = joblib.dump(values_vectorizer, filename_values, compress=9)

from lib.helpers.fileSplitter import split_file

files_count = split_file(filename_order, 'parted-cls/prod_orders_vectorizer.joblib.pkl')
files_count = split_file(filename_values, 'parted-cls/prod_values_vectorizer.joblib.pkl')

# Warning

Sometimes if dataset have over 150K rows and n_jobs=-1 we get `OSError: [Errno 28] No space left on device` in `sklearn/externals/joblib/pool.py`

https://github.com/scikit-learn/scikit-learn/issues/3313
https://stackoverflow.com/questions/24406937/scikit-learn-joblib-bug-multiprocessing-pool-self-value-out-of-range-for-i-fo

Maybe
https://stackoverflow.com/questions/40115043/no-space-left-on-device-error-while-fitting-sklearn-model

`It seems, that your are running out of shared memory (/dev/shm when you run df -h). Try setting JOBLIB_TEMP_FOLDER environment variable to something different: e.g., to /tmp. In my case it has solved the problem.`

#### OS_family_code

In [12]:
%%time

from sklearn.linear_model import LogisticRegression

clf_os_family_code = LogisticRegression(random_state=42, C=100)
clf_os_family_code.fit(full_sparce_dummy, main_df.os_family_code.fillna('NaN'))

CPU times: user 57.9 s, sys: 148 ms, total: 58 s
Wall time: 58.6 s


In [13]:
import os
from sklearn.externals import joblib#### OS_code

filename = 'cls/prod_os_family_code_logreg_cls.joblib.pkl'
_ = joblib.dump(clf_os_family_code, filename, compress=9)

print("Model saved with size(Bytes): {}".format(os.stat(filename).st_size))

files_count = split_file(filename, 'parted-cls/prod_os_family_code_logreg_cls.joblib.pkl')

print('Splitted in {} files'.format(files_count))

Model saved with size(Bytes): 59470
Splitted in 0 files


#### OS_code

In [14]:
%%time

clf_os_code = LogisticRegression(random_state=42, C=100)
clf_os_code.fit(full_sparce_dummy, main_df.os_code.fillna('NaN'))

CPU times: user 4min 4s, sys: 600 ms, total: 4min 4s
Wall time: 4min 6s


In [15]:
filename = 'cls/prod_os_code_logreg_cls.joblib.pkl'
_ = joblib.dump(clf_os_code, filename, compress=9)

print("Model saved with size(Bytes): {}".format(os.stat(filename).st_size))

files_count = split_file(filename, 'parted-cls/prod_os_code_logreg_cls.joblib.pkl')

print('Splitted in {} files'.format(files_count))

Model saved with size(Bytes): 203087
Splitted in 0 files


#### Browser family_code

In [16]:
%%time

clf_ua_family_code = LogisticRegression(random_state=42, C=100)
clf_ua_family_code.fit(full_sparce_dummy, main_df.ua_family_code.fillna('NaN'))

CPU times: user 3min 47s, sys: 0 ns, total: 3min 47s
Wall time: 3min 47s


In [17]:
filename = 'cls/prod_ua_family_code_logreg_cls.joblib.pkl'
_ = joblib.dump(clf_ua_family_code, filename, compress=9)

print("Model saved with size(Bytes): {}".format(os.stat(filename).st_size))

files_count = split_file(filename, 'parted-cls/prod_ua_family_code_logreg_cls.joblib.pkl')

print('Splitted in {} files'.format(files_count))

Model saved with size(Bytes): 257819
Splitted in 0 files


#### Browser version

In [18]:
%%time

clf_ua_version = LogisticRegression(random_state=42, C=100)
clf_ua_version.fit(full_sparce_dummy, main_df.ua_version.fillna('NaN'))

CPU times: user 1h 1min 38s, sys: 4.95 s, total: 1h 1min 43s
Wall time: 1h 1min 59s


In [19]:
filename = 'cls/prod_ua_version_logreg_cls.joblib.pkl'
_ = joblib.dump(clf_ua_version, filename, compress=9)

print("Model saved with size(Bytes): {}".format(os.stat(filename).st_size))

files_count = split_file(filename, 'parted-cls/prod_ua_version_logreg_cls.joblib.pkl')

print('Splitted in {} files'.format(files_count))

Model saved with size(Bytes): 4852875
Splitted in 0 files


## Test part

In [20]:
import pandas as pd
import numpy as np

import scipy.sparse
import sklearn.feature_extraction

import matplotlib.pylab as plt
%matplotlib inline

from tqdm import tqdm
import platform

pd.set_option("display.max_rows", 10)
pd.set_option('display.max_columns', 1100)

import os

%pylab inline
warnings.filterwarnings('ignore')

Populating the interactive namespace from numpy and matplotlib


In [2]:
important_orders_keys_set = {
    'Upgrade-Insecure-Requests',
    'Accept', 
    'If-Modified-Since',
    'Host', 
    'Connection', 
    'User-Agent', 
    'From', 
    'Accept-Encoding' 
}

important_values_keys_set = {
    'Accept', 
    'Accept-Charset', 
    'Accept-Encoding'
}

import os
from sklearn.externals import joblib
from lib.helpers.fileSplitter import cat_files

orders_vectorizer = joblib.load('cls/prod_orders_vectorizer.joblib.pkl')
values_vectorizer = joblib.load("cls/prod_values_vectorizer.joblib.pkl")

clf_os_family_code = joblib.load('cls/prod_os_family_code_logreg_cls.joblib.pkl')

clf_os_code = joblib.load('cls/prod_os_code_logreg_cls.joblib.pkl')

clf_ua_family_code = joblib.load('cls/prod_ua_family_code_logreg_cls.joblib.pkl')

clf_ua_version = joblib.load('cls/prod_ua_version_logreg_cls.joblib.pkl')

### Load test data

In [3]:
main_data = np.load('df/main_prodtest_data1.npy').tolist()[200000:250000]
values_data = np.load('df/values_prodtest_data1.npy').tolist()[200000:250000]
order_data = np.load('df/order_prodtest_data1.npy').tolist()[200000:250000]

main_df = pd.DataFrame(main_data)

main_df

Unnamed: 0,User_Agent,device_class_code,ip,os_code,os_family_code,timestamp,ua_class_code,ua_family_code,ua_version
0,Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) G...,desktop,87.171.230.39,windows_7,windows,0,browser,firefox,firefox46.0
1,Mozilla/5.0 (Linux; Android 7.0; ASUS_Z017DC B...,smartphone,177.152.144.178,android_7_0,android,0,mobile_browser,chrome_mobile,chrome_mobile60.0.3112.107
2,Mozilla/5.0 (Windows NT 5.1; rv:48.0) Gecko/20...,desktop,80.144.206.21,windows_xp,windows,0,browser,firefox,firefox48.0
3,Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X...,tablet,84.144.66.168,ios_10,ios,0,mobile_browser,safari_mobile,safari_mobile10.0
4,Mozilla/5.0 (Windows NT 6.1; rv:55.0) Gecko/20...,desktop,79.214.252.12,windows_7,windows,0,browser,firefox,firefox55.0
...,...,...,...,...,...,...,...,...,...
49995,Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like...,smartphone,123.16.181.186,ios_10,ios,0,mobile_browser,safari_mobile,safari_mobile10.0
49996,Mozilla/5.0 (Linux; Android 6.0; LG-F600L Buil...,smartphone,64.233.173.141,android_6,android,0,mobile_browser,chrome_mobile,chrome_mobile60.0.3112.107
49997,Mozilla/5.0 (Linux; Android 5.1.1; SM-J700H Bu...,smartphone,66.249.82.92,android_5_1,android,0,mobile_browser,chrome_mobile,chrome_mobile60.0.3112.107
49998,Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like...,smartphone,192.228.196.98,ios_10,ios,0,mobile_browser,safari_mobile,safari_mobile10.0


In [4]:
important_values_keys_set = {
    'Accept', 
    'Accept-Charset', 
    'Accept-Encoding'
}
important_orders_keys_set = {
    'Upgrade-Insecure-Requests',
    'Accept', 
    'If-Modified-Since',
    'Host', 
    'Connection', 
    'User-Agent', 
    'From', 
    'Accept-Encoding' 
}

from lib.parsers.logParser import LogParser

l_parser = LogParser(log_folder='Logs/')

l_parser.reassign_orders_values(order_data, values_data)
X_test = l_parser.prepare_data(orders_vectorizer, values_vectorizer, important_orders_keys_set, important_values_keys_set, fit_dict=False)

100%|██████████| 50000/50000 [00:00<00:00, 540769.30it/s]
100%|██████████| 50000/50000 [00:00<00:00, 442357.81it/s]
100%|██████████| 50000/50000 [00:00<00:00, 699685.05it/s]

Sparse dummy orders shape: 
(50000, 49)
Sparse dummy values shape: 
(50000, 291)





### Calculate scores

Примечание: Для Decision Tree в cross_val_score по умолчанию берется показатель 'Accuracy'

Поскольку 'Accuracy' для линейной регрессии линейный мы не будем считать на 3-х или 5-ти фолдах(долго), а просто возьмем от тренировочной выборки 'Accuracy'

In [32]:
thres = 0.00001

**Browser (clf_ua_family_code)**

In [33]:
from lib.thresholdPredictions import ThresholdPredictions

pred = ThresholdPredictions(user_agent_list=clf_ua_family_code.classes_.tolist(), clf=clf_ua_family_code)
y_test_names, y_predicted, compare_answers, is_bot, answers_count = pred.bot_predict(X_test, main_df.ua_family_code.fillna('NaN'), thres, sparce_y=False, mark_new_labels_None=True, single_labels=True)

50000it [00:01, 35713.67it/s]


In [34]:
compare_frame = pd.concat(
    [
        pd.DataFrame(y_test_names),
        y_predicted, 
        pd.DataFrame(compare_answers),
        pd.DataFrame(is_bot), 
        pd.DataFrame(answers_count)
    ], keys=['browser_name', 'predicted_browser_name', 'browser_name_correctness', 'browser_name_bot', 'browser_name_count'], axis=1, join='inner')

compare_frame

Unnamed: 0_level_0,browser_name,predicted_browser_name,browser_name_correctness,browser_name_bot,browser_name_count
Unnamed: 0_level_1,ua_family_code,0,0,0,0
0,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67
1,chrome_mobile,"[NaN, amigo, android_browser, android_webview,...",True,False,34
2,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67
3,safari_mobile,"[NaN, android_browser, arora, blackberry_brows...",True,False,28
4,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67
...,...,...,...,...,...
49995,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70
49996,chrome_mobile,"[NaN, arora, beonex, blackberry_browser, camin...",True,False,59
49997,chrome_mobile,"[NaN, android_browser, blackberry_browser, chr...",True,False,12
49998,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70


Accuracy: $ACC = \frac{TP + TN}{P + N},\ \ \mathrm{where}\ \ P + N = length,\ \ TP = sum(True), \ \ TN = 0$

In [35]:
compare_frame.browser_name_bot[0].value_counts()

False    44030
True      5952
Name: 0, dtype: int64

In [36]:
print('Сonfirmed bot: {}'.format(sum(compare_frame.browser_name_bot[0])/50000))

Сonfirmed bot: 0.11904


**Browser + Browser version (clf_ua_family_code + clf_ua_version)**

In [39]:
pred = ThresholdPredictions(user_agent_list=clf_ua_version.classes_.tolist(), clf=clf_ua_version)
y_test_names, y_predicted, compare_answers, is_bot, answers_count = pred.bot_predict(X_test, main_df.ua_version.fillna('NaN'), thres, sparce_y=False, mark_new_labels_None=True, single_labels=True)

50000it [00:02, 17710.74it/s]


In [40]:
compare_frame['browser_version'] = pd.DataFrame(y_test_names)
compare_frame['predicted_browser_version'] = y_predicted
compare_frame['browser_version_correctness'] = pd.DataFrame(compare_answers)
compare_frame['browser_version_bot'] = pd.DataFrame(is_bot)
compare_frame['browser_version_count'] = pd.DataFrame(answers_count)

In [41]:
compare_frame

Unnamed: 0_level_0,browser_name,predicted_browser_name,browser_name_correctness,browser_name_bot,browser_name_count,browser_version,predicted_browser_version,browser_version_correctness,browser_version_bot,browser_version_count
Unnamed: 0_level_1,ua_family_code,0,0,0,0,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox46.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515
1,chrome_mobile,"[NaN, amigo, android_browser, android_webview,...",True,False,34,chrome_mobile60.0.3112.107,"[NaN, amigo47.5.2526.115, android_browser4.0, ...",False,True,334
2,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox48.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515
3,safari_mobile,"[NaN, android_browser, arora, blackberry_brows...",True,False,28,safari_mobile10.0,"[NaN, android_browser4.0, blackberry_browser, ...",True,False,96
4,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox55.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",True,False,515
...,...,...,...,...,...,...,...,...,...,...
49995,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766
49996,chrome_mobile,"[NaN, arora, beonex, blackberry_browser, camin...",True,False,59,chrome_mobile60.0.3112.107,"[NaN, amigo56.0.2924.197, amigo_mobile, androi...",True,False,754
49997,chrome_mobile,"[NaN, android_browser, blackberry_browser, chr...",True,False,12,chrome_mobile60.0.3112.107,"[NaN, aloha_browser1.5, amigo47.5.2526.111, am...",True,False,75
49998,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766


In [57]:
print('Сonfirmed bot: {}'.format(sum(compare_frame.browser_version_bot)/50000))
print('Conditional Сonfirmed bot: {}'.format(sum(compare_frame.browser_name_bot[0] | compare_frame.browser_version_bot)/50000))

Сonfirmed bot: 0.27672
Conditional Сonfirmed bot: 0.33966


**Browser + Browser version + Platform (clf_ua_family_code + clf_ua_version + clf_os_family_code)**

In [46]:
pred = ThresholdPredictions(user_agent_list=clf_os_family_code.classes_.tolist(), clf=clf_os_family_code)
y_test_names, y_predicted, compare_answers, is_bot, answers_count = pred.bot_predict(X_test, main_df.os_family_code.fillna('NaN'), thres, sparce_y=False, mark_new_labels_None=True, single_labels=True)

50000it [00:01, 37253.20it/s]


In [47]:
compare_frame['platform'] = pd.DataFrame(y_test_names)
compare_frame['predicted_platform'] = y_predicted
compare_frame['platform_correctness'] = pd.DataFrame(compare_answers)
compare_frame['platform_bot'] = pd.DataFrame(is_bot)
compare_frame['platform_count'] = pd.DataFrame(answers_count)
compare_frame

Unnamed: 0_level_0,browser_name,predicted_browser_name,browser_name_correctness,browser_name_bot,browser_name_count,browser_version,predicted_browser_version,browser_version_correctness,browser_version_bot,browser_version_count,platform,predicted_platform,platform_correctness,platform_bot,platform_count
Unnamed: 0_level_1,ua_family_code,0,0,0,0,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox46.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,
1,chrome_mobile,"[NaN, amigo, android_browser, android_webview,...",True,False,34,chrome_mobile60.0.3112.107,"[NaN, amigo47.5.2526.115, android_browser4.0, ...",False,True,334,,"[NaN, android, bsd, chrome_os, ios, jvm, linux...",,,
2,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox48.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,
3,safari_mobile,"[NaN, android_browser, arora, blackberry_brows...",True,False,28,safari_mobile10.0,"[NaN, android_browser4.0, blackberry_browser, ...",True,False,96,,"[NaN, android, blackberry_os, bsd, chrome_os, ...",,,
4,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox55.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",True,False,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,
49996,chrome_mobile,"[NaN, arora, beonex, blackberry_browser, camin...",True,False,59,chrome_mobile60.0.3112.107,"[NaN, amigo56.0.2924.197, amigo_mobile, androi...",True,False,754,,"[NaN, aix, android, beos, blackberry_os, bsd, ...",,,
49997,chrome_mobile,"[NaN, android_browser, blackberry_browser, chr...",True,False,12,chrome_mobile60.0.3112.107,"[NaN, aloha_browser1.5, amigo47.5.2526.111, am...",True,False,75,,"[NaN, android, blackberry_os, ios, linux, mac_...",,,
49998,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,


In [58]:
print('Сonfirmed bot: {}'.format(sum(compare_frame.platform_bot)/50000))
print('Conditional Сonfirmed bot: {}'.format(sum(compare_frame.browser_name_bot[0] | compare_frame.browser_version_bot | compare_frame.platform_bot)/50000))

Сonfirmed bot: 0.0
Conditional Сonfirmed bot: 0.33966


**Browser + Browser version + Platform + Platform version (clf_ua_family_code + clf_ua_version + clf_os_family_code + clf_os_code)**

In [50]:
pred = ThresholdPredictions(user_agent_list=clf_os_code.classes_.tolist(), clf=clf_os_code)
y_test_names, y_predicted, compare_answers, is_bot, answers_count = pred.bot_predict(X_test, main_df.os_code.fillna('NaN'), thres, sparce_y=False, mark_new_labels_None=True, single_labels=True)

50000it [00:01, 35521.37it/s]


In [51]:
compare_frame['platform_version'] = pd.DataFrame(y_test_names)
compare_frame['predicted_platform_version'] = y_predicted
compare_frame['platform_version_correctness'] = pd.DataFrame(compare_answers)
compare_frame['platform_version_bot'] = pd.DataFrame(is_bot)
compare_frame['platform_version_count'] = pd.DataFrame(answers_count)
compare_frame

Unnamed: 0_level_0,browser_name,predicted_browser_name,browser_name_correctness,browser_name_bot,browser_name_count,browser_version,predicted_browser_version,browser_version_correctness,browser_version_bot,browser_version_count,platform,predicted_platform,platform_correctness,platform_bot,platform_count,platform_version,predicted_platform_version,platform_version_correctness,platform_version_bot,platform_version_count
Unnamed: 0_level_1,ua_family_code,0,0,0,0,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox46.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,,windows_7,"[NaN, aix, android_4_1, android_4_2, android_4...",True,False,55
1,chrome_mobile,"[NaN, amigo, android_browser, android_webview,...",True,False,34,chrome_mobile60.0.3112.107,"[NaN, amigo47.5.2526.115, android_browser4.0, ...",False,True,334,,"[NaN, android, bsd, chrome_os, ios, jvm, linux...",,,,android_7_0,"[NaN, android_4_2, android_4_4, android_5_0, a...",True,False,42
2,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox48.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",False,True,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,,windows_xp,"[NaN, aix, android_4_1, android_4_2, android_4...",True,False,55
3,safari_mobile,"[NaN, android_browser, arora, blackberry_brows...",True,False,28,safari_mobile10.0,"[NaN, android_browser4.0, blackberry_browser, ...",True,False,96,,"[NaN, android, blackberry_os, bsd, chrome_os, ...",,,,ios_10,"[NaN, android_4_2, android_4_4, android_5_0, a...",True,False,35
4,firefox,"[NaN, amigo, android_browser, aol_explorer, ar...",True,False,67,firefox55.0,"[NaN, android_browser4.0, aol_explorer4.0, aol...",True,False,515,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,,windows_7,"[NaN, aix, android_4_1, android_4_2, android_4...",True,False,55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,,ios_10,"[NaN, aix, android_2, android_4_1, android_4_2...",True,False,52
49996,chrome_mobile,"[NaN, arora, beonex, blackberry_browser, camin...",True,False,59,chrome_mobile60.0.3112.107,"[NaN, amigo56.0.2924.197, amigo_mobile, androi...",True,False,754,,"[NaN, aix, android, beos, blackberry_os, bsd, ...",,,,android_6,"[NaN, aix, android_4_2, android_4_4, android_5...",True,False,46
49997,chrome_mobile,"[NaN, android_browser, blackberry_browser, chr...",True,False,12,chrome_mobile60.0.3112.107,"[NaN, aloha_browser1.5, amigo47.5.2526.111, am...",True,False,75,,"[NaN, android, blackberry_os, ios, linux, mac_...",,,,android_5_1,"[NaN, android_4_3, android_4_4, android_5_1, a...",True,False,15
49998,safari_mobile,"[NaN, android_browser, android_webview, aol_ex...",True,False,70,safari_mobile10.0,"[NaN, amigo47.5.2526.111, amigo56.0.2924.197, ...",True,False,766,,"[NaN, aix, android, beos, bsd, chrome_os, hp_u...",,,,ios_10,"[NaN, aix, android_2, android_4_1, android_4_2...",True,False,52


In [59]:
print('Сonfirmed bot: {}'.format(sum(compare_frame.platform_version_bot)/50000))
print('Conditional Сonfirmed bot: {}'.format(sum(compare_frame.browser_name_bot[0] | compare_frame.browser_version_bot | compare_frame.platform_bot | compare_frame.platform_version_bot)/50000))

Сonfirmed bot: 0.06036
Conditional Сonfirmed bot: 0.35916
