This repository has been archived by the owner on Jan 31, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
68 lines (59 loc) · 2.19 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
import sys
import shutil
import random
from datetime import datetime
import bson
from tqdm import tqdm
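# Example of one stored statement record (records written by main() below also carry a "text" key):
# {"in_response_to": [{"text": "Although practicality beats purity.", "occurrence": 1}], "created_at": 1487420546.689, "extra_data": {}, "occurrence": 1}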
def _record_response(line_obj, irt_line):
    """Count ``irt_line`` as a statement that ``line_obj`` was a response to.

    Every existing ``in_response_to`` entry whose text equals ``irt_line``
    gets its ``occurrence`` incremented; if no entry matches, a new one with
    an occurrence of 1 is appended.
    """
    found = False
    for irt in line_obj['in_response_to']:
        # Compare against the *previous* log line (irt_line), not the current
        # statement's own text; otherwise repeat responses are never counted.
        if irt['text'] == irt_line:
            irt['occurrence'] += 1
            found = True
    if not found:
        line_obj['in_response_to'].append({
            'text': irt_line,
            'occurrence': 1,
        })


def main(db_path='chat.bson'):
    """Train the statement database at ``db_path`` from a chat log file.

    The log file path is built by joining all command-line arguments with
    single spaces, so an unquoted path containing spaces still works. The log
    is decoded as UTF-8 and split on ``'\\n'``; each resulting line is treated
    as a statement, and every line after the first is recorded as a response
    to the line directly before it.

    For each line, the matching record (same ``'text'``) has its
    ``'occurrence'`` incremented, or a new record is created. The updated
    data is written to a temporary file next to ``db_path`` and then moved
    over ``db_path``.

    Args:
        db_path: Path of the BSON database file to read and overwrite.

    Raises:
        SystemExit: Always, with status 0, once the database is written.
    """
    fpath = ' '.join(sys.argv[1:])
    with open(fpath, 'rb') as handle:
        log_data = handle.read().decode('utf-8').split('\n')
    with open(db_path, 'rb') as handle:
        # NOTE(review): the loop below treats the decoded value as a list of
        # record dicts -- confirm that this bson module round-trips a
        # top-level list through dumps()/loads().
        data = bson.loads(handle.read())
    if not data:
        data = []

    # Index records by text so each log line is a dict lookup instead of a
    # scan over all records. setdefault keeps the first record for a given
    # text, matching the previous "first match wins" behaviour.
    by_text = {}
    for record in data:
        by_text.setdefault(record['text'], record)

    for idx, line in tqdm(enumerate(log_data), total=len(log_data)):
        line_obj = by_text.get(line)
        if line_obj is not None:
            line_obj['occurrence'] += 1
        else:
            line_obj = {
                'text': line,
                'in_response_to': [],
                'created_at': datetime.now().timestamp(),
                'extra_data': [],
                'occurrence': 1,
            }
            data.append(line_obj)
            by_text[line] = line_obj
        # The first log line has no predecessor, so nothing to respond to.
        if idx != 0:
            _record_response(line_obj, log_data[idx - 1])

    print('Training finished!')
    print('Writing data atomically...')
    # Write to a sibling temp file first, then move it into place, so a
    # failed write does not leave a truncated database at db_path.
    atom_name = db_path + '.t_asav' + str(random.randint(300, 4000))
    with open(atom_name, 'wb+') as handle:
        handle.write(bson.dumps(data))
    shutil.move(atom_name, db_path)
    print('Finished! Exiting...')
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive shell and is not guaranteed to exist in every runtime.
    sys.exit(0)
# Run the trainer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()