In [2]:
#!/usr/bin/env python
# coding: utf-8

'''
Aggregate activity by character and by community
Natalia Velez, April 2021
'''

import os, re, glob
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import join as opj

In [4]:
# Destination folder for everything this notebook writes.
# exist_ok lets the cell be re-run without failing when the folder is already there.
out_dir = opj('outputs/activity_agg')
print('Creating output directory: %s' % (out_dir,))
os.makedirs(name=out_dir, exist_ok=True)

Creating output directory: outputs/activity_agg


In [None]:
# Find family labels


In [5]:
# Find matrix files: only names ending in a digit before '.txt' are matched,
# which excludes the companion '*_labels.txt' files in the same folder.
mtx_files = glob.glob('outputs/activity_in/*[0-9].txt')
mtx_files.sort()
#mtx_files = mtx_files[:3] # Debug only!
print('Matrix files:')
print(*mtx_files[:10], sep='\n')
print('...\n')

# Find corresponding labels: each matrix file has a sibling '<name>_labels.txt'
label_files = [f.replace('.txt', '_labels.txt') for f in mtx_files]
print('Label files:')
print(*label_files[:10], sep='\n')
print('...\n')

# Concatenate labels:
print('Loading labels...')
# ndmin=1 keeps a file holding a single label as a 1-element array; without it
# np.loadtxt returns a 0-d array, which np.concatenate refuses to join.
all_labels = [np.loadtxt(f, ndmin=1) for f in label_files]
# Use the builtin int: the np.int alias was removed in NumPy 1.24.
all_labels = np.concatenate(all_labels).astype(int)
print(all_labels)


# Sanity check: Are any labels repeated across log files?
unique_labels, label_counts = np.unique(all_labels, return_counts=True)
is_repeated = label_counts > 1
repeated_labels = unique_labels[is_repeated]
# Guard the reduction: np.min raises ValueError on an empty array, which is
# exactly what we get when no label is repeated.
min_repeats = np.min(label_counts[is_repeated]) if is_repeated.any() else 0
# NOTE(review): np.savetxt's default format writes these integer IDs in
# floating-point notation; left unchanged so the output file format stays the same.
np.savetxt(opj(out_dir, 'avatarIDs.txt'), unique_labels)

print('%i total entries' % len(all_labels))
print('%i unique avatar IDs' % len(unique_labels))
if is_repeated.any():
    print('%i repeated labels (range %i-%i)' % (len(repeated_labels),
                                                min_repeats,
                                                np.max(label_counts)))
else:
    print('0 repeated labels')

Matrix files:
outputs/activity_in/jobmatrix_release-284_start-1573895672.txt
outputs/activity_in/jobmatrix_release-284_start-1573982073.txt
outputs/activity_in/jobmatrix_release-284_start-1574068473.txt
outputs/activity_in/jobmatrix_release-285_start-1574102503.txt
outputs/activity_in/jobmatrix_release-287_start-1574151678.txt
outputs/activity_in/jobmatrix_release-287_start-1574238079.txt
outputs/activity_in/jobmatrix_release-287_start-1574324479.txt
outputs/activity_in/jobmatrix_release-287_start-1574410879.txt
outputs/activity_in/jobmatrix_release-287_start-1574497279.txt
outputs/activity_in/jobmatrix_release-289_start-1574552311.txt
...
Label files:
outputs/activity_in/jobmatrix_release-284_start-1573895672_labels.txt
outputs/activity_in/jobmatrix_release-284_start-1573982073_labels.txt
outputs/activity_in/jobmatrix_release-284_start-1574068473_labels.txt
outputs/activity_in/jobmatrix_release-285_start-1574102503_labels.txt
outputs/activity_in/jobmatrix_release-287_start-1574151678_