In [1]:
# test_bayloc.ipynb

# First, make sure the `bayloc` package (bayloc/bayloc.py, plus rezin.py) is
# importable from your working directory, and that the necessary packages are
# installed: tqdm (imported below) and e.g. numpy, scipy, sklearn.

from datetime import date
from bayloc.bayloc import BayLoc
from tqdm import tqdm
import ast

# Sample input data: a set of (date_str, country) tuples
# raw_data = [
#     ('2015-06-13', 'FR'),
#     ('2015-05-21', 'DE'),
#     ('2015-05-21', 'FR'),
#     ('2015-05-21', 'FR'),
#     ('2015-05-25', 'ES'),
#     ('2015-05-25', 'ES'),
#     ('2015-05-25', 'FR')
# ]

# Hard-coded sample timeline: (ISO date string, country code) observations,
# listed in chronological order. One date (2015-05-17) appears twice, once per
# country.
example_data = [
    # 2015: RU, with a single IE observation
    ('2015-05-12', 'RU'),
    ('2015-05-16', 'RU'),
    ('2015-05-17', 'RU'),
    ('2015-05-17', 'IE'),
    ('2015-05-31', 'RU'),
    ('2015-06-02', 'RU'),
    ('2015-06-10', 'RU'),
    ('2015-06-17', 'RU'),
    ('2015-06-25', 'RU'),
    ('2015-06-27', 'RU'),
    ('2015-07-04', 'RU'),
    ('2015-07-05', 'RU'),
    ('2015-07-09', 'RU'),
    ('2015-07-11', 'RU'),
    ('2015-07-25', 'RU'),
    ('2015-08-02', 'RU'),
    ('2015-08-20', 'RU'),
    ('2015-08-24', 'RU'),
    ('2015-08-28', 'RU'),
    ('2015-09-01', 'RU'),
    ('2015-09-02', 'RU'),
    ('2015-09-06', 'RU'),
    ('2015-09-16', 'RU'),
    ('2015-09-28', 'RU'),
    ('2015-09-30', 'RU'),
    ('2015-10-05', 'RU'),
    ('2015-10-08', 'RU'),
    ('2015-10-09', 'RU'),
    # September-November 2016: IE
    ('2016-09-20', 'IE'),
    ('2016-09-21', 'IE'),
    ('2016-09-22', 'IE'),
    ('2016-09-26', 'IE'),
    ('2016-09-27', 'IE'),
    ('2016-09-28', 'IE'),
    ('2016-10-01', 'IE'),
    ('2016-10-09', 'IE'),
    ('2016-10-18', 'IE'),
    ('2016-10-24', 'IE'),
    ('2016-10-27', 'IE'),
    ('2016-10-29', 'IE'),
    ('2016-11-01', 'IE'),
    ('2016-11-05', 'IE'),
    ('2016-11-09', 'IE'),
    # December 2016: RU again
    ('2016-12-05', 'RU'),
    ('2016-12-08', 'RU'),
]


# Sample timelines file: every non-blank line holds one Python literal that is
# parsed with ast.literal_eval and stored as a list.
TIMELINES_PATH = '/home/prosillo/corpustwittergitlab/tw_corpus_repo/tw_corpus/data_for_tests/timelines_examples_IE_nobot.txt'
# Upper bound on the number of timelines read from the file.
MAX_TIMELINES = 100

timelines = []
# NOTE(review): idx is not used anywhere in this cell; kept in case another
# cell indexes `timelines` with it.
idx = 6

# Explicit encoding so the read does not depend on the machine's locale.
with open(TIMELINES_PATH, 'r', encoding='utf-8') as f:
    for line in tqdm(f):
        # Stop as soon as the quota is reached instead of scanning the rest
        # of the file.
        if len(timelines) >= MAX_TIMELINES:
            break
        # A blank (e.g. trailing) line would make ast.literal_eval raise
        # SyntaxError, so skip it rather than abort the whole load.
        if not line.strip():
            continue
        timelines.append(list(ast.literal_eval(line)))

# Legacy counter name from the earlier version of this cell: the number of
# timelines actually loaded.
flag = len(timelines)

# Build the model from the hard-coded sample observations.
bayloc = BayLoc(
    raw_data_set=example_data,
    lambda_min=0.0001,
    lambda_max=10,
)

# Ask the model for a timeline that also covers the unobserved days.
full_timeline = bayloc.infer_missing_days()
# To inspect it, uncomment:
# print("Full timeline with inferred missing days:")
# for day, location in sorted(full_timeline.items()):
#     print(f"{day}: {location}")

# Residence periods per location.
# NOTE(review): n_days=7 is presumably a minimum-stay threshold in days —
# confirm against BayLoc.get_residence_history.
residence = bayloc.get_residence_history(n_days=7)
print("\nResidence history (periods of stay):")
for country, stays in residence.items():
    print(f"{country}: {stays}")

# Summary statistics, reported as one section per key of the returned mapping.
stats = bayloc.get_basic_stats()

print("\nBasic timeline statistics:")

stat_sections = (
    ("\nInput Data Stats:", "input_data"),
    ("\nInferred Timeline Stats:", "inferred_timeline"),
)
for heading, section_key in stat_sections:
    print(heading)
    for stat_name, stat_value in stats[section_key].items():
        print(f"{stat_name}: {stat_value}")


# AUROC as computed by BayLoc on a training subset.
# NOTE(review): r=0.8 is presumably the fraction of the data used for
# training — confirm against BayLoc.compute_auroc_training_subset.
auroc = bayloc.compute_auroc_training_subset(r=0.8)
print(f"\nMicro-average AUROC: {auroc:.3f}")


100it [00:00, 1796.70it/s]



Residence history (periods of stay):
IE: [['2015-05-12', '2015-05-19'], ['2015-07-04', '2015-07-08'], ['2015-07-30', '2015-08-03'], ['2015-08-07', '2015-08-11'], ['2015-08-19', '2015-08-23'], ['2015-08-27', '2015-09-04'], ['2015-09-10', '2015-09-16'], ['2015-09-22', '2015-09-22'], ['2015-09-30', '2015-11-01']]
RU: [['2015-05-20', '2015-07-03'], ['2015-07-09', '2015-07-29'], ['2015-08-04', '2015-08-06'], ['2015-08-12', '2015-08-18'], ['2015-08-24', '2015-08-26'], ['2015-09-05', '2015-09-09'], ['2015-09-17', '2015-09-21'], ['2015-09-23', '2015-09-29'], ['2015-11-02', '2016-12-08']]

Basic timeline statistics:

Input Data Stats:
total_days: 44
unique_locations: {'IE', 'RU'}
counts_per_location: {'RU': 29, 'IE': 16}
start_date: 2015-05-12
end_date: 2016-12-08

Inferred Timeline Stats:
total_days: 577
unique_locations: {'IE', 'RU'}
counts_per_location: {'RU': 362, 'IE': 215}
start_date: 2015-05-12
end_date: 2016-12-08
number_of_known_days: 44
number_of_inferred_days: 533
optimized_lambda: 