In [1]:
from os.path import join
from os import sep, getcwd, chdir
import json
from tqdm import tqdm
from CommunitySizesCreators import RedditCommunitySizesFetcher, NormalCommunitySizesGenerator
from AnomalyInfuser import create_experiment_networks

  from tqdm.autonotebook import tqdm


In [2]:
# Imports from parent directory.
# The working directory is switched to the parent only for the duration of the
# import. NOTE(review): this relies on the current working directory being on
# the import path (as it usually is in a notebook kernel) - confirm.
original_cur_dir = getcwd()
chdir('..')
try:
    from SingleExperimentSettingDirCreator import create_experiment_directories
finally:
    # Go back to current directory even if the import fails, so the relative
    # paths used by the rest of the notebook keep pointing to the same place.
    chdir(original_cur_dir)

# General configuration

In [3]:
# Root folder of the Reddit dataset; passed to RedditCommunitySizesFetcher below.
# NOTE: hard-coded, machine-specific location on drive E: - adjust before
# running this notebook on another machine.
REDDIT_MAIN_PATH = join('E:', sep, 'Datasets', 'reddit')
# Base directory of the experiment (relative to the notebook's working directory).
EXPERIMENT_PATH = 'Experiment'
# Sub-directory where the community-size JSON files are written.
RAW_DATA_PATH = join(EXPERIMENT_PATH, 'Raw_data')
# Directory handed to create_experiment_networks as raw_comm_sizes_dir_path.
RAW_NETWORKS_PATH = join(RAW_DATA_PATH, 'RawCommSizes')

# Network-generation settings shared by the directory and network creation steps.
from ExperimentSettings import EXPERIMENT_SETTINGS

# Create experiment directories

In [4]:
# Display the imported settings so the configuration used for this run is
# recorded in the notebook output.
EXPERIMENT_SETTINGS

{'norm_comm_alg': <function networkx.generators.random_graphs.barabasi_albert_graph(n, m, seed=None)>,
 'anom_comm_alg': <function networkx.generators.random_graphs.gnp_random_graph(n, p, seed=None, directed=False)>,
 'k_min': 1,
 'k_max': 1,
 'norm_m': 1,
 'norm_inter_p': 0.075,
 'anom_m': [0.01, 0.02, 0.04, 0.08, 0.16],
 'anom_inter_p': [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}

In [5]:
# Group-size settings passed to create_experiment_directories.
group_sizes = [
    'min',
    'quantile10',
    'quartile1',
    'median',
    'random',
]

In [6]:
# Create the experiment directories under EXPERIMENT_PATH for the configured
# settings and group sizes.
create_experiment_directories(
    base_dir=EXPERIMENT_PATH,
    network_generator_config=EXPERIMENT_SETTINGS,
    group_sizes=group_sizes,
)

# Generate community sizes population from Reddit network

## Configuration

In [7]:
# JSON file holding the Reddit community sizes: written by
# save_comm_size_dict_json and later read by NormalCommunitySizesGenerator.
COMM_SIZES_FILE_PATH = join(RAW_DATA_PATH, 'reddit_comm_sizes.json')

## Generate community sizes population from Reddit network

In [11]:
# Fetcher pointed at the Reddit dataset root configured in REDDIT_MAIN_PATH.
comm_sizes_fetcher = RedditCommunitySizesFetcher(REDDIT_MAIN_PATH)

In [13]:
# Build the community-size dictionary (trailing ';' suppresses the cell's echo).
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt at
# 0/20128 - confirm the dictionary was fully built before relying on the
# JSON file saved in the next step.
comm_sizes_fetcher.create_comm_size_dict(use_edges_csv=True);

  0%|          | 0/20128 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [8]:
# Persist the community sizes to COMM_SIZES_FILE_PATH so the sampling step
# below can load them from disk.
comm_sizes_fetcher.save_comm_size_dict_json(COMM_SIZES_FILE_PATH)

# Generate normal communities sizes

In [10]:
# Keyword arguments forwarded to generate_community_sizes_from_reddit.
NORMAL_COMMS_GENERATOR_CONFIG = dict(
    num_comms=110,
    min_comm_size=30,
    max_comm_size=1500,
)

# Inclusive range of random seeds; one community-size sample is drawn per seed.
min_seed, max_seed = 1, 5

## Data Creation

In [11]:
# Generator constructed from the community-size file at COMM_SIZES_FILE_PATH.
normal_comm_sizes_generator = NormalCommunitySizesGenerator(
    comm_sizes_file_path=COMM_SIZES_FILE_PATH
)

# Draw one sample per seed in [min_seed, max_seed] and dump each sample to its
# own JSON file named with the zero-padded seed.
# NOTE(review): the files are written to RAW_DATA_PATH, while
# create_experiment_networks is given RAW_NETWORKS_PATH as its
# raw_comm_sizes_dir_path - confirm the files end up where that step reads them.
for seed in range(min_seed, max_seed + 1):
    print(f'Sampling normal community sizes {seed} / {max_seed}:')
    file_path = join(RAW_DATA_PATH, f'comm_sizes_{seed:02}.json')

    comm_sizes_list = normal_comm_sizes_generator.generate_community_sizes_from_reddit(
        **NORMAL_COMMS_GENERATOR_CONFIG,
        random_seed=seed,
    )

    with open(file_path, 'w') as file:
        json.dump(comm_sizes_list, file)

Sampling normal community sizes 1 / 5:
Sampling normal community sizes 2 / 5:
Sampling normal community sizes 3 / 5:
Sampling normal community sizes 4 / 5:
Sampling normal community sizes 5 / 5:


# Create anomaly-infused networks

## Configuration

In [12]:
# Passed to create_experiment_networks as train_test_split_num.
TRAIN_TEST_SPLIT = 20  # number of communities in test set

## Networks creation

In [13]:
# Generate the anomaly-infused networks; the captured output prints one
# "Finished infusing anomalies" line per (p, m) combination.
create_experiment_networks(
    base_dir=EXPERIMENT_PATH,
    raw_comm_sizes_dir_path=RAW_NETWORKS_PATH,
    train_test_split_num=TRAIN_TEST_SPLIT,
    experiment_settings=EXPERIMENT_SETTINGS,
    # presumably the number of anomalous communities infused per network -
    # TODO confirm against AnomalyInfuser
    num_anom_comms=10,
    verbose=False,
)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.05, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.05, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.05, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.05, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.05, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.1, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.1, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.1, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.1, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.1, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.15, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.15, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.15, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.15, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.15, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.2, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.2, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.2, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.2, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.2, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.25, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.25, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.25, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.25, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.25, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.3, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.3, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.3, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.3, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.3, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.35, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.35, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.35, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.35, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.35, m=0.16


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.4, m=0.01


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.4, m=0.02


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.4, m=0.04


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.4, m=0.08


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Finished infusing anomalies to p=0.4, m=0.16
