In [1]:
import os
os.chdir('../')

In [2]:
import pandas as pd
import warnings
from source.lib.helpers import *
from pandarallel import pandarallel
import ast
from itertools import combinations
from collections import defaultdict
from functools import reduce
import re

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

In [3]:
def WeightedQuantile(values, weights, quantiles):
    values = np.array(values)
    weights = np.array(weights)
    sorter = np.argsort(values)
    values = values[sorter]
    weights = weights[sorter]
    cumulative_weight = np.cumsum(weights)
    total_weight = cumulative_weight[-1]
    return np.interp(np.array(quantiles) * total_weight, cumulative_weight, values)

def Assign3Bin(repo):
    val = base_wm[repo]
    if val <= q33:
        return 0
    elif val <= q67:
        return 1
    else:
        return 2

def ConvertLogKeysToInt(log_dict):
    return {int(float(key)): value for key, value in log_dict.items()}


In [4]:
df_problems_contr_filtered = pd.read_parquet('issue/filtered_problem_data.parquet')
#df_problems_contr_filtered = df_problems_contr_filtered.query('time_period>="2015-01-01"')

In [5]:
def ProjectLevelStats(df_problems_contr_filtered):
    # controls that I can add down the road
    # % of problems that are unlinked prs/linked
    df_problems_contr_filtered['departed_involved'] = df_problems_contr_filtered.apply(lambda x: x['departed_actor_id'] in x['all_actors'], axis = 1)
    df_problems_contr_filtered['key_contributor_count'] = df_problems_contr_filtered['important_actors_rolling'].apply(len)
    df_problems_contr_filtered['total_contributor_count'] = df_problems_contr_filtered['all_actors_period'].apply(len)
    df_problems_contr_filtered['departed_opener'] = pd.to_numeric(df_problems_contr_filtered.apply(lambda x: x['departed_actor_id'] in x['pr_opener'] if x['departed_involved'] else np.nan, axis = 1))
    df_problems_contr_filtered['departed_author'] = pd.to_numeric(df_problems_contr_filtered.apply(lambda x: x['departed_actor_id'] in x['pr_authors'] if x['departed_involved'] else np.nan, axis = 1))
    
    df_project_filtered_group = df_problems_contr_filtered.groupby(
        ['repo_name', 'time_period', 'treatment_period', 'key_contributor_count', 'total_contributor_count']
    ).agg(
        problem_count=('problem_id', 'count'),
        ind_collab=('ind_collab_roll', 'mean'),
        ind_key_collab=('ind_key_collab_roll', 'mean'),
        ind_other_collab=('ind_other_collab_roll', 'mean'),
        departed_involved_count=('departed_involved','sum'),
        departed_involved=('departed_involved','mean'),
        departed_opened_count=('departed_opener','sum'),
        departed_opened=('departed_opener','mean'), # conditional on involvement
        departed_authored_count=('departed_author','sum'),
        departed_authored=('departed_author','mean'), # conditional on involvement
    ).reset_index()
    return df_project_filtered_group

df_project_filtered_group = ProjectLevelStats(df_problems_contr_filtered)

In [6]:
def ProjectSummaryStats(df_problems_contr_filtered):
    df_problems_contr_filtered['contr_count'] = df_problems_contr_filtered['all_actors_period'].apply(lambda x: len(np.fromstring(x)))
    df_problems_contr_filtered['problem_count'] = 1
    df_problems_contr_filtered['unlinked_issue_count'] = df_problems_contr_filtered['type'].apply(lambda x: x == "unlinked issue")
    df_problems_contr_filtered['unlinked_pr_count'] = df_problems_contr_filtered['type'].apply(lambda x: x == "unlinked pr")
    df_problems_contr_filtered['linked_issue_pr_count'] = df_problems_contr_filtered['type'].apply(lambda x: x == "linked")
    
    df_problems_repo_period = (
        df_problems_contr_filtered
          .groupby(['repo_name', 'time_period'], as_index=False)
          .agg(
              contr_count=('contr_count','first'),
              problem_count=('problem_count','sum'),
              unlinked_issue_count=('unlinked_issue_count','sum'),
              unlinked_pr_count=('unlinked_pr_count','sum'),
              linked_issue_pr_count=('linked_issue_pr_count','sum'),
              discussion_count=('total_prob_contr', 'sum'),
              contributor_count_problem=('problem_contr_count', 'sum'),
          )
    )
    df_problems_repo_period['discussion_count_per_problem'] = df_problems_repo_period['discussion_count']/df_problems_repo_period['problem_count']
    df_problems_repo_period['contributor_count_problem_per_problem'] = df_problems_repo_period['contributor_count_problem']/df_problems_repo_period['problem_count']

    # 1) Compute mean + exact percentiles for your metrics
    metrics = df_problems_repo_period[[
        'contr_count',
        'problem_count',
        'unlinked_issue_count',
        'unlinked_pr_count',
        'linked_issue_pr_count',
        'discussion_count_per_problem',
        'contributor_count_problem_per_problem'
    ]]
    means = metrics.mean()
    qs = metrics.quantile([0.1, 0.25, 0.5, 0.75, 0.9], interpolation='nearest')
    metrics_summary = pd.DataFrame({
        'Mean': means,
        '10th': qs.loc[0.1],
        '25th': qs.loc[0.25],
        '50th': qs.loc[0.5],
        '75th': qs.loc[0.75],
        '90th': qs.loc[0.9],
    })
    
    # 2) Compute time periods per project summary
    periods = df_problems_repo_period.groupby('repo_name')['time_period'].count()
    p_qs = periods.quantile([0.1, 0.25, 0.5, 0.75, 0.9], interpolation='nearest')
    periods_summary = pd.DataFrame({
        'Mean': periods.mean(),
        '10th': p_qs.loc[0.1],
        '25th': p_qs.loc[0.25],
        '50th': p_qs.loc[0.5],
        '75th': p_qs.loc[0.75],
        '90th': p_qs.loc[0.9],
    }, index=['Time periods per project'])
    
    # 3) Combine summaries and set labels
    summary = pd.concat([metrics_summary, periods_summary])
    summary.index = [
        'Contributors',
        'Problems',
        'Unlinked issues',
        'Unlinked pull requests',
        'Linked issue–pull request pairs',
        'Discussions per problem',
        'Contributors per problem',
        'Time periods per project'
    ]
    
    # 4) Define formatter: drop .00 for integers, else two decimals
    def format_value(x):
        if pd.isna(x):
            return ''
        if float(x).is_integer():
            return f"{int(x)}"
        return f"{x:.2f}"
    
    # 5) Build each row of the table
    rows = []
    for label, row in summary.iterrows():
        formatted_values = [format_value(row[col]) for col in summary.columns]
        row_line = f"    {label:<30} & " + " & ".join(f"{val:<6}" for val in formatted_values) + r" \\"
        rows.append(row_line)
    
    body = "\n".join(rows)
    
    # 6) Assemble the subfigure LaTeX
    subfigure_code = r"""\begin{subfigure}[b]{0.9\textwidth}
      \centering
      \footnotesize
      \caption{Project summary statistics}
      \label{fig:project-summary-stats}
      \begin{tabular}{@{}l r *{5}{r}@{}}
        \toprule
        Metric                        & Mean   & \multicolumn{5}{c}{Percentiles} \\
        \cmidrule(lr){3-7}
                                      &        & 10th   & 25th   & 50th   & 75th   & 90th   \\
        \midrule
    """ + body + r"""
        \bottomrule
      \end{tabular}
    \end{subfigure}"""
    
    print(subfigure_code)

In [7]:
ProjectSummaryStats(df_problems_contr_filtered)

\begin{subfigure}[b]{0.9\textwidth}
      \centering
      \footnotesize
      \caption{Project summary statistics}
      \label{fig:project-summary-stats}
      \begin{tabular}{@{}l r *{5}{r}@{}}
        \toprule
        Metric                        & Mean   & \multicolumn{5}{c}{Percentiles} \\
        \cmidrule(lr){3-7}
                                      &        & 10th   & 25th   & 50th   & 75th   & 90th   \\
        \midrule
        Contributors                   & 106.34 & 9      & 20     & 49     & 119    & 239    \\
    Problems                       & 204.33 & 11     & 35     & 88     & 256    & 558    \\
    Unlinked issues                & 110.24 & 4      & 15     & 44     & 146    & 291    \\
    Unlinked pull requests         & 73.55  & 3      & 10     & 28     & 79     & 169    \\
    Linked issue–pull request pairs & 20.54  & 0      & 1      & 5      & 20     & 53     \\
    Discussions per problem        & 4.03   & 2      & 2.76   & 3.67   & 4.82   & 6.46   \\
    Co

In [8]:
repo_list = df_problems_contr_filtered['repo_name'].unique().tolist()
def GetCommunicationLogs(df_contributors):
    df_contributors['communication_log'] = df_contributors['communication_log'].apply(ast.literal_eval)
    df_contributors['communication_log'] = df_contributors['communication_log'].apply(ConvertLogKeysToInt)
    
    df_contr_comm = df_contributors[['repo_name','time_period','actor_id','communication_log']]
    df_contr_dept = df_problems_contr_filtered[['repo_name','time_period','departed_actor_id','treatment_period']].drop_duplicates()
    df_contr_dept_comm = pd.merge(df_contr_comm, df_contr_dept)
    df_contr_dept_comm['dept_ov_comm'] = df_contr_dept_comm.apply(
        lambda x: x['communication_log'].get(int(x['departed_actor_id'])), axis = 1)

    cooccur_counts = BuildCooccurrenceCounts(df_problems_contr_filtered)
    
    df_contr_dept_comm['problem_count'] = df_contr_dept_comm.apply(
        lambda row: cooccur_counts.get(
            (row.repo_name, row.time_period, int(float(row.actor_id)), int(float(row.departed_actor_id))), 0), axis=1)
    df_contr_dept_comm.loc[df_contr_dept_comm['problem_count'] ==0, 'dept_ov_comm']= np.nan
    return df_contr_dept_comm

def BuildCooccurrenceCounts(df_probs):
    """Returns dict mapping
       (repo_name, time_period, actor, departed_actor) → count
       for every unordered pair in each row’s all_actors list."""
    counts = defaultdict(int)
    for (_, group) in df_probs.groupby(['repo_name','time_period']):
        repo = group.repo_name.iloc[0]
        period = group.time_period.iloc[0]
        for actors in group.all_actors:
            # turn actors-list into a set to avoid dupes
            s = set([int(float(a)) for a in actors])
            # for every *ordered* pair of distinct actors
            for a, b in combinations(s, 2):
                counts[(repo, period, a, b)] += 1
                counts[(repo, period, b, a)] += 1
    return counts

df_contributors = pd.read_parquet('drive/output/derived/graph_structure/contributor_characteristics.parquet', filters = [('repo_name',"in",repo_list)])
df_contr_dept_comm = GetCommunicationLogs(df_contributors)

In [9]:
df_pre_contr = df_contr_dept_comm[df_contr_dept_comm['time_period'] < df_contr_dept_comm['treatment_period']]
df_pre_repo = (
    df_pre_contr[['repo_name','time_period']].drop_duplicates()
    .sort_values(['repo_name','time_period'], ascending=[True,False])
    .groupby(['repo_name'], as_index=False)
    .head(5)
)
df_pre_contr = pd.merge(df_pre_contr, df_pre_repo)

actor_metrics = (
    df_pre_contr
    .groupby(['repo_name','actor_id'], as_index=False)
    .agg(
        agg_dept_ov_comm   = ('dept_ov_comm', 'sum'),
        agg_problem_count     = ('problem_count',    'sum'),
    )
)
actor_metrics['agg_dept_ov_comm'] = actor_metrics['agg_dept_ov_comm'].replace(0, np.nan)
actor_metrics['agg_dept_ov_comm_per_problem'] = actor_metrics['agg_dept_ov_comm'] / actor_metrics['agg_problem_count']

repo_metrics = (
    actor_metrics.groupby('repo_name', as_index=False)
    .agg(avg_dept_ov_comm_repo_avg = ('agg_dept_ov_comm', 'mean'),
         avg_dept_ov_comm_per_problem_repo_avg = ('agg_dept_ov_comm_per_problem','mean'),
    )
)

df_actor_summary = actor_metrics.merge(repo_metrics, on='repo_name')
df_actor_summary['actor_id'] = pd.to_numeric(df_actor_summary['actor_id'], errors='coerce')

for metric in ['dept_ov_comm', 'dept_ov_comm_per_problem']:
    agg_col  = f'agg_{metric}'
    avg_col  = f'avg_{metric}_repo_avg'

        
    df_actor_summary[f'{metric}_2bin']   = (
        df_actor_summary[agg_col] > df_actor_summary[avg_col]
    ).astype(int)
    df_actor_summary[f'{metric}_05avg_2bin']   = (
        df_actor_summary[agg_col] > .5*df_actor_summary[avg_col]
    ).astype(int)
    df_actor_summary[f'{metric}_2avg_2bin']   = (
        df_actor_summary[agg_col] > 2*df_actor_summary[avg_col]
    ).astype(int)
    df_actor_summary[f'{metric}_3avg_2bin']   = (
        df_actor_summary[agg_col] > 3*df_actor_summary[avg_col]
    ).astype(int)
    
    df_actor_summary.loc[df_actor_summary[agg_col].isna(), f'{metric}_2bin'] = np.nan
    df_actor_summary.loc[df_actor_summary[agg_col].isna(), f'{metric}_05avg_2bin'] = np.nan
    df_actor_summary.loc[df_actor_summary[agg_col].isna(), f'{metric}_2avg_2bin'] = np.nan
    df_actor_summary.loc[df_actor_summary[agg_col].isna(), f'{metric}_3avg_2bin'] = np.nan

    if metric == 'dept_ov_comm_per_problem':
        df_actor_summary[[f'{metric}_min_2bin',f'{metric}_min_05avg_2bin',f'{metric}_min_2avg_2bin',f'{metric}_min_3avg_2bin']] = df_actor_summary[
        [f'{metric}_2bin',f'{metric}_05avg_2bin',f'{metric}_2avg_2bin',f'{metric}_3avg_2bin']]
        
        df_actor_summary.loc[df_actor_summary['agg_problem_count']<5, f'{metric}_min_2bin'] = np.nan
        df_actor_summary.loc[df_actor_summary['agg_problem_count']<5, f'{metric}_min_05avg_2bin'] = np.nan
        df_actor_summary.loc[df_actor_summary['agg_problem_count']<5, f'{metric}_min_2avg_2bin'] = np.nan
        df_actor_summary.loc[df_actor_summary['agg_problem_count']<5, f'{metric}_min_3avg_2bin'] = np.nan

In [10]:
df_contr_all = df_problems_contr_filtered[['repo_name','time_period','all_actors_period']].drop_duplicates(
    ['repo_name','time_period']).explode('all_actors_period').rename(columns={'all_actors_period':'actor_id'}).sort_values(
    'time_period').drop_duplicates(['repo_name','actor_id'])
df_contr_all['actor_id'] = pd.to_numeric(df_contr_all['actor_id'])

df_actor_summary = pd.merge(df_contr_all, df_actor_summary, how = 'outer')
df_actor_summary['agg_problem_count'] = df_actor_summary['agg_problem_count'].fillna(0)
df_actor_summary['avg_dept_ov_comm_repo_avg'] = (df_actor_summary.groupby('repo_name')['avg_dept_ov_comm_repo_avg']
                                           .transform(lambda x: x.ffill().bfill()))
df_actor_summary['avg_dept_ov_comm_per_problem_repo_avg'] = (df_actor_summary.groupby('repo_name')['avg_dept_ov_comm_per_problem_repo_avg']
                                          .transform(lambda x: x.ffill().bfill()))

In [11]:
df_actor_summary = pd.merge(df_actor_summary, df_contr_dept_comm[['repo_name','treatment_period']].drop_duplicates())

In [12]:
import pandas as pd
from functools import reduce
overview_metrics = [
    'dept_ov_comm_2bin',
    'dept_ov_comm_05avg_2bin',
    'dept_ov_comm_2avg_2bin',
    'dept_ov_comm_3avg_2bin',
    'dept_ov_comm_per_problem_2bin',
    'dept_ov_comm_per_problem_05avg_2bin',
    'dept_ov_comm_per_problem_2avg_2bin',
    'dept_ov_comm_per_problem_3avg_2bin',
    'dept_ov_comm_per_problem_min_2bin',
    'dept_ov_comm_per_problem_min_05avg_2bin',
    'dept_ov_comm_per_problem_min_2avg_2bin',
    'dept_ov_comm_per_problem_min_3avg_2bin'
]
status_funcs = {
    'high': lambda s: s > 0,
    'low':  lambda s: s == 0,
    'never_communicated': lambda s: s.isna(),
    'communicated':       lambda s: s.notna(),
    'never_communicated_predep': "",
}

actor_frames = []
for metric in overview_metrics:
    for status, cond in status_funcs.items():
        col_name = f'{status}_{metric}_actors'
        if status == "never_communicated_predep":
            df_temp = (
                df_actor_summary[df_actor_summary[metric].isna()]
                .query('time_period < treatment_period')
                .groupby('repo_name')['actor_id']
                .agg(list)
                .reset_index()
                .rename(columns={'actor_id': col_name})
            )
        else:   
            df_temp = (
                df_actor_summary[cond(df_actor_summary[metric])]
                .groupby('repo_name')['actor_id']
                .agg(list)
                .reset_index()
                .rename(columns={'actor_id': col_name})
            )
        actor_frames.append(df_temp)

df_actor_lists = reduce(
    lambda left, right: pd.merge(left, right, on='repo_name', how='outer'),
    actor_frames
)

for metric in overview_metrics:
    for status in status_funcs:
        col = f'{status}_{metric}_actors'
        df_actor_lists[col] = df_actor_lists[col].apply(lambda x: x if isinstance(x, list) else [])

df_pr_issues      = df_problems_contr_filtered[
    df_problems_contr_filtered['type'].isin(['linked', 'unlinked pr'])
]
df_pr_with_actors = pd.merge(df_pr_issues, df_actor_lists, on='repo_name', how='left')


In [13]:
status_map = {'high': 'above',
 'low': 'below',
 'never_communicated': 'never_comm',
 'communicated': 'comm',
             'never_communicated_predep':'never_comm_predep'}
metric_map = {
    'dept_ov_comm_2bin': 'dept_comm_avg',
    'dept_ov_comm_05avg_2bin': 'dept_comm_05avg',
    'dept_ov_comm_2avg_2bin': 'dept_comm_2avg',
    'dept_ov_comm_3avg_2bin': 'dept_comm_3avg',
    'dept_ov_comm_per_problem_2bin': 'dept_comm_per_problem_avg',
    'dept_ov_comm_per_problem_05avg_2bin': 'dept_comm_per_problem_05avg',
    'dept_ov_comm_per_problem_2avg_2bin': 'dept_comm_per_problem_2avg',
    'dept_ov_comm_per_problem_3avg_2bin': 'dept_comm_per_problem_3avg',
    'dept_ov_comm_per_problem_min_2bin': 'dept_comm_per_problem_min_avg',
    'dept_ov_comm_per_problem_min_05avg_2bin': 'dept_comm_per_problem_min_05avg',
    'dept_ov_comm_per_problem_min_2avg_2bin': 'dept_comm_per_problem_min_2avg',
    'dept_ov_comm_per_problem_min_3avg_2bin': 'dept_comm_per_problem_min_3avg',
}

In [76]:
df_pr_with_actors

Unnamed: 0,repo_name,problem_id,problem_id_num,issues,prs,same_repo,other_repo,type,time_period,close_time,comment_close_time,pr_actors,pr_contributions_dict,issue_actors,issue_contributions_dict,important_actors,important_actors_rolling,treatment_period,departed_actor_id,all_actors,problem_contr_count,contributions_dict,total_prob_contr,all_actors_period,ind_collab,ind_collab_roll,ind_key_collab_roll,ind_other_collab_roll,cont_collab_roll,cont_collab_clean_char_roll,cont_collab_clean_wd_roll,review_count,review_comment_count,pr_opener,commit_author,pr_authors,departed_involved,key_contributor_count,total_contributor_count,departed_opener,departed_author,contr_count,problem_count,unlinked_issue_count,unlinked_pr_count,linked_issue_pr_count,high_dept_ov_comm_2bin_actors,low_dept_ov_comm_2bin_actors,never_communicated_dept_ov_comm_2bin_actors,communicated_dept_ov_comm_2bin_actors,never_communicated_predep_dept_ov_comm_2bin_actors,high_dept_ov_comm_05avg_2bin_actors,low_dept_ov_comm_05avg_2bin_actors,never_communicated_dept_ov_comm_05avg_2bin_actors,communicated_dept_ov_comm_05avg_2bin_actors,never_communicated_predep_dept_ov_comm_05avg_2bin_actors,high_dept_ov_comm_2avg_2bin_actors,low_dept_ov_comm_2avg_2bin_actors,never_communicated_dept_ov_comm_2avg_2bin_actors,communicated_dept_ov_comm_2avg_2bin_actors,never_communicated_predep_dept_ov_comm_2avg_2bin_actors,high_dept_ov_comm_3avg_2bin_actors,low_dept_ov_comm_3avg_2bin_actors,never_communicated_dept_ov_comm_3avg_2bin_actors,communicated_dept_ov_comm_3avg_2bin_actors,never_communicated_predep_dept_ov_comm_3avg_2bin_actors,high_dept_ov_comm_per_problem_2bin_actors,low_dept_ov_comm_per_problem_2bin_actors,never_communicated_dept_ov_comm_per_problem_2bin_actors,communicated_dept_ov_comm_per_problem_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_2bin_actors,high_dept_ov_comm_per_problem_05avg_2bin_actors,low_dept_ov_comm_per_problem_05avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_05avg_2bin_actors,communicated_dept_ov_comm_per_problem_05avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_05avg_2bin_actors,high_dept_ov_comm_per_problem_2avg_2bin_actors,low_dept_ov_comm_per_problem_2avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_2avg_2bin_actors,communicated_dept_ov_comm_per_problem_2avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_2avg_2bin_actors,high_dept_ov_comm_per_problem_3avg_2bin_actors,low_dept_ov_comm_per_problem_3avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_3avg_2bin_actors,communicated_dept_ov_comm_per_problem_3avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_3avg_2bin_actors,high_dept_ov_comm_per_problem_min_2bin_actors,low_dept_ov_comm_per_problem_min_2bin_actors,never_communicated_dept_ov_comm_per_problem_min_2bin_actors,communicated_dept_ov_comm_per_problem_min_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_min_2bin_actors,high_dept_ov_comm_per_problem_min_05avg_2bin_actors,low_dept_ov_comm_per_problem_min_05avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_min_05avg_2bin_actors,communicated_dept_ov_comm_per_problem_min_05avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_min_05avg_2bin_actors,high_dept_ov_comm_per_problem_min_2avg_2bin_actors,low_dept_ov_comm_per_problem_min_2avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_min_2avg_2bin_actors,communicated_dept_ov_comm_per_problem_min_2avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_min_2avg_2bin_actors,high_dept_ov_comm_per_problem_min_3avg_2bin_actors,low_dept_ov_comm_per_problem_min_3avg_2bin_actors,never_communicated_dept_ov_comm_per_problem_min_3avg_2bin_actors,communicated_dept_ov_comm_per_problem_min_3avg_2bin_actors,never_communicated_predep_dept_ov_comm_per_problem_min_3avg_2bin_actors
0,AnalogJ/lexicon,AnalogJ/lexicon/4,4,[],[4],[],[],unlinked pr,2016-01-01,0.177894,0.0,"[11462.0, 891875.0]","{""11462.0"": {""contributions"": 1, ""contribution...",[],{},"[13824387.0, 891875.0]","[13824387.0, 891875.0]",2020-01-01,13824387.0,"[11462.0, 891875.0]",2,"{""891875.0"": {""contributions_text_wt"": 134, ""c...",2,"[273688.0, 891875.0, 11462.0, 204463.0, 377024...",0,0,,0.0,0.000000,0.000000,0.000000,,,[11462.0],[],[11462.0],False,2,14,,,14,1,False,True,False,"[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[1388.0, 225151.0, 352937.0, 855724.0, 891875....","[696.0, 8534.0, 37886.0, 47904.0, 131406.0, 13...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 47904.0, 131406.0, 202474.0, 2...","[8534.0, 37886.0, 133209.0, 168188.0, 295190.0...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 8534.0, 47904.0, 131406.0, 133...","[37886.0, 1204969.0, 1260936.0, 1844089.0, 187...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[47904.0, 1110751.0, 9248747.0]","[696.0, 1388.0, 8534.0, 37886.0, 131406.0, 133...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...",[9248747.0],"[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[352937.0, 1204969.0, 1260936.0, 1844089.0, 35...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 5785366.0, 9728851.0]","[1204969.0, 1260936.0, 1844089.0, 3595020.0]","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0..."
1,AnalogJ/lexicon,AnalogJ/lexicon/7,7,[],[7],[],[],unlinked pr,2016-01-01,16.842847,0.0,"[204463.0, 891875.0]","{""204463.0"": {""contributions"": 4, ""contributio...",[],{},"[13824387.0, 891875.0]","[13824387.0, 891875.0]",2020-01-01,13824387.0,"[204463.0, 891875.0]",2,"{""891875.0"": {""contributions_text_wt"": 558, ""c...",8,"[273688.0, 891875.0, 11462.0, 204463.0, 377024...",0,0,,0.0,0.000000,0.000000,0.000000,,,[204463.0],[],[204463.0],False,2,14,,,14,1,False,True,False,"[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[1388.0, 225151.0, 352937.0, 855724.0, 891875....","[696.0, 8534.0, 37886.0, 47904.0, 131406.0, 13...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 47904.0, 131406.0, 202474.0, 2...","[8534.0, 37886.0, 133209.0, 168188.0, 295190.0...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 8534.0, 47904.0, 131406.0, 133...","[37886.0, 1204969.0, 1260936.0, 1844089.0, 187...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[47904.0, 1110751.0, 9248747.0]","[696.0, 1388.0, 8534.0, 37886.0, 131406.0, 133...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...",[9248747.0],"[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[352937.0, 1204969.0, 1260936.0, 1844089.0, 35...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 5785366.0, 9728851.0]","[1204969.0, 1260936.0, 1844089.0, 3595020.0]","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0..."
2,AnalogJ/lexicon,AnalogJ/lexicon/8,8,[],[8],[],[],unlinked pr,2016-01-01,1.257002,0.0,"[377024.0, 891875.0]","{""377024.0"": {""contributions"": 1, ""contributio...",[],{},"[13824387.0, 891875.0]","[13824387.0, 891875.0]",2020-01-01,13824387.0,"[377024.0, 891875.0]",2,"{""377024.0"": {""contributions_text_wt"": 297, ""c...",2,"[273688.0, 891875.0, 11462.0, 204463.0, 377024...",0,0,,0.0,0.000000,0.000000,0.000000,,,[377024.0],[],[377024.0],False,2,14,,,14,1,False,True,False,"[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[1388.0, 225151.0, 352937.0, 855724.0, 891875....","[696.0, 8534.0, 37886.0, 47904.0, 131406.0, 13...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 47904.0, 131406.0, 202474.0, 2...","[8534.0, 37886.0, 133209.0, 168188.0, 295190.0...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 8534.0, 47904.0, 131406.0, 133...","[37886.0, 1204969.0, 1260936.0, 1844089.0, 187...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[47904.0, 1110751.0, 9248747.0]","[696.0, 1388.0, 8534.0, 37886.0, 131406.0, 133...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...",[9248747.0],"[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[352937.0, 1204969.0, 1260936.0, 1844089.0, 35...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 5785366.0, 9728851.0]","[1204969.0, 1260936.0, 1844089.0, 3595020.0]","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0..."
3,AnalogJ/lexicon,AnalogJ/lexicon/11,11,[],[11],[],[],unlinked pr,2016-01-01,2.076516,0.0,"[891875.0, 13824387.0]","{""891875.0"": {""contributions"": 1, ""contributio...",[],{},"[13824387.0, 891875.0]","[13824387.0, 891875.0]",2020-01-01,13824387.0,"[891875.0, 13824387.0]",2,"{""891875.0"": {""contributions_text_wt"": 699, ""c...",2,"[273688.0, 891875.0, 11462.0, 204463.0, 377024...",1,1,1.0,,1.000000,1.000934,1.018726,,,[891875.0],[],[891875.0],True,2,14,0.0,0.0,14,1,False,True,False,"[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[1388.0, 225151.0, 352937.0, 855724.0, 891875....","[696.0, 8534.0, 37886.0, 47904.0, 131406.0, 13...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 47904.0, 131406.0, 202474.0, 2...","[8534.0, 37886.0, 133209.0, 168188.0, 295190.0...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 8534.0, 47904.0, 131406.0, 133...","[37886.0, 1204969.0, 1260936.0, 1844089.0, 187...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[47904.0, 1110751.0, 9248747.0]","[696.0, 1388.0, 8534.0, 37886.0, 131406.0, 133...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...",[9248747.0],"[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[352937.0, 1204969.0, 1260936.0, 1844089.0, 35...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 5785366.0, 9728851.0]","[1204969.0, 1260936.0, 1844089.0, 3595020.0]","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0..."
4,AnalogJ/lexicon,AnalogJ/lexicon/12,12,[],[12],[],[],unlinked pr,2016-01-01,0.066100,0.0,"[891875.0, 13824387.0]","{""891875.0"": {""contributions"": 1, ""contributio...",[],{},"[13824387.0, 891875.0]","[13824387.0, 891875.0]",2020-01-01,13824387.0,"[891875.0, 13824387.0]",2,"{""891875.0"": {""contributions_text_wt"": 907, ""c...",2,"[273688.0, 891875.0, 11462.0, 204463.0, 377024...",1,1,1.0,,1.000000,1.009795,1.056268,,,[891875.0],[],[891875.0],True,2,14,0.0,0.0,14,1,False,True,False,"[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[1388.0, 225151.0, 352937.0, 855724.0, 891875....","[696.0, 8534.0, 37886.0, 47904.0, 131406.0, 13...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 9728851.0]","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 47904.0, 131406.0, 202474.0, 2...","[8534.0, 37886.0, 133209.0, 168188.0, 295190.0...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[696.0, 1388.0, 8534.0, 47904.0, 131406.0, 133...","[37886.0, 1204969.0, 1260936.0, 1844089.0, 187...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[47904.0, 1110751.0, 9248747.0]","[696.0, 1388.0, 8534.0, 37886.0, 131406.0, 133...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...",[9248747.0],"[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 223...","[696.0, 1388.0, 8534.0, 37886.0, 47904.0, 1314...","[5343.0, 5387.0, 8556.0, 11462.0, 20580.0, 232...","[891875.0, 5785366.0, 9728851.0]","[352937.0, 1204969.0, 1260936.0, 1844089.0, 35...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 5785366.0, 9728851.0]","[1204969.0, 1260936.0, 1844089.0, 3595020.0]","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...",[],"[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0...","[352937.0, 891875.0, 1204969.0, 1260936.0, 184...","[696.0, 1388.0, 5343.0, 5387.0, 8534.0, 8556.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120421,xapi-project/xen-api,xapi-project/xen-api/5156,5156,[],[5156],[],[],unlinked pr,2023-07-01,0.938275,0.0,"[413005.0, 5189409.0, 27800561.0, 843617.0]","{""413005.0"": {""contributions"": 5, ""contributio...",[],{},"[5189409.0, 843617.0, 413005.0, 24416311.0]","[5189409.0, 843617.0, 721894.0, 413005.0]",2022-01-01,24416311.0,"[413005.0, 5189409.0, 27800561.0, 843617.0]",4,"{""843617.0"": {""contributions_text_wt"": 0, ""con...",16,"[793993.0, 298721.0, 5189409.0, 3705142.0, 325...",1,1,,1.0,1.053254,1.159304,1.350740,7.0,13.0,[413005.0],[],[413005.0],False,4,19,,,19,1,False,True,False,"[413005.0, 721894.0, 843617.0, 5189409.0, 1726...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 721894.0, 843617.0, 5189409.0, 1252...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 843617.0, 5189409.0]","[298721.0, 303331.0, 451043.0, 721894.0, 12414...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[843617.0, 5189409.0]","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 12529066.0, 17266972.0, 20078729.0,...","[298721.0, 413005.0, 451043.0, 721894.0, 84361...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 413005.0, 451043.0, 721894.0, 84361...","[298721.0, 3705142.0]","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[12529066.0, 17266972.0, 27800561.0]","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 51894...",[3705142.0],"[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ..."
120422,xapi-project/xen-api,xapi-project/xen-api/5157,5157,[],[5157],[],[],unlinked pr,2023-07-01,8.255000,0.0,"[62988402.0, 843617.0, 721894.0, 27800561.0, 4...","{""413005.0"": {""contributions"": 1, ""contributio...",[],{},"[5189409.0, 843617.0, 413005.0, 24416311.0]","[5189409.0, 843617.0, 721894.0, 413005.0]",2022-01-01,24416311.0,"[62988402.0, 843617.0, 721894.0, 27800561.0, 4...",5,"{""843617.0"": {""contributions_text_wt"": 369, ""c...",7,"[793993.0, 298721.0, 5189409.0, 3705142.0, 325...",1,1,,1.0,1.111111,1.463888,1.438691,2.0,2.0,[62988402.0],[],[62988402.0],False,4,19,,,19,1,False,True,False,"[413005.0, 721894.0, 843617.0, 5189409.0, 1726...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 721894.0, 843617.0, 5189409.0, 1252...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 843617.0, 5189409.0]","[298721.0, 303331.0, 451043.0, 721894.0, 12414...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[843617.0, 5189409.0]","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 12529066.0, 17266972.0, 20078729.0,...","[298721.0, 413005.0, 451043.0, 721894.0, 84361...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 413005.0, 451043.0, 721894.0, 84361...","[298721.0, 3705142.0]","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[12529066.0, 17266972.0, 27800561.0]","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 51894...",[3705142.0],"[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ..."
120423,xapi-project/xen-api,xapi-project/xen-api/5159,5159,[],[5159],[],[],unlinked pr,2023-07-01,3.661238,0.0,"[413005.0, 843617.0, 721894.0]","{""413005.0"": {""contributions"": 1, ""contributio...",[],{},"[5189409.0, 843617.0, 413005.0, 24416311.0]","[5189409.0, 843617.0, 721894.0, 413005.0]",2022-01-01,24416311.0,"[413005.0, 843617.0, 721894.0]",3,"{""843617.0"": {""contributions_text_wt"": 0, ""con...",3,"[793993.0, 298721.0, 5189409.0, 3705142.0, 325...",1,1,,1.0,1.000000,2.000000,2.000000,2.0,2.0,[413005.0],[],[413005.0],False,4,19,,,19,1,False,True,False,"[413005.0, 721894.0, 843617.0, 5189409.0, 1726...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 721894.0, 843617.0, 5189409.0, 1252...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 843617.0, 5189409.0]","[298721.0, 303331.0, 451043.0, 721894.0, 12414...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[843617.0, 5189409.0]","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 12529066.0, 17266972.0, 20078729.0,...","[298721.0, 413005.0, 451043.0, 721894.0, 84361...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 413005.0, 451043.0, 721894.0, 84361...","[298721.0, 3705142.0]","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[12529066.0, 17266972.0, 27800561.0]","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 51894...",[3705142.0],"[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ..."
120424,xapi-project/xen-api,xapi-project/xen-api/5162,5162,[],[5162],[],[],unlinked pr,2023-07-01,2.168356,0.0,"[843617.0, 721894.0, 413005.0]","{""413005.0"": {""contributions"": 3, ""contributio...",[],{},"[5189409.0, 843617.0, 413005.0, 24416311.0]","[5189409.0, 843617.0, 721894.0, 413005.0]",2022-01-01,24416311.0,"[843617.0, 721894.0, 413005.0]",3,"{""843617.0"": {""contributions_text_wt"": 582, ""c...",9,"[793993.0, 298721.0, 5189409.0, 3705142.0, 325...",1,1,,1.0,1.062500,1.914374,1.920824,3.0,6.0,[843617.0],[],[843617.0],False,4,19,,,19,1,False,True,False,"[413005.0, 721894.0, 843617.0, 5189409.0, 1726...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 721894.0, 843617.0, 5189409.0, 1252...","[298721.0, 303331.0, 451043.0, 1241401.0, 3705...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 843617.0, 5189409.0]","[298721.0, 303331.0, 451043.0, 721894.0, 12414...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[843617.0, 5189409.0]","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 12529066.0, 17266972.0, 20078729.0,...","[298721.0, 413005.0, 451043.0, 721894.0, 84361...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[303331.0, 413005.0, 451043.0, 721894.0, 84361...","[298721.0, 3705142.0]","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[298721.0, 303331.0, 413005.0, 451043.0, 72189...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[12529066.0, 17266972.0, 27800561.0]","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 51894...",[3705142.0],"[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[17266972.0],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...",[],"[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ...","[413005.0, 451043.0, 721894.0, 843617.0, 37051...","[8839.0, 45922.0, 46941.0, 53164.0, 103693.0, ..."


In [82]:
# --- your existing actor-list build (unchanged) ---
def SummarizeMetricStatus(df, metric, status, match_col    ):
    actor_col = f'{status}_{metric}_actors'
    df[actor_col] = df[actor_col].apply(lambda x: x if isinstance(x, list) else [])
    mask = df.apply(
        lambda row: (
            row['departed_actor_id'] not in row['all_actors']
            and bool(set(row[match_col]) & set(row[actor_col]))
        ),
        axis=1
    )
    df_filtered = df[mask].drop_duplicates(
        ['repo_name','problem_id']).query('type != "unlinked issue" & time_period >= treatment_period')
    
    summary = (
        df_filtered
        .groupby(['repo_name', 'time_period'])
        .apply(lambda grp: pd.Series({
            'prs_opened_count': len(grp),
            'contributor_count': len(set().union(*grp[actor_col].tolist(), *grp['all_actors_period'].tolist()))
        }))
        .reset_index()
    )
    if status in ['never_communicated','communicated']:
        opp_status = 'communicated' if status == 'never_communicated' else 'never_communicated'
        opp_actor_col = f'{opp_status}_{metric}_actors'
        opp_mask = df_filtered.apply(lambda row: (row['departed_actor_id'] not in row['issue_actors']
                                         and bool(set(row['issue_actors']) & set(row[opp_actor_col]))),
                            axis=1)
        subset_opp_involved = df_filtered[opp_mask]
        print(subset_opp_involved.shape)
        print(metric, status)
        print(subset_opp_involved.shape[0]/df_filtered.shape[0], df_filtered.shape[0])
        
    if status in ['low','high']:
        opp_status = 'high' if status == 'low' else 'low'
        opp_actor_col = f'{opp_status}_{metric}_actors'
        opp_mask = df_filtered.apply(lambda row: (row['departed_actor_id'] not in row['issue_actors']
                                         and bool(set(row['issue_actors']) & set(row[opp_actor_col]))),
                            axis=1)
        subset_opp_involved = df_filtered[opp_mask]
        print(subset_opp_involved.shape)
        print(metric, status)
        print(subset_opp_involved.shape[0]/df_filtered.shape[0], df_filtered.shape[0])
        
    summary['overview_metric'] = metric
    summary['status'] = status
    return summary


def PivotAndFlattenCommSummary(df_pr_with_actors, match_col, overview_metrics, base_status, 
                               special_status, metric_map, status_map):
    summaries = [
        SummarizeMetricStatus(df_pr_with_actors, m, s, match_col)
        for m in overview_metrics for s in base_status
    ] + [
        SummarizeMetricStatus(df_pr_with_actors, overview_metrics[0], s, match_col)
        for s in special_status
    ]
    df_comm_summary = pd.concat(summaries, ignore_index=True)

    df_wide = (
        df_comm_summary
        .pivot_table(
            index=['repo_name', 'time_period'],
            columns=['overview_metric', 'status'],
            values=['prs_opened_count', 'contributor_count'],
            fill_value=0
        )
        .reset_index()
    )

    flat_columns = []
    for col in df_wide.columns:
        if isinstance(col, tuple):
            if all(col) and len(col) == 3:
                metric_type, metric_name, status = col
                flat_prefix = 'prs_opened' if metric_type == 'prs_opened_count' else 'contributors'
                mname = metric_map[metric_name]
                sname = status_map[status]
                flat_columns.append(f'{flat_prefix}_{mname}_{sname}')
            else:
                flat_columns.append(col[0])
        else:
            flat_columns.append(col)

    df_wide.columns = flat_columns

    return df_wide.rename(columns={
        'prs_opened_dept_comm_avg_comm':           'prs_opened_dept_comm',
        'prs_opened_dept_comm_avg_never_comm':     'prs_opened_dept_never_comm',
        'prs_opened_dept_comm_avg_never_comm_predep':     'prs_opened_dept_never_comm_predep',
        'contributors_dept_comm_avg_comm':         'contributors_dept_comm',
        'contributors_dept_comm_avg_never_comm':   'contributors_dept_never_comm',
        'contributors_dept_comm_avg_never_comm_predep':   'contributors_dept_never_comm_predep',
    })

#df_comm_wide = PivotAndFlattenCommSummary(
#    df_pr_with_actors, match_col='pr_opener', overview_metrics=overview_metrics, base_status=['never_communicated', 'communicated'], 
#    special_status=['never_communicated', 'communicated', 'never_communicated_predep'], metric_map=metric_map, status_map=status_map)
df_comm_wide = PivotAndFlattenCommSummary(
    df_pr_with_actors, match_col='pr_opener', overview_metrics=overview_metrics, base_status=['low', 'high'], 
    special_status=['never_communicated', 'communicated', 'never_communicated_predep'], metric_map=metric_map, status_map=status_map)

(197, 106)
dept_ov_comm_2bin low
0.0652317880794702 3020
(325, 106)
dept_ov_comm_2bin high
0.02835456290350724 11462
(174, 106)
dept_ov_comm_05avg_2bin low
0.09786276715410573 1778
(246, 106)
dept_ov_comm_05avg_2bin high
0.019368553657192346 12701
(373, 106)
dept_ov_comm_2avg_2bin low
0.049979900844164545 7463
(361, 106)
dept_ov_comm_2avg_2bin high
0.05140985474223868 7022
(388, 106)
dept_ov_comm_3avg_2bin low
0.045652429697611484 8499
(311, 106)
dept_ov_comm_3avg_2bin high
0.051937207748830996 5988
(345, 106)
dept_ov_comm_per_problem_2bin low
0.0398291387670284 8662
(278, 106)
dept_ov_comm_per_problem_2bin high
0.04777453170647877 5819
(108, 106)
dept_ov_comm_per_problem_05avg_2bin low
0.08653846153846154 1248
(141, 106)
dept_ov_comm_per_problem_05avg_2bin high
0.010657596371882086 13230
(38, 106)
dept_ov_comm_per_problem_2avg_2bin low
0.0026874115983026876 14140
(32, 106)
dept_ov_comm_per_problem_2avg_2bin high
0.09667673716012085 331
(2, 106)
dept_ov_comm_per_problem_3avg_2bin low
0

ValueError: cannot insert time_period, already exists

In [16]:
"""# people are involved int wice as much activity as they're opening 
for avg in ['avg','2avg','3avg']:
    sel_cols = [f'prs_opened_dept_comm_{avg}_above',f'prs_opened_dept_comm_{avg}_below']
    print(df_comm_wide_ov[sel_cols].sum().sum()/df_comm_wide[sel_cols].sum().sum())"""

"# people are involved int wice as much activity as they're opening \nfor avg in ['avg','2avg','3avg']:\n    sel_cols = [f'prs_opened_dept_comm_{avg}_above',f'prs_opened_dept_comm_{avg}_below']\n    print(df_comm_wide_ov[sel_cols].sum().sum()/df_comm_wide[sel_cols].sum().sum())"

In [17]:
df_problems_contr_filtered['unimp_actors'] = df_problems_contr_filtered.apply(
    lambda row: [actor for actor in row['all_actors'] if actor not in row['important_actors_rolling']], axis=1)
df_problems_contr_filtered['problem_unimp_contr_count'] = df_problems_contr_filtered['unimp_actors'].apply(len)

In [18]:
df_contributors = df_problems_contr_filtered[['repo_name','time_period','treatment_period','all_actors_period','departed_actor_id']].explode('all_actors_period').sort_values(['repo_name','time_period'])
df_project_predeparture_contributors = df_contributors.query('time_period < treatment_period & departed_actor_id != all_actors_period').drop_duplicates(['repo_name','all_actors_period'])
df_project_predeparture_contributors = df_project_predeparture_contributors.groupby(['repo_name'])['all_actors_period'].agg(list).reset_index().rename(columns={'all_actors_period':'all_actors_pre_departure'})
df_project_nondeparture_contributors = df_contributors.query('departed_actor_id != all_actors_period').drop_duplicates(['repo_name','all_actors_period'])
df_project_nondeparture_contributors = df_project_nondeparture_contributors.groupby(['repo_name'])['all_actors_period'].agg(list).reset_index().rename(columns={'all_actors_period':'all_actors_non_departure'})


In [19]:
df_problems_contr_filtered_predep = pd.merge(df_problems_contr_filtered, df_project_predeparture_contributors)
df_problems_contr_filtered_predep = df_problems_contr_filtered_predep.loc[
    df_problems_contr_filtered_predep.apply(lambda row: row['all_actors'].size == np.intersect1d(row['all_actors'], row['all_actors_pre_departure']).size, axis=1)
]
df_problems_contr_filtered_nondep = pd.merge(df_problems_contr_filtered, df_project_nondeparture_contributors)
df_problems_contr_filtered_nondep = df_problems_contr_filtered_nondep.loc[
    df_problems_contr_filtered_nondep.apply(lambda row: row['all_actors'].size == np.intersect1d(row['all_actors'], row['all_actors_non_departure']).size, axis=1)
]

In [20]:
df_problems_contr_filtered['contributions_dict'] = df_problems_contr_filtered['contributions_dict'].apply(ast.literal_eval)
df_problems_contr_filtered['contributions_dict'] = (df_problems_contr_filtered['contributions_dict'].apply(lambda d: {float(k): v for k, v in d.items()}))
df_problems_contr_filtered['total_contributions'] = (df_problems_contr_filtered['contributions_dict'].apply(
    lambda contributions_dict: sum(item['contributions'] for key, item in contributions_dict.items()))
)

In [21]:
df_agg_predep = df_problems_contr_filtered_predep.sort_values(['repo_name','problem_id_num','time_period']).drop_duplicates(
    ['repo_name','problem_id']).query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])['problem_id'].count().reset_index().rename(columns={'problem_id':'prs_opened_predep'})

df_agg_nondep = df_problems_contr_filtered_nondep.sort_values(['repo_name','problem_id_num','time_period']).drop_duplicates(
    ['repo_name','problem_id']).query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])['problem_id'].count().reset_index().rename(columns={'problem_id':'prs_opened_nondep'})

df_agg_prob = df_problems_contr_filtered.sort_values(['repo_name','problem_id_num','time_period']).drop_duplicates(
    ['repo_name','problem_id']).query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])['problem_id'].count().reset_index().rename(columns={'problem_id':'prs_opened_prob'})

df_agg_prs = pd.merge(df_agg_predep, df_agg_nondep, how = 'outer').merge(df_comm_wide, how = 'outer').merge(df_agg_prob, how = 'outer')


df_problem_contr_count = df_problems_contr_filtered.query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])[['problem_contr_count','problem_unimp_contr_count']].mean().reset_index().rename(
    columns={'problem_contr_count':'problem_avg_contr_count', 'problem_unimp_contr_count':'problem_avg_unimp_contr_count'})
df_problem_close_time_contr = df_problems_contr_filtered.sort_values(['repo_name','problem_id_num','time_period']).drop_duplicates(
    ['repo_name','problem_id']).query('type != "unlinked issue"').query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])[['close_time','total_contributions']].mean().reset_index()
df_agg_prs = pd.merge(df_agg_prs, df_problem_contr_count, how = 'outer').merge(df_problem_close_time_contr, how = 'outer')





In [22]:
df_problems_contr_filtered['important_contributions_share'] = (df_problems_contr_filtered.apply(
    lambda d: sum(v['contributions'] for k, v in d['contributions_dict'].items() if k in d['important_actors_rolling']), axis = 1) / 
                                                 df_problems_contr_filtered['total_contributions'])
df_imp_share = df_problems_contr_filtered.query('type != "unlinked issue"').groupby(
    ['repo_name','time_period'])[['important_contributions_share']].mean().reset_index()
df_agg_prs = pd.merge(df_agg_prs, df_imp_share, how = 'outer')#.fillna(0)

In [23]:
df_comment_grouped = (
    df_problems_contr_filtered
    .sort_values(['repo_name', 'problem_id_num', 'time_period'])
    .drop_duplicates(['repo_name', 'problem_id'])
    .query('type != "unlinked issue"')
    .groupby(['repo_name', 'time_period'])
    .agg(
        review_count=('review_count', 'mean'),
        review_comment_count=('review_comment_count', 'mean'),
        prop_review_count_na=('review_count', lambda x: x.isna().mean()),
        prop_review_comment_count_na=('review_comment_count', lambda x: x.isna().mean()),
    )
    .reset_index()
)

df_agg_prs = pd.merge(df_agg_prs, df_comment_grouped, how = 'outer')

In [24]:
df_agg_prs[['repo_name','time_period','prs_opened_prob','prs_opened_predep','prs_opened_nondep',
            'prs_opened_dept_comm','prs_opened_dept_never_comm','prs_opened_dept_never_comm_predep']].head(30)

Unnamed: 0,repo_name,time_period,prs_opened_prob,prs_opened_predep,prs_opened_nondep,prs_opened_dept_comm,prs_opened_dept_never_comm,prs_opened_dept_never_comm_predep
0,AnalogJ/lexicon,2016-01-01,33.0,3.0,3.0,0.0,3.0,3.0
1,AnalogJ/lexicon,2016-07-01,27.0,5.0,5.0,1.0,4.0,4.0
2,AnalogJ/lexicon,2017-01-01,35.0,13.0,13.0,6.0,7.0,7.0
3,AnalogJ/lexicon,2017-07-01,17.0,3.0,3.0,2.0,1.0,1.0
4,AnalogJ/lexicon,2018-01-01,49.0,6.0,6.0,4.0,2.0,2.0
5,AnalogJ/lexicon,2018-07-01,45.0,1.0,1.0,1.0,0.0,0.0
6,AnalogJ/lexicon,2019-01-01,51.0,5.0,5.0,5.0,0.0,0.0
7,AnalogJ/lexicon,2019-07-01,37.0,10.0,10.0,7.0,3.0,2.0
8,AnalogJ/lexicon,2020-01-01,25.0,18.0,28.0,20.0,8.0,3.0
9,AnalogJ/lexicon,2020-07-01,17.0,5.0,17.0,2.0,15.0,1.0


In [25]:
# columns to leave alone
exclude = {'close_time', 'prop_review_comment_count_na', 'prop_review_count_na'}

# build a dict mapping every other column to 0
fill_values = {col: 0 for col in df_agg_prs.columns if col not in exclude}

# apply the fill
df_agg_prs = df_agg_prs.fillna(fill_values)


In [26]:
preperiod_recent = df_project_filtered_group.query('time_period < treatment_period').groupby('repo_name').tail(5)
preperiod_recent['other_involved_count'] = preperiod_recent['departed_involved_count'] - preperiod_recent['problem_count']
preperiod_recent['uniform_weight'] = 1

count_dict = {
    'ind_collab': 'problem_count',
    'ind_key_collab': 'departed_involved_count',
    'ind_other_collab': 'other_involved_count',
    'departed_involved': 'problem_count',
    'departed_involved_count': 'uniform_weight',
    'key_contributor_count': 'uniform_weight',
    'total_contributor_count': 'uniform_weight',
    'problem_count': 'uniform_weight',
    'departed_opened': 'departed_opened_count',
    'departed_authored': 'departed_authored_count'
}

for collab_type, count_col in count_dict.items():
    avg_collab = WeightedMean(preperiod_recent[collab_type], preperiod_recent[count_col])
    base_wm = preperiod_recent.groupby('repo_name').apply(
        lambda df: WeightedMean(df[collab_type], df[count_col], zero_weight_return = 0)
    )

    above_set = set(base_wm[base_wm > avg_collab].index)
    df_project_filtered_group[f"{collab_type}_2bin"] = df_project_filtered_group['repo_name'].apply(lambda x: int(x in above_set))

df_project_filtered_group = df_project_filtered_group.merge(df_agg_prs, how = 'left')
df_project_filtered_group[['prs_opened_predep','prs_opened_nondep']] = df_project_filtered_group[['prs_opened_predep','prs_opened_nondep']].fillna(0)

from source.lib.JMSLab.SaveData import SaveData
SaveData(df_project_filtered_group, ["repo_name","time_period"],
         'issue/project_collaboration.parquet',
         'issue/project_collaboration.log')




File 'issue/project_collaboration.parquet' saved successfully.


In [None]:
total_contributor_count

In [65]:
t = pd.merge(
    df_project_filtered_group,
    df_pr_with_actors.drop_duplicates(['repo_name', 'time_period']),
    on=['repo_name', 'time_period']
)

t['comm_ratio'] = (t.apply(lambda row: len(set(row['never_communicated_dept_ov_comm_2bin_actors']) & set(row['all_actors_period'])), axis=1)) / t['total_contributor_count_x']


In [66]:
df_pr_with_actors.columns.tolist()

['repo_name',
 'problem_id',
 'problem_id_num',
 'issues',
 'prs',
 'same_repo',
 'other_repo',
 'type',
 'time_period',
 'close_time',
 'comment_close_time',
 'pr_actors',
 'pr_contributions_dict',
 'issue_actors',
 'issue_contributions_dict',
 'important_actors',
 'important_actors_rolling',
 'treatment_period',
 'departed_actor_id',
 'all_actors',
 'problem_contr_count',
 'contributions_dict',
 'total_prob_contr',
 'all_actors_period',
 'ind_collab',
 'ind_collab_roll',
 'ind_key_collab_roll',
 'ind_other_collab_roll',
 'cont_collab_roll',
 'cont_collab_clean_char_roll',
 'cont_collab_clean_wd_roll',
 'review_count',
 'review_comment_count',
 'pr_opener',
 'commit_author',
 'pr_authors',
 'departed_involved',
 'key_contributor_count',
 'total_contributor_count',
 'departed_opener',
 'departed_author',
 'contr_count',
 'problem_count',
 'unlinked_issue_count',
 'unlinked_pr_count',
 'linked_issue_pr_count',
 'high_dept_ov_comm_2bin_actors',
 'low_dept_ov_comm_2bin_actors',
 'never_co

In [68]:
t.query('time_period>=treatment_period_x').groupby(['ind_key_collab_2bin'])['comm_ratio'].mean()

ind_key_collab_2bin
0    0.900279
1    0.895424
Name: comm_ratio, dtype: float64