In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from scripts.oso_db import execute_query, query_and_dump_to_csv
from visualizations.contribution_heatmap import contribution_heatmap
from visualizations.stacked_area import activity_plot, EVENT_TYPES

In [None]:
# SQL text for the raw event pull. It returns one row per event whose "toId" is
# an artifact in the 'GITHUB' namespace that is linked (through
# project_artifacts_artifact) to the project with slug 'gitcoin'.
# Selected columns: project name, artifact name (aliased github_repo), the
# event's "fromId" (aliased contributor_id), event type, event id and event time.
# The NOT ... IN list removes the named gitcoinco/* repositories from the result.
# NOTE(review): the excluded names look like forks / unrelated repos - confirm intent.
query = """
    SELECT
        p."name" AS project_name,
        a."name" AS github_repo,
        e."fromId" AS contributor_id,
        e."type" AS event_type,
        e."id" AS event_id,
        e."time" AS event_time
    FROM
        project p
    JOIN
        project_artifacts_artifact paa ON p."id" = paa."projectId"
    JOIN
        artifact a ON paa."artifactId" = a."id"
    JOIN
        event e ON a."id" = e."toId"
    WHERE
        a."namespace" = 'GITHUB'
        AND p."slug" = 'gitcoin'
        AND NOT a."name" IN (
            'gitcoinco/mattermost-server',
            'gitcoinco/mattermost-webapp',
            'gitcoinco/mattermost-redux',
            'gitcoinco/mattermost-plugin-gitlab',
            'gitcoinco/django-cacheops',
            'gitcoinco/contract-metadata',
            'gitcoinco/discourse-adplugin',
            'gitcoinco/slack-protector',
            'gitcoinco/snapshot-spaces',
            'gitcoinco/sourcecred',
            'gitcoinco/cred',
            'gitcoinco/code_fund_ads',
            'gitcoinco/EIPs',
            'gitcoinco/codefund',
            'gitcoinco/lemonade-stand',
            'gitcoinco/coz-slack-ico-management-platform',
            'gitcoinco/desktop-app'
        )
"""

# Run the query. With col_names=True the first returned row is used as the
# column header and the remaining rows as the data.
result = execute_query(query, col_names=True)
column_names, rows = result[0], result[1:]
df = pd.DataFrame(rows, columns=column_names)

# Derive calendar buckets (day, month, quarter) from the event timestamp, then
# discard the raw timestamp column.
event_times = df['event_time']
df = df.assign(
    date=event_times.map(lambda ts: ts.date()),
    month=pd.PeriodIndex(event_times, freq='M'),
    quarter=pd.PeriodIndex(event_times, freq='Q'),
).drop(columns=['event_time'])
df

In [None]:
# Total number of COMMIT_CODE events in the raw pull.
int((df['event_type'] == 'COMMIT_CODE').sum())

In [None]:
# Number of distinct contributor ids with at least one COMMIT_CODE event.
# dropna=False keeps a missing id counted as a value, matching len(unique()).
df.loc[df['event_type'] == 'COMMIT_CODE', 'contributor_id'].nunique(dropna=False)

In [None]:
# Number of distinct repositories present in the raw pull.
df['github_repo'].nunique(dropna=False)

In [None]:
# Overview of how many events of each type were pulled; the cells below filter
# on these event_type values. (Looking up an empty column name raises KeyError,
# since the frame has no such column, and would stop a top-to-bottom run.)
df['event_type'].value_counts()

In [None]:
# Thresholds for the activity filter (strictly-greater-than comparisons).
min_contributor_events = 10
min_repo_events = 1

# Contributors: keep ids with more than `min_contributor_events` events among
# the tracked EVENT_TYPES.
contrib_counts = df[df.event_type.isin(EVENT_TYPES)]['contributor_id'].value_counts()
filtered_contribs = contrib_counts[contrib_counts > min_contributor_events].index

# Repos: keep those with more than `min_repo_events` STARRED events or more
# than `min_repo_events` COMMIT_CODE events.
repo_star_counts = df[df.event_type == 'STARRED'].groupby('github_repo')['event_id'].count()
repo_commit_counts = df[df.event_type == 'COMMIT_CODE'].groupby('github_repo')['event_id'].count()
filtered_repos = (
    set(repo_star_counts[repo_star_counts > min_repo_events].index)
    | set(repo_commit_counts[repo_commit_counts > min_repo_events].index)
)

# Combine the two boolean masks with the logical `&` operator rather than
# arithmetic `*`: same rows, but it states the intent and avoids pandas'
# bool-arithmetic fallback warning on large frames.
repo_mask = df['github_repo'].isin(filtered_repos)
contributor_mask = df['contributor_id'].isin(filtered_contribs)
dff = df[repo_mask & contributor_mask]

dff

In [None]:
# COMMIT_CODE events per contributor per month, from the filtered frame.
df_contrib_counts = (
    dff[dff.event_type.isin(['COMMIT_CODE'])]
    .groupby(['month', 'contributor_id'])['event_id']
    .count()
    .rename('contributions')
    .reset_index()
)

# A contributor-month with more than `fulltime_threshold` commits is labelled
# 'full-time', otherwise 'part-time'. The comparison is vectorized instead of
# a row-wise lambda.
fulltime_threshold = 10
df_contrib_counts['dev_type'] = (
    df_contrib_counts['contributions']
    .gt(fulltime_threshold)
    .map({True: 'full-time', False: 'part-time'})
)
# Each row is one contributor in one month, so a constant 1 lets the plot's
# value column add up to a head count.
df_contrib_counts['devs'] = 1

activity_plot(
    df_contrib_counts,
    'dev_type',
    date_col='month',
    value_col='devs',
    filter_col='dev_type',
    filter_vals=['full-time', 'part-time'],
    ylabel='Monthly Active Contributors',
    start_date='2018-01-01',
    end_date='2023-08-31',
)

In [None]:
# Same monthly-contributor chart, restricted to the 'full-time' series.
full_time_plot_options = dict(
    date_col='month',
    value_col='devs',
    filter_col='dev_type',
    filter_vals=['full-time'],
    ylabel='Monthly Active Contributors',
    start_date='2018-01-01',
    end_date='2023-08-31',
)
activity_plot(df_contrib_counts, 'dev_type', **full_time_plot_options)

In [None]:
# Work on a copy of the filtered events with a constant per-event weight of 1.
# NOTE(review): presumably 'num_contributions' is the value column activity_plot
# uses when none is passed - confirm against the helper.
activity_df = dff.assign(num_contributions=1)

activity_plot(
    activity_df,
    'event_type',
    date_col='month',
    start_date='2018-01-01',
    end_date='2023-08-31',
    filter_vals=['COMMIT_CODE', 'PULL_REQUEST_CREATED', 'ISSUE_CREATED'],
)

In [None]:
# Number of events per (repo, event type) in the filtered frame.
df_activity = (
    dff
    .groupby(['github_repo', 'event_type'])
    .agg(total_contributions=('event_id', 'count'))
    .reset_index()
)

# Repo x event-type heatmap of those totals.
activity_heatmap_options = dict(
    index_col='github_repo',
    column_col='event_type',
    value_col='total_contributions',
    figsize=(8, 22),
    vmax=500,
    sort_label_method='mean',
)
contribution_heatmap(df_activity, **activity_heatmap_options)

In [None]:
# COMMIT_CODE events per repo per quarter.
commit_events = dff.loc[dff['event_type'] == 'COMMIT_CODE']
df_commits = (
    commit_events
    .groupby(['github_repo', 'quarter'])
    .agg(total_commits=('event_id', 'count'))
    .reset_index()
)

# Repo x quarter heatmap of commit counts.
contribution_heatmap(
    df_commits,
    index_col='github_repo',
    column_col='quarter',
    value_col='total_commits',
    vmax=50,
    figsize=(8, 22),
)

In [None]:
# PULL_REQUEST_APPROVED events per repo per month.
approved_pr_events = dff.loc[dff['event_type'] == 'PULL_REQUEST_APPROVED']
df_prs = (
    approved_pr_events
    .groupby(['github_repo', 'month'])
    .agg(total_prs_approved=('event_id', 'count'))
    .reset_index()
)

# Repo x month heatmap; figsize is not passed, so the helper's own default applies.
contribution_heatmap(
    df_prs,
    index_col='github_repo',
    column_col='month',
    value_col='total_prs_approved',
    vmax=50,
)

In [None]:
# Distinct contributors per repo per quarter, counting only the tracked EVENT_TYPES.
tracked_events = dff.loc[dff['event_type'].isin(EVENT_TYPES)]
df_contribs = (
    tracked_events
    .groupby(['github_repo', 'quarter'])
    .agg(total_contributors=('contributor_id', 'nunique'))
    .reset_index()
)

# Repo x quarter heatmap of contributor head counts.
contribution_heatmap(
    df_contribs,
    index_col='github_repo',
    column_col='quarter',
    value_col='total_contributors',
    vmax=10,
    figsize=(8, 22),
)

In [None]:
# Top 20 contributor ids by number of PULL_REQUEST_APPROVED events (unfiltered frame).
is_pr_approval = df['event_type'] == 'PULL_REQUEST_APPROVED'
df.loc[is_pr_approval, 'contributor_id'].value_counts().head(20)

In [None]:
# Per-contributor counts of ISSUE_CLOSED and ISSUE_CREATED events (unfiltered frame).
issue_events = df.loc[df['event_type'].isin(['ISSUE_CLOSED', 'ISSUE_CREATED'])]
issue_counts = issue_events.pivot_table(
    index='contributor_id',
    columns='event_type',
    values='event_id',
    aggfunc='count',
)

# dropna() keeps only contributors that have a count in every pivoted column;
# NUM_ISSUES is the row-wise sum of those counts.
df_issues = issue_counts.dropna().assign(
    NUM_ISSUES=lambda counts: counts.sum(axis=1)
)
df_issues.sort_values('NUM_ISSUES', ascending=False).head(20)