In [1]:
import datetime
import json
import warnings
from urllib.parse import quote_plus

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import seaborn as sns
import sqlalchemy as salc
# NOTE(review): this silences every warning for the whole kernel session,
# including deprecation notices from pandas / SQLAlchemy.
warnings.filterwarnings('ignore')

# Connection settings are read from a JSON file two directories above the notebook.
# Keys used below: user, password, host, port, database.
with open("../../config.json") as config_file:
    config = json.load(config_file)

# Percent-encode the credentials before placing them in the URL: a raw '@', ':',
# '/' or '%' in the user name or password would otherwise be parsed as URL
# structure and produce a wrong (or unparseable) connection string.
# str() guards against a JSON config that stores a purely numeric password.
database_connection_string = 'postgresql+psycopg2://{}:{}@{}:{}/{}'.format(
    quote_plus(str(config['user'])),
    quote_plus(str(config['password'])),
    config['host'],
    config['port'],
    config['database'],
)

# Every connection opened by this engine gets search_path set to this schema,
# so the queries below can use unqualified table names.
dbschema = 'augur_data'
engine = salc.create_engine(
    database_connection_string,
    connect_args={'options': '-csearch_path={}'.format(dbschema)})

In [2]:
# One row per repository that has at least one row in `issues`, together with
# its repo group and the number of issue rows.  Repositories without issues are
# dropped by the inner join.
#
# The ORDER BY lives on the outer query: ordering inside a derived table does
# not guarantee the order of the final grouped result, and the row order
# determines the index written to the CSV export.  repo_id is a tie-breaker so
# the order is fully deterministic.
repo_query = salc.sql.text("""
    SELECT
        rg.rg_name,
        rg.repo_group_id,
        r.repo_name,
        r.repo_id,
        r.repo_git,
        r.forked_from,
        r.repo_archived,
        count(*) AS issues
    FROM repo_groups rg
    JOIN repo r
        ON rg.repo_group_id = r.repo_group_id
    JOIN issues i
        ON i.repo_id = r.repo_id
    GROUP BY
        rg.rg_name,
        rg.repo_group_id,
        r.repo_name,
        r.repo_id,
        r.repo_git,
        r.forked_from,
        r.repo_archived
    ORDER BY
        rg.rg_name,
        r.repo_name,
        r.repo_id
""")

# Load the full result set into memory; `repolist` is reused by later cells.
repolist = pd.read_sql(repo_query, con=engine)

display(repolist)

repolist.dtypes

Unnamed: 0,rg_name,repo_group_id,repo_name,repo_id,repo_git,forked_from,repo_archived,issues
0,18f,25602,10x-apis-xtravaganza,125279,https://github.com/18f/10x-apis-xtravaganza,Parent not available,1.0,10
1,18f,25602,10x-dux-app,125423,https://github.com/18f/10x-dux-app,Parent not available,1.0,3
2,18f,25602,10x-dux-vuls-eval,125425,https://github.com/18f/10x-dux-vuls-eval,Parent not available,1.0,6
3,18f,25602,10x-mel,125410,https://github.com/18f/10x-mel,Parent not available,1.0,11
4,18f,25602,10x-mlaas,125245,https://github.com/18f/10x-mlaas,Parent not available,1.0,92
...,...,...,...,...,...,...,...,...
48106,zotero,25444,zotero-libreoffice-integration,27205,https://github.com/zotero/zotero-libreoffice-i...,Parent not available,0.0,63
48107,zotero,25444,zotero-schema,27431,https://github.com/zotero/zotero-schema,Parent not available,0.0,2
48108,zotero,25444,zotero-standalone-build,27218,https://github.com/zotero/zotero-standalone-build,Parent not available,0.0,58
48109,zotero,25444,zotero-word-for-mac-integration,27192,https://github.com/zotero/zotero-word-for-mac-...,Parent not available,0.0,34


rg_name           object
repo_group_id      int64
repo_name         object
repo_id            int64
repo_git          object
forked_from       object
repo_archived    float64
issues             int64
dtype: object

In [3]:
# Persist the per-repository issue counts next to the notebook.  The DataFrame
# index is written as the first (unnamed) column, which is the pandas default.
repolist.to_csv('issues_with_git_url.csv')