In [1]:
# This script requires github3.py version 0.9.6:
# pip install github3.py==0.9.6

import os
import pandas as pd
from github3 import login

# Location used as the default for every search query built below.
LOCATION = 'Recife'

# The API token comes from the environment so it never lives in the notebook.
# NOTE(review): when the variable is unset the token is None -- confirm how
# github3's login() behaves in that case before relying on `gh`.
GITHUB_API_TOKEN = os.getenv('GITHUB_API_TOKEN')
gh = login(token=GITHUB_API_TOKEN)

def queries_list(location=LOCATION):
    """Build the GitHub user-search queries for one location.

    The search is split by follower count (more than one, exactly one, zero),
    so the three queries cover non-overlapping follower ranges.

    Args:
        location: Value for the ``location:`` search qualifier. A value that
            contains whitespace is wrapped in double quotes so the whole
            phrase stays attached to the qualifier.

    Returns:
        A tuple of three query strings.
    """
    location = str(location)
    # Unquoted, only the first word of e.g. "Sao Paulo" would belong to the
    # qualifier; the remaining words would become free-text search terms.
    if any(ch.isspace() for ch in location) and not location.startswith('"'):
        location = '"{}"'.format(location)
    follower_filters = ('>1', '1', '0')
    return tuple(
        'location:{} followers:{}'.format(location, flt)
        for flt in follower_filters
    )

def search_users(query):
    """Run a GitHub user search and collect the matched users.

    Args:
        query: Search query string passed to ``gh.search_users``.

    Returns:
        A list holding the ``user`` attribute of every search result, in the
        order the search (sorted by followers) yields them.
    """
    found = []
    for result in gh.search_users(query, sort='followers'):
        found.append(result.user)
    return found


def fetch_gh_users():
    """Run every query from ``queries_list()`` through ``search_users``.

    Returns:
        A list with one entry per query, each entry being whatever
        ``search_users`` returned for that query.
    """
    batches = []
    for query in queries_list():
        batches.append(search_users(query))
    return batches


def create_users_df(batches):
    """Stack the user batches into one single-column DataFrame.

    Args:
        batches: Iterable of batches; each batch supplies the values of the
            ``User`` column for one intermediate frame.

    Returns:
        A DataFrame with a ``User`` column and a fresh 0..n-1 index.
    """
    frames = []
    for batch in batches:
        frames.append(pd.DataFrame({'User': batch}))
    return pd.concat(frames, ignore_index=True)


def followers_iter(gh_user):
    """Return a user's followers as one comma-separated string.

    Args:
        gh_user: Object exposing ``iter_followers()``.

    Returns:
        ``str()`` of every follower joined with ``,``; an empty string when
        the iterator yields nothing.
    """
    names = map(str, gh_user.iter_followers())
    return ','.join(names)


def map_f_batch(map_fn, users_iterator):
    """Apply ``map_fn`` to each user and wrap the results in a DataFrame.

    Args:
        map_fn: Callable invoked once per element of ``users_iterator``.
        users_iterator: Iterable of users for one batch.

    Returns:
        A DataFrame whose single ``Followers`` column holds the mapped
        values in iteration order.
    """
    mapped = list(map(map_fn, users_iterator))
    return pd.DataFrame({'Followers': mapped})


def create_followers_df(batches):
    """Build the followers table for every user in every batch.

    Each batch is turned into a frame by ``map_f_batch`` using
    ``followers_iter`` as the per-user mapper; the frames are then stacked.

    Args:
        batches: Iterable of user batches.

    Returns:
        A DataFrame with a fresh 0..n-1 index covering all batches in order.
    """
    frames = []
    for batch in batches:
        frames.append(map_f_batch(followers_iter, batch))
    return pd.concat(frames, ignore_index=True)


In [2]:
# Fetch the users once; both tables below are derived from the same batches.
ghusers_batches = fetch_gh_users()
# One row per user returned by the searches.
gh_users = create_users_df(ghusers_batches)
# gh_users.to_csv('gh_users.csv', index=False)

# One row per user again, holding that user's comma-joined followers string.
gh_followers = create_followers_df(ghusers_batches)
gh_followers.columns = ['Followers']
# gh_followers.to_csv('gh_followers.csv', index=False)

In [3]:
# Summary statistics (count / unique / top / freq) of the user table.
gh_users.describe()

Unnamed: 0,User
count,1899.0
unique,996.0
top,
freq,867.0


In [11]:
# Summary statistics of the followers table.
gh_followers.describe()

Unnamed: 0,User
count,1899.0
unique,996.0
top,
freq,867.0


In [9]:
# Peek at the first rows of the user table.
gh_users.head()

Unnamed: 0,User
0,tarruda
1,joselitojunior1
2,mairatma
3,marcelcaraciolo
4,luanfonceca


In [31]:
# Index-based join: both frames were built from the same batches with a
# reset 0..n-1 index, so row i in each frame refers to the same user.
gh_users_followers = gh_users.join(gh_followers)

In [36]:
# Display the combined user / followers table.
gh_users_followers

Unnamed: 0,User,Followers
0,tarruda,"Sannis,danielmahon,csjaba,FergusRedican,Victor..."
1,joselitojunior1,"renatooliveira,jeffesonmaia,jotaefe,duartefq,J..."
2,mairatma,"brunocoelho,henvic,eduardolundgren,aperrelli,a..."
3,marcelcaraciolo,"thiagoarrais,brunojm,henriquebastos,macndesign..."
4,luanfonceca,"brunohenrique,luizvarela,gladson,lucasbibiano,..."
5,deividazevedo2,"JoaquimCMH,Widancassio,LuizAntonioPS,yhikishim..."
6,gileno,"brunojm,rafaelcaricio,frenetic,adonescunha,mar..."
7,simoneas02,"ElsonBarcelos,IsabelaDePaula,sergiockd,douglas..."
8,filipeximenes,"luisgabriel,renatooliveira,marciobarbosa,anton..."
9,renatooliveira,"adrianomelo,x8lucas8x,luisgabriel,joselitojuni..."


In [80]:
# Character length of each followers string (not the number of followers).
gh_users_followers['Followers'].map(len)

0       5425
1       4120
2       3630
3       3586
4       3329
5       2803
6       2596
7       2369
8       1824
9       1810
10      1810
11      1387
12      1178
13      1344
14      1378
15      1250
16      1163
17      1225
18      1161
19      1081
20      1036
21       939
22       952
23       828
24       810
25       866
26       892
27       800
28       723
29       673
        ... 
1869       0
1870       0
1871       0
1872       0
1873       0
1874       0
1875       0
1876       0
1877       0
1878       0
1879       0
1880       0
1881       0
1882       0
1883       0
1884       0
1885       0
1886       0
1887       0
1888       0
1889       0
1890       0
1891       0
1892       0
1893       0
1894       0
1895       0
1896       0
1897       0
1898       0
Name: Followers, Length: 1899, dtype: int64

In [89]:
# An empty string means the user has no followers; otherwise the count is
# the number of comma-separated names in the string.
gh_users_followers['nFollowers'] = gh_users_followers['Followers'].apply(
    lambda names: len(names.split(',')) if names else 0
)

gh_users_followers

Unnamed: 0,User,Followers,nFollowers
0,tarruda,"Sannis,danielmahon,csjaba,FergusRedican,Victor...",561
1,joselitojunior1,"renatooliveira,jeffesonmaia,jotaefe,duartefq,J...",346
2,mairatma,"brunocoelho,henvic,eduardolundgren,aperrelli,a...",334
3,marcelcaraciolo,"thiagoarrais,brunojm,henriquebastos,macndesign...",332
4,luanfonceca,"brunohenrique,luizvarela,gladson,lucasbibiano,...",297
5,deividazevedo2,"JoaquimCMH,Widancassio,LuizAntonioPS,yhikishim...",268
6,gileno,"brunojm,rafaelcaricio,frenetic,adonescunha,mar...",230
7,simoneas02,"ElsonBarcelos,IsabelaDePaula,sergiockd,douglas...",206
8,filipeximenes,"luisgabriel,renatooliveira,marciobarbosa,anton...",163
9,renatooliveira,"adrianomelo,x8lucas8x,luisgabriel,joselitojuni...",161


In [92]:
# Write the combined table to disk, with the count column placed before the
# raw followers string.
gh_users_followers.to_csv(
    'gh_users_followers.csv',
    columns=['User', 'nFollowers', 'Followers'],
    index=False,
)