In [5]:
### Script to get GitHub profile data of all Stargazers of a given GitHub repository 
###
###    by Max Woolf (@minimaxir)

import json
import csv
import urllib.request as urllib2
import urllib.request
import datetime
import time
import GHKeys as mystuff
import os.path
import pickle

In [27]:
def getInfo(user,repository):

    repo = user+"/"+repository

    fields = ["user_id", "username", "num_followers", "num_following", "num_repos","created_at","star_time"]
    page_number = 0
    users_processed = 0
    stars_remaining = True
    list_stars = []

    print("Gathering Stargazers for %s..." % repo)

    ###
    ###    This block of code creates a list of tuples in the form of (username, star_time)
    ###    for the Statgazers, which will laterbe used to extract full GitHub profile data
    ###

    while stars_remaining:
        query_url = "https://api.github.com/repos/%s/stargazers?page=%s&access_token=%s" % (repo, page_number, mystuff.access_token)

        req = urllib.request.Request(query_url)
        req.add_header('Accept', 'application/vnd.github.v3.star+json')
        try:
            response = urllib.request.urlopen(req)
        except:
            pass
        data = json.loads(response.read())

        for user in data:
            username = user['user']['login']

            star_time = datetime.datetime.strptime(user['starred_at'],'%Y-%m-%dT%H:%M:%SZ')
            star_time = star_time + datetime.timedelta(hours=-5) # EST
            star_time = star_time.strftime('%Y-%m-%d %H:%M:%S')

            list_stars.append((username, star_time))

        if len(data) < 25:
            stars_remaining = False

        page_number += 1

    print("Done Gathering Stargazers for %s!" % repo)

    list_stars = list(set(list_stars)) # remove dupes

    print("Now Gathering Stargazers' GitHub Profiles...")

    ###
    ###    This block of code extracts the full profile data of the given Stargazer
    ###    and writes to CSV
    ###

    with open('%s-stargazers.csv' % repo.split('/')[1], 'w') as stars:
        print(fields)
        stars_writer = csv.writer(stars)
        stars_writer.writerow(fields)

        for user in list_stars:
            username = user[0]

            query_url = "https://api.github.com/users/%s?access_token=%s" % (username, mystuff.access_token)

            req = urllib.request.Request(query_url)
            try:
                response = urllib.request.urlopen(req)
            except:
                pass
            data = json.loads(response.read())

            user_id = data['id']
            num_followers = data['followers']
            num_following = data['following']
            num_repos = data['public_repos']

            created_at = datetime.datetime.strptime(data['created_at'],'%Y-%m-%dT%H:%M:%SZ')
            created_at = created_at + datetime.timedelta(hours=-5) # EST
            created_at = created_at.strftime('%Y-%m-%d %H:%M:%S')

            stars_writer.writerow([user_id, username, num_followers, num_following, num_repos, created_at, user[1]])

            users_processed += 1

            if users_processed % 100 == 0:
                print("%s Users Processed: %s" % (users_processed, datetime.datetime.now()))

            time.sleep(1) # stay within API rate limit of 5000 requests / hour + buffer

In [28]:
getInfo("kelu124","vit3rick")
getInfo("kelu124","lit3rick")
getInfo("kelu124","pyUn0-lib")
getInfo("kelu124","un0rick")
getInfo("kelu124","echomods")

Gathering Stargazers for kelu124/vit3rick...
Done Gathering Stargazers for kelu124/vit3rick!
Now Gathering Stargazers' GitHub Profiles...
['user_id', 'username', 'num_followers', 'num_following', 'num_repos', 'created_at', 'star_time']
Gathering Stargazers for kelu124/lit3rick...
Done Gathering Stargazers for kelu124/lit3rick!
Now Gathering Stargazers' GitHub Profiles...
['user_id', 'username', 'num_followers', 'num_following', 'num_repos', 'created_at', 'star_time']
Gathering Stargazers for kelu124/pyUn0-lib...
Done Gathering Stargazers for kelu124/pyUn0-lib!
Now Gathering Stargazers' GitHub Profiles...
['user_id', 'username', 'num_followers', 'num_following', 'num_repos', 'created_at', 'star_time']


In [72]:
def getWatcher(USERNAME,repository):

    repo = USERNAME+"/"+repository

    fields = ["user_id", "username", "num_followers", "num_following", "num_repos","created_at","star_time"]
    page_number = 0
    users_processed = 0
    stars_remaining = True
    list_stars = []

    print("Gathering subscribers for %s..." % repo)

    ###
    ###    This block of code creates a list of tuples in the form of (username, star_time)
    ###    for the Statgazers, which will laterbe used to extract full GitHub profile data
    ###

    while stars_remaining:
        query_url = "https://api.github.com/repos/%s/subscribers?page=%s&access_token=%s" % (repo, page_number, mystuff.access_token)

        req = urllib.request.Request(query_url)
        req.add_header('Accept', 'application/vnd.github.v3.star+json')
        try:
            response = urllib.request.urlopen(req)
        except:
            pass
        data = json.loads(response.read())
        #print(data)
        for user in data:
            username = user['login']

            list_stars.append(username)

        if len(data) < 25:
            stars_remaining = False

        page_number += 1

    print("Done Gathering subscribers for %s!" % repo)
    with open("./project_pkl/"+USERNAME+"_"+repository+'_suscribers.pkl', 'wb') as f:
        pickle.dump(list_stars, f)
    return list_stars

In [71]:
getWatcher("kelu124","vit3rick")
getWatcher("kelu124","lit3rick")
getWatcher("kelu124","pyUn0-lib")
getWatcher("kelu124","un0rick")
getWatcher("kelu124","echomods")

Gathering subscribers for kelu124/vit3rick...
Done Gathering subscribers for kelu124/vit3rick!
Gathering subscribers for kelu124/lit3rick...
Done Gathering subscribers for kelu124/lit3rick!
Gathering subscribers for kelu124/pyUn0-lib...
Done Gathering subscribers for kelu124/pyUn0-lib!
Gathering subscribers for kelu124/un0rick...
Done Gathering subscribers for kelu124/un0rick!
Gathering subscribers for kelu124/echomods...
Done Gathering subscribers for kelu124/echomods!


['kelu124',
 'DINKIN',
 'openube',
 'bobnet',
 'fabiogiordani',
 'blakme',
 'antouanbg',
 'boyihao-utp',
 'wlmeng11',
 'docfabio',
 'thecomicopera',
 'ChunHsinTung',
 'Doom4535',
 'dmitry-sukhoruchkin',
 'aperocks',
 '10syle',
 'g4idrijs',
 'AnttiLukats',
 'yuntianrenren',
 'JeanRintoul',
 'diegslva',
 'basharbme',
 'mxochicale',
 'Felipeasg',
 'prasimix',
 'ykeraichia',
 'Bobbyjoness',
 'hetykai',
 'justicelee',
 'cmadh',
 'kelu124',
 'DINKIN',
 'openube',
 'bobnet',
 'fabiogiordani',
 'blakme',
 'antouanbg',
 'boyihao-utp',
 'wlmeng11',
 'docfabio',
 'thecomicopera',
 'ChunHsinTung',
 'Doom4535',
 'dmitry-sukhoruchkin',
 'aperocks',
 '10syle',
 'g4idrijs',
 'AnttiLukats',
 'yuntianrenren',
 'JeanRintoul',
 'diegslva',
 'basharbme',
 'mxochicale',
 'Felipeasg',
 'prasimix',
 'ykeraichia',
 'Bobbyjoness',
 'hetykai',
 'justicelee',
 'cmadh',
 'panzerkiller',
 'serhargun']

In [73]:
def getForks(USERNAME,repository):

    repo = USERNAME+"/"+repository

    fields = ["user_id", "username", "num_followers", "num_following", "num_repos","created_at","star_time"]
    page_number = 0
    users_processed = 0
    stars_remaining = True
    list_stars = []

    print("Gathering subscribers for %s..." % repo)

    ###
    ###    This block of code creates a list of tuples in the form of (username, star_time)
    ###    for the Statgazers, which will laterbe used to extract full GitHub profile data
    ###

    while stars_remaining:
        query_url = "https://api.github.com/repos/%s/forks?page=%s&access_token=%s" % (repo, page_number, mystuff.access_token)

        req = urllib.request.Request(query_url)
        req.add_header('Accept', 'application/vnd.github.v3.star+json')
        try:
            response = urllib.request.urlopen(req)
        except:
            pass
        data = json.loads(response.read())
        #print(data)
        for user in data:
            username = user['owner']['login']

            list_stars.append(username)

        if len(data) < 25:
            stars_remaining = False

        page_number += 1

    print("Done Gathering subscribers for %s!" % repo)
    with open("./project_pkl/"+USERNAME+"_"+repository+'_forks.pkl', 'wb') as f:
        pickle.dump(list_stars, f)
    return list_stars

In [67]:
getForks("kelu124","vit3rick")
getForks("kelu124","lit3rick")
getForks("kelu124","pyUn0-lib")
getForks("kelu124","un0rick")
getForks("kelu124","echomods")

Gathering subscribers for kelu124/vit3rick...
Done Gathering subscribers for kelu124/vit3rick!
Gathering subscribers for kelu124/lit3rick...
Done Gathering subscribers for kelu124/lit3rick!
Gathering subscribers for kelu124/pyUn0-lib...
Done Gathering subscribers for kelu124/pyUn0-lib!
Gathering subscribers for kelu124/un0rick...
Done Gathering subscribers for kelu124/un0rick!
Gathering subscribers for kelu124/echomods...
Done Gathering subscribers for kelu124/echomods!


['sysclock',
 'mtubpeng1',
 'kevinn1999',
 'mohammadfatemieh',
 'panzerkiller',
 'oring111',
 'Yarulik',
 'Hendryputra',
 'fabriciopk',
 'r4forth',
 'trungnpt',
 'riscmcu',
 'fcqing',
 'DuoBelt',
 'dev1104',
 'jammie',
 'ascillato',
 'octavianCraciun',
 'YaweiZhao',
 'djson',
 'awantae',
 'basharbme',
 'sunjiangbo',
 'h798471649',
 'zhangsinap',
 'Graal76',
 'TobiG77',
 'amashal',
 'ZebraTech',
 'vck',
 'sysclock',
 'mtubpeng1',
 'kevinn1999',
 'mohammadfatemieh',
 'panzerkiller',
 'oring111',
 'Yarulik',
 'Hendryputra',
 'fabriciopk',
 'r4forth',
 'trungnpt',
 'riscmcu',
 'fcqing',
 'DuoBelt',
 'dev1104',
 'jammie',
 'ascillato',
 'octavianCraciun',
 'YaweiZhao',
 'djson',
 'awantae',
 'basharbme',
 'sunjiangbo',
 'h798471649',
 'zhangsinap',
 'Graal76',
 'TobiG77',
 'amashal',
 'ZebraTech',
 'vck',
 'dzwpusa',
 'maswangy',
 'trigrass2',
 'jtwiefel',
 '416104443',
 'docfabio',
 'AdLibitumTechnologies',
 'QuantumZhang',
 'zhouzhuhuang',
 'dezdeepblue',
 'huleg',
 'soobash',
 'mahiralta

In [74]:
def getStars(USERNAME,repository):

    repo = USERNAME+"/"+repository

    fields = ["user_id", "username", "num_followers", "num_following", "num_repos","created_at","star_time"]
    page_number = 0
    users_processed = 0
    stars_remaining = True
    list_stars = []

    print("Gathering Stargazers for %s..." % repo)

    ###
    ###    This block of code creates a list of tuples in the form of (username, star_time)
    ###    for the Statgazers, which will laterbe used to extract full GitHub profile data
    ###

    while stars_remaining:
        query_url = "https://api.github.com/repos/%s/stargazers?page=%s&access_token=%s" % (repo, page_number, mystuff.access_token)

        req = urllib.request.Request(query_url)
        req.add_header('Accept', 'application/vnd.github.v3.star+json')
        try:
            response = urllib.request.urlopen(req)
        except:
            pass
        data = json.loads(response.read())

        for user in data:
            username = user['user']['login']


            list_stars.append(username)

        if len(data) < 25:
            stars_remaining = False

        page_number += 1

    print("Done Gathering Stargazers for %s!" % repo)

    list_stars = list(set(list_stars)) # remove dupes
    print("Done Gathering subscribers for %s!" % repo)
    with open("./project_pkl/"+USERNAME+"_"+repository+'_stars.pkl', 'wb') as f:
        pickle.dump(list_stars, f)
    return list_stars

In [69]:
getStars("kelu124","vit3rick")
getStars("kelu124","lit3rick")
getStars("kelu124","pyUn0-lib")
getStars("kelu124","un0rick")
getStars("kelu124","echomods")

Gathering Stargazers for kelu124/vit3rick...
Done Gathering Stargazers for kelu124/vit3rick!
Done Gathering subscribers for kelu124/vit3rick!
Gathering Stargazers for kelu124/lit3rick...
Done Gathering Stargazers for kelu124/lit3rick!
Done Gathering subscribers for kelu124/lit3rick!
Gathering Stargazers for kelu124/pyUn0-lib...
Done Gathering Stargazers for kelu124/pyUn0-lib!
Done Gathering subscribers for kelu124/pyUn0-lib!
Gathering Stargazers for kelu124/un0rick...
Done Gathering Stargazers for kelu124/un0rick!
Done Gathering subscribers for kelu124/un0rick!
Gathering Stargazers for kelu124/echomods...
Done Gathering Stargazers for kelu124/echomods!
Done Gathering subscribers for kelu124/echomods!


['rrozestw',
 'goeblr',
 'rapha-opensource',
 'r06548032',
 'harlanw',
 'hwwong',
 'panamantis',
 'thewtex',
 'boyihao-utp',
 'bhattisatish',
 'zentown',
 'mlp6',
 'kelu124',
 'DircoDT',
 'davidonet',
 'diegslva',
 'tangym',
 'alsrgv',
 'matlab379',
 'DINKIN',
 'rene-aguirre',
 'yyyg45',
 'kovachwt',
 'uid89626',
 'jaanpehechaanho',
 'selftaught',
 'Felipeasg',
 'wly0215',
 'suyashkumar',
 'mediovia',
 'bensooter',
 'robksawyer',
 'anfractuosity',
 'mxochicale',
 'XinShuYang',
 'ARezaK',
 'qinshiysb',
 'julescarbon',
 'cleder',
 'orakle',
 'elmsfu',
 'shakhmetov',
 'openube',
 'paul-monokoto',
 'Guokr1991',
 'baibaratsky',
 'robotong',
 'Ritaqk',
 'Jingfeng-LU',
 'Bobbyjoness',
 'clf1985',
 'DuoBelt',
 'milkowski',
 'DigiMaxIO',
 'IanBoyanZhang',
 'Doom4535',
 'lancemk',
 'Palantir555',
 'wlmeng11',
 '07bm27',
 'annafieber',
 'JoyDoSun',
 'cooldenng',
 'pkral78',
 'josecohenca',
 'klonggan',
 'DannyRavi',
 'eobodo',
 'Alberts00',
 'guillaumedavidphd',
 'lograss',
 'speters',
 'loredana

In [4]:
import glob, os
AllUsers = []
for file in glob.glob("./project_pkl/*.pkl"):
    #print(file)
    with open(file, 'rb') as f:
        listusers = pickle.load(f)
    for OneUser in listusers:
        AllUsers.append(OneUser)
        
AllUsersList = list(set(AllUsers))

In [25]:
def CreateUsers(AllUsersList):

    for user in AllUsersList:
        username = user
        FILENAME = "./users_pkl/"+username+".pkl"
        
        if not os.path.isfile(FILENAME):        
            query_url = "https://api.github.com/users/%s?access_token=%s" % (username, mystuff.access_token)

            req = urllib.request.Request(query_url)
            try:
                response = urllib.request.urlopen(req)
            except:
                pass
            data = json.loads(response.read())

            user_id = data['id']
            num_followers = data['followers']
            num_following = data['following']
            num_repos = data['public_repos']

            created_at = datetime.datetime.strptime(data['created_at'],'%Y-%m-%dT%H:%M:%SZ')
            created_at = created_at + datetime.timedelta(hours=-5) # EST
            created_at = created_at.strftime('%Y-%m-%d %H:%M:%S')

            with open(FILENAME, 'wb') as f:
                pickle.dump(data, f)
            print (FILENAME+" saved.")
            time.sleep(1) # stay within API rate limit of 5000 requests / hour + buffer

In [29]:
CreateUsers(AllUsersList)

In [None]:
def getStars(USERNAME):


    page_number = 0
    users_processed = 0
    stars_remaining = True
    list_stars = []
 

    ###
    ###    This block of code creates a list of tuples in the form of (username, star_time)
    ###    for the Statgazers, which will laterbe used to extract full GitHub profile data
    ###
    FILENAME = "./users_pkl/"+USERNAME+"_stars.pkl"
    if not os.path.isfile(FILENAME):
        
        while stars_remaining:
            
            time.sleep(1)
            query_url = "https://api.github.com/users/%s/starred?page=%s&access_token=%s" % (USERNAME, page_number, mystuff.access_token)

            req = urllib.request.Request(query_url)
            req.add_header('Accept', 'application/vnd.github.v3.star+json')
            try:
                response = urllib.request.urlopen(req)
            except:
                pass
            data = json.loads(response.read())
            #print(data)
            for user in data:
                name = user["repo"]['name']
                full_name = user["repo"]['full_name']

                list_stars.append((name,full_name))

            if len(data) < 25:
                stars_remaining = False

            page_number += 1
            
        list_stars = list(set(list_stars)) # remove dupes 
        with open(FILENAME, 'wb') as f:
            pickle.dump(list_stars, f)
        print (FILENAME+" saved.")
        return list_stars
    else:
        (FILENAME+" existed.")
    return 'Exists'

In [None]:
for k in AllUsersList:
    getStars(k)