In [1]:
import pandas as pd
import requests
import operator
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("ggplot")

In [2]:
TOKEN = "INSERT_YOUR_TOKEN_HERE"
LANGUAGES = ["java", "ruby", "php", "javascript", "ruby", "go"]

In [3]:
def graphql(data, variables={}):
    results = requests.post("https://api.github.com/graphql",  json={'query': data, 'variables': variables},
                      headers={"Authorization": "Bearer %s" % TOKEN})
    return results.json()


def fetchTopProjects(language_name):
    topProjectsQuery = """
    query TopProjects($search: String!) {
      search(first: 50, query: $search, type: REPOSITORY) {
        edges {
          node {
            ... on Repository {
              name
              createdAt
              description
              owner {
                login
              }
              stargazers {
                totalCount
              }
              defaultBranchRef {
                name
              }
            }
          }
        }
      }
    }
    """
    
    results = graphql(topProjectsQuery, {"search": "language:%s" % language_name})
    projects = results.get("data").get("search").get("edges")
    
    return [{
        "language": language_name,
        "name": project.get("node").get("name"),
        "owner": project.get("node").get("owner").get("login"),
        "description": project.get("node").get("description"),
        "createdAt": project.get("node").get("createdAt"),
        "defaultBranch": project.get("node").get("defaultBranchRef").get("name"),
        "stars": project.get("node").get("stargazers").get("totalCount")
    } for project in projects]


def fetchContributitors(owner, project_name):
    results = requests.get("https://api.github.com/repos/%s/%s/stats/contributors" % (owner, project_name),
                           headers={"Authorization": "token %s" % TOKEN})

    return [{
        "login": result.get("author").get("login"),
        "commits": int(result.get("total"))
    } for result in results.json()]

In [4]:
projects = pd.DataFrame(columns=["language", "name","owner", "description", "createdAt", "defaultBranch", "stars"])
projects['stars'] = projects['stars'].astype('int')
for language in LANGUAGES:
    result = pd.DataFrame(fetchTopProjects(language))
    projects = pd.concat([projects, result], sort=False)

In [5]:
projects.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 300 entries, 0 to 49
Data columns (total 7 columns):
language         300 non-null object
name             300 non-null object
owner            300 non-null object
description      299 non-null object
createdAt        300 non-null object
defaultBranch    300 non-null object
stars            300 non-null int64
dtypes: int64(1), object(6)
memory usage: 18.8+ KB


## Averange of stars by language

In [6]:
projects[["language", "stars"]].groupby(["language"]).mean()

Unnamed: 0_level_0,stars
language,Unnamed: 1_level_1
go,16699.92
java,17237.62
javascript,47400.2
php,10072.28
ruby,13039.62


## Top 10 Projects

In [8]:
projects.sort_values(by=['stars'], ascending=False).head(10)

Unnamed: 0,language,name,owner,description,createdAt,defaultBranch,stars
0,javascript,freeCodeCamp,freeCodeCamp,The https://freeCodeCamp.org open source codeb...,2014-12-24T17:49:19Z,staging,293569
1,javascript,vue,vuejs,"ðŸ–– A progressive, incrementally-adoptable JavaS...",2013-07-29T03:24:51Z,dev,111318
2,javascript,react,facebook,"A declarative, efficient, and flexible JavaScr...",2013-05-24T16:15:54Z,master,109100
3,javascript,d3,d3,"Bring data to life with SVG, Canvas and HTML. ...",2010-09-27T17:22:42Z,master,78108
4,javascript,javascript,airbnb,JavaScript Style Guide,2012-11-01T23:13:50Z,master,74870
5,javascript,react-native,facebook,A framework for building native apps with React.,2015-01-09T18:10:16Z,master,67644
6,javascript,angular.js,angular,AngularJS - HTML enhanced for web apps!,2010-01-06T00:34:37Z,master,58926
7,javascript,Font-Awesome,FortAwesome,"The iconic SVG, font, and CSS toolkit",2012-02-17T14:19:43Z,master,57249
8,javascript,create-react-app,facebook,Create React apps with no build configuration.,2016-07-17T14:55:11Z,next,53971
9,javascript,node,nodejs,Node.js JavaScript runtime :sparkles::turtle::...,2014-11-26T19:57:11Z,master,52410


In [44]:
test = fetchContributitors("golang", "go")

In [45]:
contributors = pd.DataFrame(test)

In [94]:
def getCoreDeveloper(developers):
    totalPercent = 0.0
    coreDevelopers = []
    developers = pd.DataFrame(developers).sort_values(by=["commits"], ascending=False)
    commitsCount = developers["commits"].sum()
    for _, developer in developers.iterrows():
        percent = developer.get("commits") / commitsCount
        if totalPercent < 0.8 and percent > 0.05:
            totalPercent += percent
            coreDevelopers.append(developer)
            
    return coreDevelopers
    

In [95]:
relationship = pd.DataFrame(columns=["project", "login"])
projects = projects.head(10)
for _, project in projects.iterrows():
    projectSlug = "{0}/{1}".format(project.get("owner"),  project.get("name"))
    print("fetching %s..." % projectSlug)
    contributors = fetchContributitors(project.get("owner"), project.get("name"))
    coreDevelopers = getCoreDeveloper(pd.DataFrame(contributors))
    
    for developer in coreDevelopers:
        data = {"project": projectSlug, "login": developer.get("login")}
        relationship = relationship.append(data, ignore_index=True)

fetching iluwatar/java-design-patterns...
fetching ReactiveX/RxJava...
fetching elastic/elasticsearch...
fetching square/retrofit...
fetching square/okhttp...
fetching spring-projects/spring-boot...
fetching google/guava...
fetching kdn251/interviews...
fetching PhilJay/MPAndroidChart...
fetching spring-projects/spring-framework...


In [97]:
relationship.groupby(["login"]).count()

Unnamed: 0_level_0,project
login,Unnamed: 1_level_1
Bezier89,1
JakeWharton,2
PhilJay,1
adriancole,1
akarnokd,1
benjchristensen,1
cbeams,1
cgdecker,1
cpovirk,1
danielgindi,1


In [98]:
relationship.groupby(["project"]).count()

Unnamed: 0_level_0,login
project,Unnamed: 1_level_1
PhilJay/MPAndroidChart,2
ReactiveX/RxJava,3
elastic/elasticsearch,4
google/guava,5
iluwatar/java-design-patterns,2
kdn251/interviews,2
spring-projects/spring-boot,4
spring-projects/spring-framework,4
square/okhttp,4
square/retrofit,1
