In [7]:
import pandas as pd
import requests
import operator
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("ggplot")

In [40]:
TOKEN = "INSERT_YOUR_TOKEN_HERE"
LANGUAGES = ["java", "ruby", "php", "javascript", "ruby", "go"]

In [38]:
def graphql(data, variables={}):
    results = requests.post("https://api.github.com/graphql",  json={'query': data, 'variables': variables},
                      headers={"Authorization": "Bearer %s" % TOKEN})
    return results.json()


def fetchTopProjects(language_name):
    topProjectsQuery = """
    query TopProjects($search: String!) {
      search(first: 50, query: $search, type: REPOSITORY) {
        edges {
          node {
            ... on Repository {
              name
              createdAt
              description
              owner {
                login
              }
              stargazers {
                totalCount
              }
              defaultBranchRef {
                name
              }
            }
          }
        }
      }
    }
    """
    
    results = graphql(topProjectsQuery, {"search": "language:%s" % language_name})
    projects = results.get("data").get("search").get("edges")
    
    return [{
        "language": language_name,
        "name": project.get("node").get("name"),
        "owner": project.get("node").get("owner").get("login"),
        "description": project.get("node").get("description"),
        "createdAt": project.get("node").get("createdAt"),
        "defaultBranch": project.get("node").get("defaultBranchRef").get("name"),
        "stars": project.get("node").get("stargazers").get("totalCount")
    } for project in projects]


def fetchContributitors(owner, project_name):
    results = requests.get("https://api.github.com/repos/%s/%s/stats/contributors" % (owner, project_name),
                           headers={"Authorization": "token %s" % TOKEN})

    return [{
        "login": result.get("author").get("login"),
        "commits": int(result.get("total"))
    } for result in results.json()]

In [28]:
projects = pd.DataFrame(columns=["language", "name","owner", "description", "createdAt", "defaultBranch", "stars"])
projects['stars'] = projects['stars'].astype('int')
for language in LANGUAGES:
    result = pd.DataFrame(fetchTopProjects(language))
    projects = pd.concat([projects, result], sort=False)

In [30]:
projects.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 300 entries, 0 to 49
Data columns (total 7 columns):
language         300 non-null object
name             300 non-null object
owner            300 non-null object
description      299 non-null object
createdAt        300 non-null object
defaultBranch    300 non-null object
stars            300 non-null int64
dtypes: int64(1), object(6)
memory usage: 18.8+ KB


## Averange of stars by language

In [26]:
projects[["language", "stars"]].groupby(["language"]).mean()

Unnamed: 0_level_0,stars
language,Unnamed: 1_level_1
go,16697.62
java,17236.5
javascript,47395.22
php,10071.8
ruby,13038.56


In [22]:
projects.dtypes

language         object
name             object
owner            object
description      object
createdAt        object
defaultBranch    object
stars            object
dtype: object

## Top 10 Projects

In [79]:
projects.sort_values(by=['stars'], ascending=False).head(10)

Unnamed: 0,createdAt,defaultBranch,description,language,name,owner,stars
0,2014-12-24T17:49:19Z,staging,The https://freeCodeCamp.org open source codeb...,javascript,freeCodeCamp,freeCodeCamp,293546.0
1,2013-07-29T03:24:51Z,dev,"🖖 A progressive, incrementally-adoptable JavaS...",javascript,vue,vuejs,111214.0
2,2013-05-24T16:15:54Z,master,"A declarative, efficient, and flexible JavaScr...",javascript,react,facebook,109022.0
3,2010-09-27T17:22:42Z,master,"Bring data to life with SVG, Canvas and HTML. ...",javascript,d3,d3,78096.0
4,2012-11-01T23:13:50Z,master,JavaScript Style Guide,javascript,javascript,airbnb,74817.0
5,2015-01-09T18:10:16Z,master,A framework for building native apps with React.,javascript,react-native,facebook,67604.0
6,2010-01-06T00:34:37Z,master,AngularJS - HTML enhanced for web apps!,javascript,angular.js,angular,58924.0
7,2012-02-17T14:19:43Z,master,"The iconic SVG, font, and CSS toolkit",javascript,Font-Awesome,FortAwesome,57236.0
8,2016-07-17T14:55:11Z,next,Create React apps with no build configuration.,javascript,create-react-app,facebook,53914.0
9,2014-11-26T19:57:11Z,master,Node.js JavaScript runtime :sparkles::turtle::...,javascript,node,nodejs,52357.0


In [41]:
sample = projects.head(5)
for index, row in sample.iterrows():
    results = fetchContributitors(row['owner'], row['name'])
    contributors = pd.DataFrame(results)
    totalCommits = contributors["commits"].sum()
    print("Total commits in %s = %d " % (row["name"], totalCommits))
    

Total commits in java-design-patterns = 1614 
Total commits in RxJava = 3648 
Total commits in elasticsearch = 34003 
Total commits in retrofit = 933 
Total commits in okhttp = 1716 
