In [1]:
# !pip install pygithub
# !pip install pprintpp
# !pip install javalang
# !pip install http://www.boddie.org.uk/python/downloads/javaclass-0.2.tar.gz

## Get commits from GitHub based on Stack Overflow info

In [None]:
import base64
import javalang
import requests as req
from collections import OrderedDict

# Get commits from GitHub based on Stack Overflow info
def get_commits(user, repo, filepath, snippet="//stackoverflow.com"):
    """Download every listed revision of one file and locate the snippet.

    The commit listing for ``filepath`` is requested from the GitHub REST
    API and, for each listed commit, the file content at that commit is
    fetched and base64-decoded.

    Args:
        user: Owner of the repository.
        repo: Repository name (without the owner).
        filepath: Path of the file inside the repository.
        snippet: Text to look for in each revision of the file.

    Returns:
        A ``(commits, earliest_sha_containing_snippet)`` tuple.  ``commits``
        is an ``OrderedDict`` mapping commit SHA -> raw file content
        (``bytes``) in the order the API listed the commits.  The second
        item is the SHA of the *last listed* commit whose content contains
        ``snippet`` (``None`` when no revision matches).
        NOTE(review): this is the "earliest" commit only if the listing is
        newest-first and fits in a single response page -- confirm against
        the GitHub API documentation; pagination is not followed here.

    Raises:
        SystemExit: when a request fails at the transport level.
    """
    import os
    from urllib.parse import quote

    # The credential is read from the environment so that it never lives in
    # the notebook.  Any token that was previously committed in this file
    # must be treated as compromised and revoked.
    token = os.environ.get("GITHUB_TOKEN")
    headers = {'Authorization': 'token ' + token} if token else {}
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    repo_api = "https://api.github.com/repos/" + user + "/" + repo

    commits = OrderedDict()
    earliest_sha_containing_snippet = None
    # The decoded file content is bytes, so search for the snippet as bytes
    # instead of searching the repr() of the bytes object.
    needle = snippet.encode("utf-8")

    try:
        resp = req.request(
            method='GET',
            url=repo_api + "/commits",
            params={"path": filepath},  # lets requests URL-encode the path
            headers=headers,
            timeout=request_timeout,
        )
    except req.exceptions.RequestException as e:
        raise SystemExit(e)

    listing = resp.json()
    if not isinstance(listing, list):
        # Anything other than a list (e.g. an error object) carries no commits.
        return commits, earliest_sha_containing_snippet

    # Percent-encode the path once; "/" is kept so the directory structure survives.
    contents_url = repo_api + "/contents/" + quote(filepath)

    for entry in listing:
        try:
            sha = entry["sha"]
        except (KeyError, TypeError):
            continue

        # GET https://api.github.com/repos/:owner/:repo/contents/:FILE_PATH?ref=SHA
        try:
            resp_temp = req.request(
                method='GET',
                url=contents_url,
                params={"ref": sha},
                headers=headers,
                timeout=request_timeout,
            )
        except req.exceptions.RequestException as e:
            raise SystemExit(e)

        committed_file = resp_temp.json()
        try:
            content = base64.b64decode(committed_file["content"])
        except (KeyError, TypeError, ValueError):
            # No "content" field (e.g. file absent at this commit) or
            # undecodable payload: skip this revision.
            continue

        commits[sha] = content
        if needle in content:
            # Keeps being overwritten, so the last listed match wins.
            earliest_sha_containing_snippet = sha

    return commits, earliest_sha_containing_snippet


In [None]:
# Smoke test: run get_commits against one known file and print what it found.

user = "GunoH"
repo = "intellij-community"
filepath = "plugins/tasks/tasks-core/jira/src/com/intellij/tasks/jira/soap/JiraLegacyApi.java"
snippet = "//stackoverflow.com"

complete_commits, earliest_sha_containing_snippet = get_commits(
    user, repo, filepath, snippet
)

# SHA reported for the snippet, followed by the full SHA -> content mapping.
print(earliest_sha_containing_snippet)
print(complete_commits)

# Position of that SHA in the listing order, then the first listed SHA.
ordered_shas = list(complete_commits)
print(ordered_shas.index(earliest_sha_containing_snippet))
print(ordered_shas[0])

b96c582506a4768c49ba8a0457eb04965b9452a5
OrderedDict([('de539170f9480d28da8cfda1611c42206b70ad25', b'package com.intellij.tasks.jira.soap;\n\nimport com.intellij.openapi.diagnostic.Logger;\nimport com.intellij.tasks.CustomTaskState;\nimport com.intellij.tasks.LocalTask;\nimport com.intellij.tasks.Task;\nimport com.intellij.tasks.TaskBundle;\nimport com.intellij.tasks.impl.TaskUtil;\nimport com.intellij.tasks.jira.JiraRemoteApi;\nimport com.intellij.tasks.jira.JiraRepository;\nimport com.intellij.util.containers.ContainerUtil;\nimport org.apache.commons.httpclient.HttpStatus;\nimport org.apache.commons.httpclient.NameValuePair;\nimport org.apache.commons.httpclient.methods.GetMethod;\nimport org.jdom.Element;\nimport org.jdom.input.SAXBuilder;\nimport org.jetbrains.annotations.NonNls;\nimport org.jetbrains.annotations.NotNull;\nimport org.jetbrains.annotations.Nullable;\n\nimport java.util.Collections;\nimport java.util.List;\nimport java.util.Set;\n\n/**\n * Legacy integration restored

## Calculate metrics from the commit information

In [None]:
import javalang

# Calculate metrics from the commit information
def analyze(s):
    """Compute the LSCC and CC (class cohesion) values for Java source ``s``.

    The source is parsed with ``javalang``.  Every ``FieldDeclaration``
    contributes one attribute name (that of its first declarator only) and
    every ``MethodDeclaration`` counts as one method.  A method is taken to
    reference an attribute when the attribute name occurs as a substring of
    ``str(method_node)``.
    NOTE(review): this substring test can over-match (e.g. a field named
    ``i``); what ``str(node)`` contains depends on the javalang version.

    Args:
        s: Java source code accepted by ``javalang.parse.parse``.

    Returns:
        ``(LSCC, CC)``.  With ``k`` methods, ``l`` attributes and ``x_i`` the
        number of methods referencing attribute ``i``:

        * LSCC is 0 when ``l == 0`` and ``k > 0``; 1 when ``k == 1`` or
          (``l > 0`` and ``k == 0``); otherwise
          ``sum(x_i * (x_i - 1)) / (l * k * (k - 1))``.
        * CC is 1 when ``k`` is 0 or 1; otherwise
          ``2 * sum(|Ii & Ij| / |Ii | Ij|) / (k * (k - 1))`` over all
          unordered pairs of methods, pairs with an empty union adding 0.

    Raises:
        ZeroDivisionError: when the source has neither fields nor methods
            (the LSCC formula divides by ``l * k * (k - 1)``).
    """
    from itertools import combinations

    tree = javalang.parse.parse(s)

    # One name per field declaration; ``int a, b;`` therefore yields only ``a``.
    all_attrs = [
        str(node.declarators[0].name)
        for _, node in tree.filter(javalang.tree.FieldDeclaration)
    ]
    num_attrs = len(all_attrs)

    # num_methods_for_attrs[i]: how many methods mention attribute i.
    # methods_attrs[m]: the set of attribute names mentioned by method m.
    num_methods_for_attrs = [0] * num_attrs
    methods_attrs = []
    for _, node in tree.filter(javalang.tree.MethodDeclaration):
        node_text = str(node)  # rendered once per method, not once per attribute
        method_attrs = set()
        for i, attr in enumerate(all_attrs):
            if attr in node_text:
                method_attrs.add(attr)
                num_methods_for_attrs[i] += 1
        methods_attrs.append(method_attrs)
    num_methods = len(methods_attrs)

    k = num_methods
    l = num_attrs

    # Calculate LSCC
    if l == 0 and k > l:
        LSCC = 0
    elif (l > 0 and k == 0) or k == 1:
        LSCC = 1
    else:
        LSCC = sum(x * (x - 1) for x in num_methods_for_attrs)
        LSCC = LSCC / (l * k * (k - 1))

    # Calculate Class Cohesion (CC)
    if k == 1 or k == 0:
        CC = 1
    else:
        # Every unordered pair of methods must take part, including pairs
        # that involve the last method.
        similarity_sum = 0
        for first, second in combinations(methods_attrs, 2):
            union = first | second
            if union:
                similarity_sum += len(first & second) / len(union)
        CC = 2 * similarity_sum / (k * (k - 1))

    return LSCC, CC








## Use the BigQuery export (`samples.csv`) to fetch each file's GitHub commit history and compute the cohesion metrics

In [None]:
import csv
import webbrowser
rows = []


# Using file from the Big Query database to pull info about GitHub commits and analyse them for calculation of metrics
# For every data row of samples.csv: fetch the commit history of the referenced
# file, and append one [LSCC, CC] pair per analysed revision before writing the
# row to data.csv.  Both files are opened with newline='' as the csv module
# requires, so embedded newlines and line endings are handled by csv itself.
with open("samples.csv", 'r', newline='') as csvfile, open('data.csv', 'w', newline='') as csv_out:
    csvreader = csv.reader(csvfile)
    csvwriter = csv.writer(csv_out)
    for row in csvreader:
        # Drop the last 12 columns of the input row.
        # NOTE(review): the meaning of those columns is not visible here.
        row = row[:-12]
        if csvreader.line_num == 1:
            continue  # header line
        repo = row[3]
        user = row[2]
        filepath = row[5]
        snippet = "//stackoverflow.com"
        commits, earliest_sha_containing_snippet = get_commits(user, repo, filepath, snippet)
        if earliest_sha_containing_snippet is None:
            continue

        # Materialise the listing once instead of on every loop iteration.
        commit_shas = list(commits)
        commit_contents = list(commits.values())
        earliest_index = commit_shas.index(earliest_sha_containing_snippet)

        # The entry listed right after the reported commit.
        # NOTE(review): presumably the revision just before the snippet was
        # introduced, assuming a newest-first listing -- confirm.
        earliest_before_index = earliest_index + 1
        if earliest_before_index >= len(commit_contents):
            continue

        try:
            # Walk from that entry back to the first listed commit.
            for i in range(earliest_before_index, -1, -1):
                val1, val2 = analyze(commit_contents[i])
                row.append([val1, val2])
            csvwriter.writerow(row)
        except Exception as exc:
            # Best effort: a revision that cannot be analysed drops the whole
            # row, but say so instead of skipping silently.
            print("Skipping {}/{}:{} ({!r})".format(user, repo, filepath, exc))
            continue


## Output final data analysis results

In [None]:
# Output final data analysis results
# Concatenate the partial result files, in order, into data_combined.csv.
part_paths = ['data-0.csv', 'data-1.csv', 'data-2.csv', 'data-3.csv']

parts = []
for part_path in part_paths:
    with open(part_path) as fp:
        parts.append(fp.read())

combined = ""
for part in parts:
    # Start each part on a fresh line, but only add a newline when the text
    # so far does not already end with one; an unconditional "\n" would leave
    # a blank row between parts that already end in a line terminator.
    if combined and not combined.endswith("\n"):
        combined += "\n"
    combined += part

with open('data_combined.csv', 'w') as fp:
    fp.write(combined)

In [None]:
#@title
# abandoned
# NOTE(review): leftover PyGithub experiment.  It only handles the first data
# row of samples.csv and does nothing with what it fetches; consider deleting
# the cell.

import os

from github import Github
import csv
import webbrowser

# The access token is read from the environment so that it never lives in the
# notebook.  Any token that was previously committed in this file must be
# treated as compromised and revoked.
github_token = os.environ.get("GITHUB_TOKEN")
auth_headers = {'Authorization': 'token ' + github_token} if github_token else {}
g = Github(github_token)

# Github Enterprise with custom hostname
#g = Github(base_url="https://{hostname}/api/v3", login_or_token="access_token")
for repo in g.get_user().get_repos():
    print(repo.name)
    # NOTE(review): a `repo.edit(has_wiki=False)` call used to sit here; it
    # disabled the wiki of every repository of the token owner, which an
    # exploration cell should not do, so it was removed.
    # to see all the available attributes and methods
    print(dir(repo))

with open("samples.csv", 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        if csvreader.line_num == 1:
            continue  # header line
        repo_name = row[3]
        user = row[2]
        # PyGithub looks repositories up by "owner/name".
        repo = g.get_repo(user + "/" + repo_name)
        # NOTE(review): a `get_dir_contents(...)` call that depended on names
        # never defined in this notebook (`repository`, `server_path`, `sha`,
        # `urllib`) was dropped; its result was unused.
        try:
            resp = req.request(
                method='GET',
                url="https://api.github.com/repos/" + user + "/" + repo_name + "/commits",
                headers=auth_headers,
            )
        except req.exceptions.RequestException as e:
            raise SystemExit(e)
        commits_json = resp.json()

        # Stop after the first data row.
        if csvreader.line_num == 2:
            break

small-boquet
['CHECK_AFTER_INIT_FLAG', '_CompletableGithubObject__complete', '_CompletableGithubObject__completed', '_GithubObject__makeSimpleAttribute', '_GithubObject__makeSimpleListAttribute', '_GithubObject__makeTransformedAttribute', '_Repository__create_pull', '_Repository__create_pull_1', '_Repository__create_pull_2', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_allow_merge_commit', '_allow_rebase_merge', '_allow_squash_merge', '_archive_url', '_archived', '_assignees_url', '_blobs_url', '_branches_url', '_clone_url', '_collaborators_url', '_comments_url', '_commits_url', '_compare_url', '_completeIfNeeded', '_completeIfNotSet', '_contents_url', '_contributors_url', '_created_at', '_