In [None]:
import csv
import base64
import time
import fnmatch
from datetime import datetime, timedelta
from github import Github, StatsContributor
from sustainbeat import awesome2py
from urllib.parse import urlparse 
from os import getenv, path
from dotenv import load_dotenv

In [None]:
def countdown(t: int) -> None:
    """Block for ``t`` seconds while showing a MM:SS countdown on one line.

    Args:
        t: Whole number of seconds to wait. Zero or a negative value returns
            immediately (after printing the trailing blank lines) instead of
            looping forever.
    """
    # `while t:` would never terminate for a negative t, because negative
    # integers are truthy and only move further away from zero.
    while t > 0:
        mins, secs = divmod(t, 60)
        # The carriage return rewrites the same console line on every tick.
        print(f"{mins:02d}:{secs:02d}", end='\r')
        time.sleep(1)
        t -= 1
    print('\n\n\n\n\n')

In [None]:
# Set up the GitHub API client used by the rest of this file.
# load_dotenv() comes from python-dotenv; presumably it populates the process
# environment from a local .env file -- TODO confirm for the installed version.
load_dotenv()
# The credential is read from the GITHUB environment variable. getenv returns
# None when the variable is unset.
# NOTE(review): an unset token likely means an unauthenticated client with a
# much lower rate limit -- confirm.
g = Github(getenv("GITHUB"))

In [None]:
# Load the awesome list from ".awesome.md". The export loop in this file reads
# `repo_dict.rubrics` and, for each rubric, its `entries`.
repo_dict = awesome2py.AwesomeList(".awesome.md")
# Print the loaded list so it can be inspected before the export runs.
print(repo_dict)

In [None]:
# Export one CSV row per active, GitHub-hosted project of the awesome list.
#
# For every entry the script queries the GitHub API (via the module-level
# client `g`) for license, issues, labels, topics, languages, funding,
# organization and commit statistics. Entries that are not hosted on
# github.com, that look inactive, or whose processing raises an error are
# skipped with a console message.
fieldnames = [
    'project_name', 'oneliner', 'project_namespace', 'project_url', 'topics',
    'rubric', 'last_update', 'stargazers_count', 'dominating_languages',
    'organization', 'languages', 'homepage', 'created', 'license',
    'total_commits_last_year', 'last_issue_closed', 'issue_labels',
    'good_first_issue', 'contributors', 'organization_avatar', 'funding',
]

# The context manager guarantees the file is flushed and closed even if the
# run is interrupted; UTF-8 keeps non-ASCII descriptions portable.
with open('./csv/GitHub.csv', 'w', newline='', encoding='utf-8') as csv_github:
    writer = csv.DictWriter(csv_github, fieldnames=fieldnames)
    writer.writeheader()
    for r in repo_dict.rubrics:
        for e in r.entries:
            print("------------------------")
            print("Processing %s" % e.name)
            print("URL: %s" % e.url)
            if urlparse(e.url).netloc != 'github.com':
                print("%s is not a GitHub project" % e.name)
                print("Skip processing")
                print("------------------------")
                continue
            try:
                remaining, limit = g.rate_limiting
                resettime = g.rate_limiting_resettime

                if remaining < 20:
                    print("------------------------")
                    print("Waiting for more available GitHub requests:")
                    current_time = datetime.now().timestamp()
                    # Clamp at zero in case the reset time has already passed.
                    countdown(max(int(resettime) - int(current_time) + 2, 0))

                print("GitHub Requests | Limit: {}, Remaining: {}".format(limit, remaining))
                # Gather project information from GitHub
                # https://pygithub.readthedocs.io/en/latest/github_objects/Repository.html
                repo_path = urlparse(e.url).path.strip("/")
                user, _ = path.split(repo_path)
                repo = g.get_repo(repo_path)

                # Default first so a failed lookup can never leave a stale
                # value from the previous project in the row.
                license_name = "UNDEFINED"
                try:
                    spdx_id = repo.get_license().license.spdx_id
                    if spdx_id == 'NOASSERTION':
                        print("Custom license found")
                        license_name = "CUSTOM"
                    else:
                        license_name = spdx_id
                except Exception:
                    print("No license information found")

                # None means the project has no closed issue at all.
                issues = repo.get_issues(state='closed')
                last_issue_closed = issues[0].updated_at if issues.totalCount > 0 else None

                labels = ",".join(label.name for label in repo.get_labels())
                topics = ",".join(repo.get_topics())

                languages_states = repo.get_languages()
                programming_languages = ",".join(languages_states)
                # Pick the language with the most bytes explicitly instead of
                # relying on response order; None when nothing was detected.
                if languages_states:
                    dominating_languages = max(languages_states, key=languages_states.get)
                else:
                    dominating_languages = None

                funding_information = None
                try:
                    for file in repo.get_contents(".github"):
                        if file.path == ".github/FUNDING.yml":
                            print("Funding found")
                            # Decode to text so the CSV holds the YAML itself
                            # rather than a b'...' bytes representation.
                            funding_information = base64.b64decode(file.content).decode(
                                "utf-8", errors="replace")
                            # Stop here: later files must not reset the value.
                            break
                except Exception:
                    print("No funding information found")

                organization = repo.organization
                if organization is None:
                    organization_name = None
                    organization_avatar = None
                else:
                    organization_name = organization.login
                    organization_avatar = organization.avatar_url

                # The statistics call may return None (no data available yet).
                activity = repo.get_stats_commit_activity() or []
                total_commits_last_year = sum(week.total for week in activity)

                if last_issue_closed is None:
                    issues_stale = True
                else:
                    # Match the awareness of the timestamp: subtracting a
                    # naive from an aware datetime raises TypeError.
                    if last_issue_closed.tzinfo is not None:
                        now = datetime.now(last_issue_closed.tzinfo)
                    else:
                        now = datetime.utcnow()
                    issues_stale = (now - last_issue_closed) > timedelta(days=365)

                # Do not process inactive projects
                if issues_stale and total_commits_last_year == 0:
                    print("%s is an inactive project" % e.name)
                    print("Skip processing")
                    print("------------------------")
                    continue

                # Share of commits per contributor (diagnostic output only).
                contributors = repo.get_stats_contributors() or []
                contributor_activity = {
                    individuum.author.login: individuum.total
                    for individuum in contributors
                    if individuum.author is not None
                }
                commits_total = sum(contributor_activity.values())
                sorted_contributor = dict(sorted(contributor_activity.items(), key=lambda item: item[1]))
                if commits_total:
                    weighted_contribution = {k: v / commits_total for k, v in sorted_contributor.items()}
                else:
                    weighted_contribution = {}
                print(weighted_contribution.items())

                if last_issue_closed is None:
                    last_issue_closed_text = None
                else:
                    last_issue_closed_text = last_issue_closed.strftime("%Y/%m/%d, %H:%M:%S")

                entry_data = {
                    'project_name': e.name,
                    'project_namespace': user,
                    'project_url': repo.clone_url,
                    'rubric': r.key,
                    'oneliner': e.text[2:],
                    'topics': topics,
                    'organization': organization_name,
                    'created': repo.created_at.strftime("%Y/%m/%d, %H:%M:%S"),
                    'last_update': repo.updated_at.strftime("%Y/%m/%d, %H:%M:%S"),
                    'total_commits_last_year': total_commits_last_year,
                    'last_issue_closed': last_issue_closed_text,
                    'stargazers_count': repo.stargazers_count,
                    'dominating_languages': dominating_languages,
                    'languages': programming_languages,
                    'homepage': repo.homepage,
                    'issue_labels': labels,
                    'good_first_issue': repo.get_issues(state='open', labels=['good first issue']).totalCount,
                    'license': license_name,
                    'contributors': repo.get_contributors().totalCount,
                    'organization_avatar': organization_avatar,
                    'funding': funding_information,
                }
                print(entry_data)
                writer.writerow(entry_data)

            # Best effort: report the failure and move on to the next project.
            # A distinct name avoids shadowing the loop variable `e`.
            except Exception as err:
                print(err)
                
            
