In [34]:
import requests
from bs4 import BeautifulSoup
import html5lib
import urllib.request, json
from collections import OrderedDict

In [35]:
def find_repositories_and_stars(soup):
    
    repository_list = []
    number_of_stars = OrderedDict()
    
    for repo in soup.find_all(class_='wb-break-all'):
        repository_list.append(repo.find('a')['href'])
        repo_name = repo.find('a').text.strip(' \n')
        number_of_stars[repo_name] = 0

    total_stars = 0
    for element in soup.find_all('svg', {'aria-label': 'star'}):
        repo_name = element.parent['href'].split('/')[-2]
        stars = int(element.parent.text.strip('\n '))
        number_of_stars[repo_name] = stars
        total_stars += stars

    return repository_list, number_of_stars, total_stars


def list_languages(repository_list):  

    is_limit = False
    total_language_use_in_bytes = OrderedDict()

    for repo in repository_list:
        try:
            with urllib.request.urlopen('https://api.github.com/repos' + repo) as url:
                data = json.loads(url.read().decode())
                repo_size = float(data['size'])
        except urllib.error.HTTPError as exception:
            is_limit = True
            repo_size = 0
            print(exception)
            print('Languages found in all repositories will be printed alphabetically.')

        temp_repo = requests.get('https://github.com/' + repo) 
        soup = BeautifulSoup(temp_repo.content,'html5lib')
        try:
            languages_in_percent = find_used_languages_by_percent(soup)
        except AttributeError:
            print('There are no languages specified for repository ' + repo)

        languages_in_bytes = {k: v*repo_size for k, v in languages_in_percent.items()}

        for language, size in languages_in_bytes.items():
            try:
                total_language_use_in_bytes[language] += size
            except KeyError:
                total_language_use_in_bytes[language] = size
    
    if is_limit:
        
        print('alphabetical order')
    else:
        return total_language_use_in_bytes
    

def find_used_languages_by_percent(soup):
    languages_dict = OrderedDict()

    header = soup.find(lambda elm: elm.name == "h2" and "Languages" in elm.text)
    child = header.find_next_siblings()[0].find('span')
    for element in child.find_all('span'):
        language = element['aria-label'].rsplit(' ', 1)[0]
        percent_usage = float(element['aria-label'].rsplit(' ', 1)[1])
        languages_dict[language] = percent_usage
    return languages_dict



In [36]:
username = 'm-kosik'

repos = requests.get('https://github.com/' + username + '?tab=repositories') 
soup = BeautifulSoup(repos.content,'html5lib')

repository_list, number_of_stars, total_stars = find_repositories_and_stars(soup)
print(repository_list, number_of_stars, total_stars)

total = list_languages(repository_list)
print(total)

    
    
# test idea - total stars equal to the sum number_of_stars.values()

['/m-kosik/house_price_prediction', '/m-kosik/langtons-ant', '/m-kosik/maze_solving', '/m-kosik/square-nim-game-analysis', '/m-kosik/language-localizer', '/m-kosik/ml-kaggle-challenges'] OrderedDict([('house_price_prediction', 0), ('langtons-ant', 0), ('maze_solving', 0), ('square-nim-game-analysis', 0), ('language-localizer', 0), ('ml-kaggle-challenges', 0)]) 0
There are no languages specified for repository /m-kosik/language-localizer
OrderedDict([('Jupyter Notebook', 1797789.4000000001), ('Python', 48110.6)])


In [37]:
username = 'aorvnpeinvjrepin'
repos = requests.get('https://github.com/' + username + '?tab=repositories') 
if not repos.ok:
    print('errr')

errr
