In [3]:
import requests
from bs4 import BeautifulSoup
import html5lib
import urllib.request, json

In [4]:
def find_repositories_and_stars(soup):
    
    repository_list = []
    number_of_stars = {}
    
    for repo in soup.find_all(class_='wb-break-all'):
        repository_list.append(repo.find('a')['href'])
        repo_name = repo.find('a').text.strip(' \n')
        number_of_stars[repo_name] = None

    total_stars = 0
    for element in soup.find_all('svg', {'aria-label': 'star'}):
        repo_name = element.parent['href'].split('/')[-2]
        stars = int(element.parent.text.strip('\n '))
        number_of_stars[repo_name] = stars
        total_stars += stars

    for k,v in number_of_stars.items():
        print(f'{k} : {v}')

    return repository_list, number_of_stars, total_stars


def list_languages(repository_list):  

    is_limit = False
    total_language_use_in_bytes = {}

    for repo in repository_list:
        try:
            with urllib.request.urlopen('https://api.github.com/repos' + repo) as url:
                data = json.loads(url.read().decode())
                repo_size = float(data['size'])
        except urllib.error.HTTPError as exception:
            is_limit = True
            repo_size = 1000
            print(exception)
            print('Languages found in all repositories will be printed alphabetically.')

        temp_repo = requests.get('https://github.com/' + repo) 
        soup = BeautifulSoup(temp_repo.content,'html5lib')
        try:
            languages_in_percent = find_used_languages_by_percent(soup)
        except AttributeError:
            print('There are no languages specified for this repository.')
        print(languages_in_percent)
        
        languages_in_bytes = {k: v*repo_size for k, v in languages_in_percent.items()}
        print(languages_in_bytes)

        for language, size in languages_in_bytes.items():
            try:
                total_language_use_in_bytes[language] += size
            except KeyError:
                total_language_use_in_bytes[language] = size
    if is_limit:
        print('alphabetical order')
    else:
        print(total_language_use_in_bytes)
    

def find_used_languages_by_percent(soup):
    languages_dict = {}

    header = soup.find(lambda elm: elm.name == "h2" and "Languages" in elm.text)
    child = header.find_next_siblings()[0].find('span')
    for element in child.find_all('span'):
        language = element['aria-label'].rsplit(' ', 1)[0]
        percent_usage = float(element['aria-label'].rsplit(' ', 1)[1])
        languages_dict[language] = percent_usage
    return languages_dict



In [5]:
username = 'm-kosik'

repos = requests.get('https://github.com/' + username + '?tab=repositories') 
soup = BeautifulSoup(repos.content,'html5lib')

repository_list, number_of_stars, total_stars = find_repositories_and_stars(soup)
print(repository_list, number_of_stars, total_stars)

        

# test idea - total stars equal to the sum number_of_stars.values()

house_price_prediction : None
langtons-ant : None
maze_solving : None
square-nim-game-analysis : None
language-localizer : None
ml-kaggle-challenges : None
['/m-kosik/house_price_prediction', '/m-kosik/langtons-ant', '/m-kosik/maze_solving', '/m-kosik/square-nim-game-analysis', '/m-kosik/language-localizer', '/m-kosik/ml-kaggle-challenges'] {'house_price_prediction': None, 'langtons-ant': None, 'maze_solving': None, 'square-nim-game-analysis': None, 'language-localizer': None, 'ml-kaggle-challenges': None} 0
{'Jupyter Notebook': 100.0}
{'Jupyter Notebook': 15400.0}
{'Jupyter Notebook': 100.0}
{'Jupyter Notebook': 9600.0}
{'Jupyter Notebook': 96.8, 'Python': 3.2}
{'Jupyter Notebook': 6388.8, 'Python': 211.20000000000002}
{'Jupyter Notebook': 97.0, 'Python': 3.0}
{'Jupyter Notebook': 1529302.0, 'Python': 47298.0}
There are no languages specified for this repository.
{'Jupyter Notebook': 97.0, 'Python': 3.0}
{'Jupyter Notebook': 4365.0, 'Python': 135.0}
{'Jupyter Notebook': 99.8, 'Python'