In [56]:
from github import Github
import os
import datetime
import pandas as pd
import numpy as np

from dash import Dash, html, dcc, callback, Input, Output, State
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

import dotenv
# Reload the variables in your '.env' file (override the existing variables).
# The '~' is expanded explicitly: load_dotenv receives the path as-is, and an
# unexpanded '~' does not point at the home directory, so the file would be
# skipped without any error.
dotenv.load_dotenv(os.path.expanduser("~/workspace/.env"), override=True)

# Authentication is defined via github.Auth
from github import Auth

# Using an access token. os.environ[...] (instead of os.getenv) fails right
# here with a KeyError naming the missing variable, rather than passing None
# on to Auth.Token.
auth = Auth.Token(os.environ['GITHUB_ACCESS_TOKEN'])
g = Github(auth=auth)

def get_top_repo(topics):
    """Fetch the most-starred public repositories for a single GitHub topic.

    Only the first result page of the search is read, and at most ``top`` (25)
    rows of it are returned.

    Parameters
    ----------
    topics : sequence of str
        Must contain exactly one topic. With more than one topic the function
        returns ``None`` (multi-topic search is not implemented); an empty
        sequence raises ``IndexError``.

    Returns
    -------
    tuple or None
        ``(df, repositories, total)`` where ``df`` is a DataFrame with the
        columns ``topic, repo, stars, forks, topics, description, language,
        created_at`` (one row per repository, possibly empty), ``repositories``
        is the search result object returned by ``g.search_repositories`` and
        ``total`` is its ``totalCount`` read after the first page was fetched.
    """
    page = 0
    top = 25
    columns = ['topic', 'repo', 'stars', 'forks', 'topics', 'description', 'language', 'created_at']

    if len(topics) > 1:
        return None

    t = topics[0]
    repositories = g.search_repositories(query=f'stars:>0 topic:{t} is:public', sort='stars', order='desc')
    # NOTE(review): in the recorded run totalCount printed 1000 here but 3665
    # was returned below, i.e. the value changes once a page has been fetched.
    # Presumably the pre-fetch value is capped by search pagination -- confirm
    # against the PyGithub documentation.
    print(t, repositories.totalCount)
    # Results come in pages of 30 items; only the first page is used.
    page0 = repositories.get_page(page)

    rows_list = [
        (t, repo.name, repo.stargazers_count, repo.forks_count, repo.topics,
         repo.description, repo.language, repo.created_at.date())
        for repo in page0
    ]
    # Build the frame once, after the loop, so an empty page still yields a
    # (zero-row) DataFrame instead of leaving `df` unbound.
    df = pd.DataFrame(rows_list, columns=columns)

    return df.head(top), repositories, repositories.totalCount

In [57]:
# user input
# Repository to rank, given as '<owner>/<name>'.
search_repo_full = 'microsoft/semantic-kernel'
# Topic(s) to search; get_top_repo returns None when given more than one.
search_topics = ['llm']

In [58]:
# keep the get_result object
df_top, get_result, total = get_top_repo(search_topics)
# Most-starred repository of the topic (first row of the star-sorted page).
top_repo = df_top.iloc[0]
search_repo_obj = g.get_repo(search_repo_full)

# Bind the topic before it is used in the query so this cell runs on a fresh
# kernel (get_top_repo only returns data for a single topic).
topics = search_topics[0]
# Repositories of the same topic with strictly more stars than the searched one;
# their count + 1 is the searched repository's rank.
better_repositories = g.search_repositories(query=f'stars:>{search_repo_obj.stargazers_count} topic:{topics} is:public', sort='stars', order='desc')

llm 1000


In [59]:
# Topic string shown in the summary line below.
# NOTE(review): `topics` is only bound here when exactly one topic was given;
# with any other length the print below relies on a previously bound value.
if len(search_topics) == 1:
    topics = search_topics[0]


# Rank = number of repositories with more stars than the searched one, plus one.
print(f'{search_repo_full} ranks {better_repositories.totalCount + 1 } with {search_repo_obj.stargazers_count} stars. There are {total} repos with at least one star on {topics} topic. The repo with most stars in the topic is {top_repo.repo}.')

microsoft/semantic-kernel ranks 14 with 16793 stars. There are 3665 repos with at least one star on llm topic. The repo with most stars in the topic is ollama.


In [63]:
def get_trend_data(r1_repo, r2_repo):
    """Build a two-point star "trend" (creation date -> today) for two repos.

    Each repository contributes two rows: zero stars on its creation date and
    its current star count today.

    Parameters
    ----------
    r1_repo
        Object exposing ``repo``, ``stars`` and ``created_at`` attributes
        (``created_at`` is used as-is for the date column).
    r2_repo
        Object exposing ``name``, ``stargazers_count`` and a ``created_at``
        datetime (its ``.date()`` is used).

    Returns
    -------
    pandas.DataFrame
        Four rows with the columns ``date``, ``stars`` and ``repo``.
    """
    today = datetime.date.today()
    r2_repo_name = r2_repo.name
    r2_created_at = r2_repo.created_at.date()

    # Build the frame straight from the records: going through a NumPy array
    # would coerce the mixed date/int/str rows to a single object dtype and
    # lose the numeric dtype of the 'stars' column.
    records = [
        (r1_repo.created_at, 0, r1_repo.repo),
        (r2_created_at, 0, r2_repo_name),
        (today, r1_repo.stars, r1_repo.repo),
        (today, r2_repo.stargazers_count, r2_repo_name),
    ]
    df_trend = pd.DataFrame(records, columns=['date', 'stars', 'repo'])
    return df_trend


def plot_trend(df_trend, r1_repo, r2_repo):
    """Draw the creation-date -> today star lines for two repositories.

    ``df_trend`` supplies the ``date``, ``stars`` and ``repo`` columns that are
    plotted; ``r1_repo`` (attributes ``repo`` and ``created_at``) and
    ``r2_repo`` (attributes ``name`` and a ``created_at`` datetime) supply the
    title and the x-axis tick labels. Returns the plotly figure.
    """
    first_name = r1_repo.repo
    first_created = r1_repo.created_at
    second_name = r2_repo.name
    second_created = r2_repo.created_at.date()

    # The x axis shows exactly three ticks: both creation dates and today.
    tick_positions = [first_created, second_created, datetime.date.today()]
    tick_labels = [
        f"{first_name}<br>created on<br>{first_created}",
        f"{second_name}<br>created on<br>{second_created}",
        "today",
    ]

    fig = px.line(
        df_trend,
        x=df_trend.date,
        y=df_trend.stars,
        color=df_trend.repo,
        labels={'date': '', 'stars': ''},
        markers=True,
        text=df_trend.stars,
    )
    # Star counts are printed above each marker in compact SI notation.
    fig.update_traces(textposition='top center', texttemplate='%{text:.2~s}')
    fig.update_xaxes(
        showgrid=False,
        tickmode='array',
        ticktext=tick_labels,
        tickvals=tick_positions,
    )
    fig.update_yaxes(showticklabels=False, showgrid=False)
    fig.update_layout(
        width=600,
        showlegend=False,
        title=f'{second_name} vs {first_name}',
    )
    return fig


In [64]:

# Compare the topic's most-starred repo (row of df_top) with the searched repo.
df_trend = get_trend_data(top_repo, search_repo_obj)
fig_trend = plot_trend(df_trend, top_repo, search_repo_obj)

fig_trend.show()


In [None]:

# Top repositories bar chart
def plot_top(df_top, search_repo_full):
    """Horizontal bar chart of the repositories in ``df_top`` by star count.

    Parameters
    ----------
    df_top : pandas.DataFrame
        Needs the columns ``repo`` and ``stars``; one bar is drawn per row.
    search_repo_full : str
        Repository to highlight, normally ``'<owner>/<name>'``. Only the part
        after the last ``/`` is compared with ``df_top.repo``; a bare name
        without ``/`` is accepted as well.

    Returns
    -------
    plotly.graph_objects.Figure
        The bar chart; the first row whose ``repo`` matches is drawn in the
        second colour of the qualitative Plotly palette, all others in the
        first.
    """
    title = f'Top {len(df_top)} repos'
    palette = px.colors.qualitative.Plotly
    colors = [palette[0]] * len(df_top)
    search_repo_name = search_repo_full.split('/')[-1]

    # Use the row *position* of the match, not its index label: `colors` is a
    # plain list, so a label is only valid when the frame has a 0-based
    # RangeIndex.
    match_positions = (df_top.repo == search_repo_name).to_numpy().nonzero()[0]
    if len(match_positions) > 0:
        colors[match_positions[0]] = palette[1]

    fig = go.Figure(data=[go.Bar(
                x=df_top.stars, y=df_top.repo,
                text=df_top.stars,
                textposition='auto',
                orientation='h',
            )])

    fig.update_traces(texttemplate='%{text:.2s}', marker_color=colors)
    fig.update_xaxes(showticklabels=False, showgrid=False)
    # 'total ascending' places the largest bar at the top of the chart.
    fig.update_layout(height=800, yaxis={'categoryorder':'total ascending'}, title=title)
    return fig

In [62]:
# Bar chart of the top repositories for the topic, highlighting the searched repo if present.
fig_top = plot_top(df_top, search_repo_full)
fig_top.show()