In [1]:
import re

import pandas as pd
from tabulate import tabulate

In [2]:
# Location of the repository-statistics CSV that the markdown table is built from.
data_path = "outputs/2024-03-12_1817_repo_stats.csv"
df_raw = pd.read_csv(filepath_or_buffer=data_path)

In [3]:
# Work on a copy so df_raw keeps the data exactly as it was loaded.
df = df_raw.copy(deep=True)
df.head(n=2)

Unnamed: 0,Owner,Repository Name,Stars,Forks,Contributors,Issues,Releases,Time Since Last Commit,Watchers,License,About,Languages,URL
0,huggingface,transformers,121891,24181,434,1032,141,"0 days, 8 hrs, 4 mins",1086,Apache License 2.0,🤗 Transformers: State-of-the-art Machine Learn...,"Python, Cuda, Shell, C++, Dockerfile, C, Makef...",https://github.com/huggingface/transformers
1,ChatGPTNextWeb,ChatGPT-Next-Web,64022,52899,167,223,57,"0 days, 11 hrs, 13 mins",382,MIT License,A cross-platform ChatGPT/Gemini UI (Web / PWA ...,"TypeScript, SCSS, JavaScript, Shell, Dockerfil...",https://github.com/ChatGPTNextWeb/ChatGPT-Next...


In [4]:
# Column labels of the working frame, as a plain Python list.
list(df.columns)

['Owner',
 'Repository Name',
 'Stars',
 'Forks',
 'Contributors',
 'Issues',
 'Releases',
 'Time Since Last Commit',
 'Watchers',
 'License',
 'About',
 'Languages',
 'URL']

In [5]:
# Markdown link for each repository: [name](https://github.com/owner/name).
# Built by zipping the two source columns instead of a row-wise
# DataFrame.apply(axis=1), which constructs a Series for every row.
df["Repo"] = [
    f"[{name}](https://github.com/{owner}/{name})"
    for owner, name in zip(df["Owner"], df["Repository Name"])
]

In [6]:
# 1-based row number derived from the frame's index, stored in the "#" column
row_numbers = df.index + 1
df["#"] = row_numbers


In [7]:
# Columns kept for the final table, in display order.
col_order = (
    ["#", "Repo", "About"]  # identification
    + ["Stars", "Forks", "Issues", "Contributors", "Releases"]  # counts
    + ["Time Since Last Commit", "License"]  # activity and licensing
)

In [8]:
# Keep only the table columns, in display order. The explicit .copy() makes df
# an independent frame, so later column assignments do not act on a slice of
# the previous frame (which pandas flags with SettingWithCopyWarning).
df = df[col_order].copy()

In [9]:
# Render every numeric column as text with a comma as thousands separator.
numeric_cols = [
    name for name in df.columns if pd.api.types.is_numeric_dtype(df[name])
]
for name in numeric_cols:
    df[name] = df[name].map("{:,}".format)

In [10]:
# Preview the first five rows of the formatted frame.
df.head(n=5)

Unnamed: 0,#,Repo,About,Stars,Forks,Issues,Contributors,Releases,Time Since Last Commit,License
0,1,[transformers](https://github.com/huggingface/...,🤗 Transformers: State-of-the-art Machine Learn...,121891,24181,1032,434,141,"0 days, 8 hrs, 4 mins",Apache License 2.0
1,2,[ChatGPT-Next-Web](https://github.com/ChatGPTN...,A cross-platform ChatGPT/Gemini UI (Web / PWA ...,64022,52899,223,167,57,"0 days, 11 hrs, 13 mins",MIT License
2,3,[gpt4all](https://github.com/nomic-ai/gpt4all),gpt4all: run open-source LLMs anywhere,62317,6832,364,88,10,"0 days, 9 hrs, 34 mins",MIT License
3,4,[llama.cpp](https://github.com/ggerganov/llama...,LLM inference in C/C++,52671,7411,1229,479,1544,"0 days, 8 hrs, 47 mins",MIT License
4,5,[privateGPT](https://github.com/imartinez/priv...,Interact with your documents using the power o...,48358,6351,142,62,6,"0 days, 9 hrs, 7 mins",Apache License 2.0


In [11]:
# Render the frame as a "github"-format table, taking the header row from the
# column names and leaving out the DataFrame index.
markdown_table = tabulate(
    df,
    headers="keys",
    showindex=False,
    tablefmt="github",
)

In [12]:

# Shrink the alignment padding: every run of 3+ spaces becomes two spaces.
condensed_markdown_table = re.sub(r" {3,}", "  ", markdown_table)
# Normalise each header-separator cell to ten dashes. The lookarounds require
# the dash run to sit directly between two pipes, so dash runs that appear
# inside space-padded cell text are left untouched.
condensed_markdown_table = re.sub(
    r"(?<=\|)-{4,}(?=\|)", "----------", condensed_markdown_table
)

In [13]:
# print() rather than a bare expression, so the multi-line string is shown
# with real line breaks instead of its repr.
print(condensed_markdown_table)

|  # | Repo  | About  | Stars  | Forks  | Issues  |  Contributors | Releases  | Time Since Last Commit  | License  |
|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  1 | [transformers](https://github.com/huggingface/transformers)  | 🤗 Transformers: State-of-the-art Machine Learning for Pytorch, TensorFlow, and JAX.  | 121,891 | 24,181  | 1,032  |  434 | 141  | 0 days, 8 hrs, 4 mins  | Apache License 2.0  |
|  2 | [ChatGPT-Next-Web](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web)  | A cross-platform ChatGPT/Gemini UI (Web / PWA / Linux / Win / MacOS). 一键拥有你自己的跨平台 ChatGPT/Gemini 应用。  | 64,022  | 52,899  | 223  |  167 | 57  | 0 days, 11 hrs, 13 mins  | MIT License  |
|  3 | [gpt4all](https://github.com/nomic-ai/gpt4all)  | gpt4all: run open-source LLMs anywhere  | 62,317  | 6,832  | 364  |  88 | 10  | 0 days, 9 hrs, 34 mins  | MIT License  |
|  4 | [llama.cpp](https://github.com/ggerganov/llama.cpp)  | LLM inference in 

In [14]:
# Save the condensed markdown table to file. The encoding is pinned to UTF-8
# because the table contains non-ASCII text (emoji, CJK characters) that the
# platform-default encoding may not be able to represent.
with open("outputs/2024-03-12_1817_repo_stats.md", "w", encoding="utf-8") as f:
    f.write(condensed_markdown_table)