In [240]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
# Scrape https://github.com/trending into `list` (one dict per repository)
# and `df` (the same records as a DataFrame with integer stars/forks).

# GitHub Trending URL
url = "https://github.com/trending"

# Fetch the HTML content. The timeout keeps the cell from hanging forever
# on a stalled connection.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    # Raise instead of calling exit(): exit() stops the interpreter, which in
    # a notebook takes the whole session down rather than reporting the error.
    raise RuntimeError(f"Failed to fetch data (HTTP {response.status_code})")

# Parse the HTML
soup = BeautifulSoup(response.text, "html.parser")

# Find the projects: each trending repository is an <article class="Box-row">
projects = soup.find_all("article", class_="Box-row")

# Column order of the resulting records / DataFrame.
columns = ["user", "name", "project", "description", "language", "stars", "forks", "url"]

# NOTE: `list` shadows the builtin. The name is kept because the database cell
# further down passes it to insert_data(list).
list = []
for article in projects:
    # Get the project name ("user/repo") from the heading link, dropping the
    # whitespace GitHub puts around the slash.
    heading_link = article.h2.a if article.h2 is not None else None
    if heading_link is None:
        continue  # not a repository row we know how to parse
    project = "".join(heading_link.text.split())
    matches = re.match(r"(.*)\/(.*)", project)
    if matches is None:
        continue  # heading is not in "user/repo" form
    user = matches[1]
    name = matches[2]

    # Get description (repositories without one have no <p> element)
    description_t = article.find("p")
    description = description_t.text.strip() if description_t is not None else None

    # Get the language (also optional)
    language_t = article.find("span", itemprop="programmingLanguage")
    language = language_t.text.strip() if language_t is not None else None

    # Get stars and forks. They stay strings such as "1,234" here; a missing
    # link falls back to "0" so the integer conversion below still works.
    stars_t = article.find("a", href=f"/{project}/stargazers")
    forks_t = article.find("a", href=f"/{project}/forks")
    stars = stars_t.text.strip() if stars_t is not None else "0"
    forks = forks_t.text.strip() if forks_t is not None else "0"

    # Separate name so the trending-page `url` above is not overwritten.
    repo_url = f"https://github.com/{project}"

    list.append({
        'user': user,
        'name': name,
        'project': project,
        'description': description,
        'language': language,
        'stars': stars,
        'forks': forks,
        'url': repo_url
    })

# create a dataframe and clean up columns
# Passing `columns` keeps the stars/forks columns present even when nothing
# was scraped, so the conversions below do not raise KeyError.
df = pd.DataFrame(list, columns=columns)
df["stars"] = df["stars"].str.replace(",", "", regex=False).astype(int)
df["forks"] = df["forks"].str.replace(",", "", regex=False).astype(int)

In [None]:
from sqlalchemy import create_engine, text
from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker


# Engine for the SQLite database file `mydatabase.db`.
# NOTE(review): the path looks relative, so the file presumably lands in the
# kernel's working directory — confirm that is the intended location.
engine = create_engine('sqlite:///mydatabase.db')
# Connectivity check: run a constant SELECT and print the single value it returns.
with engine.connect() as connection:
    result = connection.execute(text("SELECT 'Hello, SQLAlchemy!'"))
    print(result.scalar())

# Declarative base class that the ORM model below inherits from.
Base = declarative_base()

# Define the TrendingRepo class, which maps to the 'trending_repos' table
class TrendingRepo(Base):
    """ORM model for one repository row in the 'trending_repos' table.

    Only `description` and `language` may be NULL; every other column is
    required.
    """

    __tablename__ = 'trending_repos'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Repository name (required).
    name = Column(String, nullable=False)
    # Optional free-text description.
    description = Column(String, nullable=True)
    # Optional programming language.
    language = Column(String, nullable=True)
    # Star and fork counts, stored as integers (required).
    stars = Column(Integer, nullable=False)
    forks = Column(Integer, nullable=False)
    # Repository URL (required).
    url = Column(String, nullable=False)
    
    def __repr__(self):
        """Return a short debug representation showing the name and URL."""
        # NOTE(review): the label reads "Repository" although the class is TrendingRepo.
        return f"<Repository(name='{self.name}', url='{self.url}')>"

# Create the tables for every model registered on Base (here: trending_repos)
# in the database behind `engine`.
Base.metadata.create_all(engine)

def _count_to_int(value):
    """Convert a star/fork count to int; accepts ints or strings like "1,234"."""
    if isinstance(value, str):
        return int(value.replace(",", "").strip())
    return int(value)


def insert_data(repos_data):
    """Insert scraped repositories into the trending_repos table.

    Args:
        repos_data: Iterable of dicts with the keys "name", "description",
            "language", "stars", "forks" and "url". "stars" and "forks" may
            be ints or strings with thousands separators (e.g. "1,234").

    Raises:
        Whatever the conversion or the database raises. In that case the
        transaction is rolled back, so no partial batch is committed.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        for repo in repos_data:
            session.add(
                TrendingRepo(
                    name=repo["name"],
                    description=repo["description"],
                    language=repo["language"],
                    stars=_count_to_int(repo["stars"]),
                    forks=_count_to_int(repo["forks"]),
                    url=repo["url"],
                )
            )
        # Single commit: either every row is stored or none is.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        # Always release the connection, even when the insert failed.
        session.close()
    print("✅ Data inserted successfully!")
# Persist the scraped records. `list` here is the list of repository dicts
# built in the scraping cell (it shadows the builtin `list`).
insert_data(list)