In [None]:
import os
from datetime import datetime

import mysql.connector
import pandas as pd
import requests
from mysql.connector import connect

In [None]:
# GitHub account whose profile and repositories the rest of the notebook fetches.

username = "microsoft"    # can be changed to any user/org login

In [None]:
# GitHub API URL used to fetch the repositories of `username`
url = f'https://api.github.com/users/{username}/repos'

In [None]:
# Extract user profile info
user_url = f"https://api.github.com/users/{username}"

# Fail fast on HTTP errors (unknown account, rate limiting, ...) instead of
# silently building a profile full of None values from an error payload.
# The timeout keeps the cell from hanging forever on a stalled connection.
user_response = requests.get(user_url, timeout=30)
user_response.raise_for_status()
user_data = user_response.json()

# Keep only the profile fields that are stored in the database later on.
profile_fields = (
    "login",
    "name",
    "company",
    "location",
    "followers",
    "public_repos",
    "created_at",
)
user_info = {field: user_data.get(field) for field in profile_fields}

print("\n User Profile Info: ")
print(user_info)


In [None]:
# Extract repos
#
# The repository listing is paginated (30 entries per page unless `per_page` is
# given), so a single request silently truncates larger accounts. Request pages
# of 100 until a short page signals the end. MAX_REPO_PAGES bounds the number of
# requests so very large organisations do not exhaust the API rate limit.
REPOS_PER_PAGE = 100
MAX_REPO_PAGES = 10

repos = []
for page in range(1, MAX_REPO_PAGES + 1):
    response = requests.get(
        url,
        params={"per_page": REPOS_PER_PAGE, "page": page},
        timeout=30,
    )
    # An error response is a JSON object, not a list of repos; raise here rather
    # than letting the next cell fail while indexing into it.
    response.raise_for_status()
    page_repos = response.json()
    repos.extend(page_repos)
    if len(page_repos) < REPOS_PER_PAGE:
        break   # last (possibly empty) page reached

In [None]:
# Extract repositories
# Maps each output column to the key it is read from in the API payload.
repo_field_map = {
    "repo_name": "name",
    "description": "description",
    "language": "language",
    "stars": "stargazers_count",
    "forks": "forks_count",
    "updated_at": "updated_at",
}

# One flat record per repository, limited to the mapped fields
repo_data = [
    {column: repo[api_key] for column, api_key in repo_field_map.items()}
    for repo in repos
]

# Convert to a DataFrame and preview the first rows
df_repos = pd.DataFrame(repo_data)
print("Top Repositories: ", df_repos.head(), sep="\n")

## Insert Data into MySQL


In [None]:
# Connect to MySQL
#
# Connection settings are read from the environment (MYSQL_HOST, MYSQL_USER,
# MYSQL_PASSWORD, MYSQL_DATABASE) so real credentials never have to be saved in
# the notebook; the previous placeholder values are kept as fallbacks.

try:
    conn = mysql.connector.connect(
        host=os.environ.get('MYSQL_HOST', 'localhost'),
        user=os.environ.get('MYSQL_USER', 'root'),
        password=os.environ.get('MYSQL_PASSWORD', 'your_password'),
        database=os.environ.get('MYSQL_DATABASE', 'your_database_name'),
    )
    cursor = conn.cursor()
    print('Connected to database')
except mysql.connector.Error as e:
    # Report the actual error, then re-raise: the following cells need `conn`
    # and `cursor`, so continuing after a failed connection only hides the cause.
    print(f'Connection Error: {e}')
    raise

In [None]:
# Create the tables if needed, then load the user profile and its repositories.
#
# Everything runs inside try/except/finally so that a failure part-way through
# rolls back the uncommitted inserts, and the cursor and connection opened in
# the previous cell are always released.

GITHUB_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"   # format of created_at / updated_at

try:
    cursor.execute("""
    create table if not exists github_users(
        login varchar(100) primary key,
        name varchar(100),
        company varchar(100),
        location varchar(100),
        followers int,
        public_repos int,
        created_at datetime
    )
    """)
    cursor.execute("""
    create table if not exists github_repos(
        id int auto_increment primary key,
        login varchar(100),
        repo_name varchar(100),
        description text,
        language varchar(100),
        stars int,
        forks int,
        updated_at datetime,
        foreign key (login) references github_users(login)
    )
    """)

    # Insert the user row first (the repos reference it through the foreign key).
    # `insert ignore` leaves an already-stored login untouched.
    cursor.execute("""
        insert ignore into github_users( login, name, company, location, followers, public_repos, created_at)
        values(%s, %s, %s, %s, %s, %s, %s)
    """, (
        user_info["login"],
        user_info["name"],
        user_info["company"],
        user_info["location"],
        user_info["followers"],
        user_info["public_repos"],
        datetime.strptime(user_info["created_at"], GITHUB_TIMESTAMP_FORMAT),
    ))

    # Add the owning login to every repo row
    df_repos["login"] = user_info["login"]

    # github_repos has no unique key on (login, repo_name), so re-running the
    # notebook would append duplicates and inflate the analysis queries.
    # Replace this login's rows instead of adding to them.
    cursor.execute(
        "delete from github_repos where login = %s",
        (user_info["login"],),
    )

    # Build plain-Python parameter tuples: missing text becomes NULL and the
    # counts are converted to built-in ints for the connector.
    repo_rows = [
        (
            row["login"],
            row["repo_name"],
            None if pd.isna(row["description"]) else row["description"],
            None if pd.isna(row["language"]) else row["language"],
            int(row["stars"]),
            int(row["forks"]),
            datetime.strptime(row["updated_at"], GITHUB_TIMESTAMP_FORMAT),
        )
        for _, row in df_repos.iterrows()
    ]
    if repo_rows:
        cursor.executemany("""
            insert into github_repos(login, repo_name, description, language, stars, forks, updated_at)
            values(%s, %s, %s, %s, %s, %s, %s)
        """, repo_rows)

    conn.commit()
    print("Github data loaded in MYSQL database.")
except Exception:
    # Discard the partial load before propagating the original error.
    conn.rollback()
    raise
finally:
    cursor.close()
    conn.close()

## SQL Queries and GitHub Repo Analysis

In [None]:
#reconect to database
conn = mysql.connector.connect(
    host = 'localhost',
    user = 'root',
    password = 'your_password',
    database = 'your_database_name'
)
cursor = conn.cursor() 

In [None]:
# 1. Most starred repo
# Orders the stored repos of the current login by star count and keeps the first row.

top_star_query = """
    Select repo_name, stars
    From github_repos
    Where login = %s
    Order By stars DESC
    Limit 1
"""
df_top_star = pd.read_sql(sql=top_star_query, con=conn, params=[user_info['login']])
print("Most Starred Repos : ", df_top_star, sep="\n")

In [None]:
# 2. Most forked repo
# Orders the stored repos of the current login by fork count and keeps the first row.

top_fork_query = """
    Select repo_name, forks
    From github_repos
    Where login = %s
    Order By forks DESC
    Limit 1
"""
df_top_fork = pd.read_sql(sql=top_fork_query, con=conn, params=[user_info['login']])
print("\nMost Forked Repos : ", df_top_fork, sep="\n")

In [None]:
# 3. Most used programming languages
# Counts the stored repos of the current login per language, largest group first.

language_query = """
    Select language, count(*) as repo_count
    From github_repos
    Where login = %s
    Group By language
    Order By repo_count DESC
"""
df_lang = pd.read_sql(sql=language_query, con=conn, params=[user_info['login']])
print("\nMost Used Language : ", df_lang, sep="\n")

In [None]:
# 4. Recently updated repos
# Orders the stored repos of the current login by update time, newest first, top five.

recent_query = """
    Select repo_name, updated_at
    From github_repos
    Where login = %s
    Order By updated_at DESC
    Limit 5
"""
df_recent = pd.read_sql(sql=recent_query, con=conn, params=[user_info['login']])
print("\nRecently Updated Repos: ", df_recent, sep="\n")

### Exporting to CSV

In [None]:
# Write each query result to its own CSV file under ./data.
# to_csv does not create missing directories, so make sure the folder exists
# first; otherwise the export fails on a fresh checkout.
os.makedirs("./data", exist_ok=True)

csv_exports = {
    "./data/top_starred_repo.csv": df_top_star,
    "./data/top_forked_repo.csv": df_top_fork,
    "./data/language_stats.csv": df_lang,
    "./data/recently_updated_repos.csv": df_recent,
}
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)

print("All queries has been exported into CSV in the 'data/' folder ")