In [1]:
import requests

# Your GitHub token
token = "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"
headers = {"Authorization": f"token {token}"}

response = requests.get("https://api.github.com/user/repos", headers=headers)
repos = response.json()

for repo in repos:
    print(repo["name"])

Airbnb_Analysis
BizCardX
Django_Learning_with_Movie_Review_System_App
Electronics_sales_Analysis_and_Modeling
GIfusion_assesment
guvi_final_project
Industrial_copper_modeling
Interview_Assesment
My_Train_AI_Chatbot
Phonepe_pulse_analysis
python_assesment
screen_recorder_with_python
Singapore_resale_flat_price_prediction
to_do_list_app_with_Django
ytdataharvesting


In [6]:
import requests
import pandas as pd
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def fetch_repos(topic, token, max_pages=10, per_page=100, retries=3, backoff_factor=1):
    repos = []
    session = requests.Session()
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=[429, 500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    for page in range(1, max_pages + 1):
        url = f"https://api.github.com/search/repositories?q=topic:{topic}&sort=stars&order=desc&per_page={per_page}&page={page}"
        headers = {"Authorization": f"token {token}"}
        response = session.get(url, headers=headers)

        if response.status_code == 200:
            data = response.json()
            items = data.get("items", [])
            repos.extend(items)
            if len(items) < per_page:
                break
        else:
            print(f"Failed to fetch page {page}: {response.status_code}")
            break

    return repos

In [7]:
topics = ["AI", "machine learning", "deep learning", "data visualization", "NLP", 
          "big data", "computer vision", "reinforcement learning", "data science", "neural networks"]

all_repos = []
for topic in topics:
    topic_repos = fetch_repos(topic, token)
    all_repos.extend(topic_repos)

data = pd.json_normalize(all_repos)

repos = pd.DataFrame({
    'Repository_Name': data['name'],
    'Owner': data['owner.login'].fillna('Unknown'),
    'Description': data['description'].fillna('No description'),
    'URL': data['html_url'],
    'Programming_Language': data['language'].fillna('Not specified'),
    'Creation_Date': data['created_at'],
    'Last_Updated_Date': data['updated_at'],
    'Number_of_Stars': data['stargazers_count'],
    'Number_of_Forks': data['forks_count'],
    'Number_of_Open_Issues': data['open_issues_count'],
    'License_Type': data['license.name'].fillna('No license')
})

repos.to_csv('github_repos.csv', index=False)


In [8]:
df = repos
df.head()

Unnamed: 0,Repository_Name,Owner,Description,URL,Programming_Language,Creation_Date,Last_Updated_Date,Number_of_Stars,Number_of_Forks,Number_of_Open_Issues,License_Type
0,AutoGPT,Significant-Gravitas,AutoGPT is the vision of accessible AI for eve...,https://github.com/Significant-Gravitas/AutoGPT,Python,2023-03-16T09:21:07Z,2024-10-10T11:39:25Z,167371,44208,121,Other
1,stable-diffusion-webui,AUTOMATIC1111,Stable Diffusion web UI,https://github.com/AUTOMATIC1111/stable-diffus...,Python,2022-08-22T14:05:26Z,2024-10-10T11:37:06Z,140905,26656,2294,GNU Affero General Public License v3.0
2,supabase,supabase,The open source Firebase alternative. Supabase...,https://github.com/supabase/supabase,TypeScript,2019-10-12T05:56:49Z,2024-10-10T11:15:39Z,72474,6965,503,Apache License 2.0
3,generative-ai-for-beginners,microsoft,"21 Lessons, Get Started Building with Generati...",https://github.com/microsoft/generative-ai-for...,Jupyter Notebook,2023-06-19T16:28:59Z,2024-10-10T11:33:53Z,63658,32351,14,MIT License
4,ChatGPT,lencx,"ðŸ”® ChatGPT Desktop Application (Mac, Windows an...",https://github.com/lencx/ChatGPT,Rust,2022-12-07T09:43:02Z,2024-10-10T09:14:57Z,52545,5908,739,No license


In [9]:
df.shape

(5703, 11)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5703 entries, 0 to 5702
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Repository_Name        5703 non-null   object
 1   Owner                  5703 non-null   object
 2   Description            5703 non-null   object
 3   URL                    5703 non-null   object
 4   Programming_Language   5703 non-null   object
 5   Creation_Date          5703 non-null   object
 6   Last_Updated_Date      5703 non-null   object
 7   Number_of_Stars        5703 non-null   int64 
 8   Number_of_Forks        5703 non-null   int64 
 9   Number_of_Open_Issues  5703 non-null   int64 
 10  License_Type           5703 non-null   object
dtypes: int64(3), object(8)
memory usage: 490.2+ KB


In [11]:
df.isnull().sum()

Repository_Name          0
Owner                    0
Description              0
URL                      0
Programming_Language     0
Creation_Date            0
Last_Updated_Date        0
Number_of_Stars          0
Number_of_Forks          0
Number_of_Open_Issues    0
License_Type             0
dtype: int64

In [12]:
import psycopg2
from psycopg2 import sql

def store_in_postgresql(repos, db_credentials):
    try:
        conn = psycopg2.connect(
            dbname=db_credentials["dbname"],
            user=db_credentials["user"],
            password=db_credentials["password"],
            host=db_credentials["host"],
            port=db_credentials["port"]
        )
        cursor = conn.cursor()

        create_table_query = '''
        CREATE TABLE IF NOT EXISTS github_repositories (
            id SERIAL PRIMARY KEY,
            repository_name TEXT,
            owner TEXT,
            description TEXT,
            url TEXT,
            programming_language TEXT,
            creation_date TIMESTAMP,
            last_updated_date TIMESTAMP,
            number_of_stars INT,
            number_of_forks INT,
            number_of_open_issues INT,
            license_type TEXT
        );
        '''
        cursor.execute(create_table_query)
        conn.commit()

        insert_query = '''
        INSERT INTO github_repositories (
            repository_name, owner, description, url, programming_language,
            creation_date, last_updated_date, number_of_stars, number_of_forks,
            number_of_open_issues, license_type
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        '''
        for _, row in repos.iterrows():
            cursor.execute(insert_query, (
                row['Repository_Name'], row['Owner'], row['Description'],
                row['URL'], row['Programming_Language'], row['Creation_Date'],
                row['Last_Updated_Date'], row['Number_of_Stars'],
                row['Number_of_Forks'], row['Number_of_Open_Issues'],
                row['License_Type']
            ))
        
        conn.commit()
        print("Data inserted successfully into PostgreSQL database")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        if conn:
            cursor.close()
            conn.close()

db_credentials = {
    "dbname": "postgres",
    "user": "postgres",
    "password": "123456",
    "host": "localhost",
    "port": "5432"
}

store_in_postgresql(repos, db_credentials)


Data inserted successfully into PostgreSQL database
