This document describes the function **find_repo**, which extracts repositories from GitHub based on keywords:

### Function input:
+ *term*: Keywords for searching repositories
+ *day*: Number of days of data to extract, counting back from today. E.g. day = 30 extracts all repositories created within the last 30 days.

### Function output:
A CSV file named [term + date of extraction] is saved into your working directory, including:
+ Repository info such as: id, name, url, language, date of creation
+ And stats: forks, stars, watch

In [1]:
# Function to fetch repositories matching "term" that were created within a certain number of "days" from today.

def find_repo(term, day):
    """Fetch GitHub repositories matching *term* created in the last *day* days.

    The time range is split into 12-hour windows and each window is queried
    separately through the GitHub repository search API. The collected
    repositories are written to ``{term}_{today}.csv`` (``;``-delimited) in
    the working directory, and that file is read back and returned.

    Authentication: if the ``GITHUB_TOKEN`` environment variable is set, it is
    sent as the ``Authorization`` token; otherwise requests are made
    unauthenticated (lower rate limits). Never hard-code a token here.

    Args:
        term: Search keywords, inserted verbatim into the ``q=`` query.
        day: Number of days to look back from now.

    Returns:
        pandas.DataFrame with the columns id, name, url, language, created,
        stars, watch and forks, as parsed back from the CSV file.

    Raises:
        requests.HTTPError: if GitHub answers with an error status
            (e.g. rate limit exceeded or invalid query).
    """
    import csv
    import math
    import os
    from datetime import datetime, timedelta, timezone

    import pandas as pd
    import requests

    per_page = 100
    # The search API serves at most 1,000 results per query, i.e. 10 pages
    # of 100; asking for more returns an error payload without "items".
    max_pages = 10
    window = timedelta(hours=12)
    stamp = '%Y-%m-%dT%H:%M:%SZ'
    columns = ["id", "name", "url", "language", "created", "stars", "watch", "forks"]

    token = os.environ.get("GITHUB_TOKEN")
    headers = {'Authorization': f'token {token}'} if token else {}

    def fetch_page(url):
        # Fail loudly on HTTP errors instead of crashing later on a missing key.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()

    # The timestamps are sent with a "Z" suffix, so they must really be UTC.
    now = datetime.now(timezone.utc)
    since = now - timedelta(days=day)

    rows = []
    seen_ids = set()

    # Fetching repositories, one 12-hour window at a time:
    while since < now:
        # Clamp the last window so the range ends exactly at "now".
        until = min(since + window, now)
        window_url = (
            'https://api.github.com/search/repositories'
            f'?q={term}+created:{since.strftime(stamp)}..{until.strftime(stamp)}'
            f'&per_page={per_page}'
        )

        first_page = fetch_page(f'{window_url}&page=1')
        total = first_page.get("total_count") or 0
        page_count = min(math.ceil(total / per_page), max_pages)

        for page in range(1, page_count + 1):
            # Page 1 was already downloaded to learn the total count.
            payload = first_page if page == 1 else fetch_page(f'{window_url}&page={page}')
            for item in payload.get("items") or []:
                repo_id = item.get("id")
                # Window bounds are inclusive and shared between consecutive
                # windows, so a repository can be returned twice.
                if repo_id in seen_ids:
                    continue
                seen_ids.add(repo_id)
                rows.append([
                    repo_id,
                    item.get("name"),
                    item.get("html_url"),
                    item.get("language"),
                    item.get("created_at"),
                    item.get("stargazers_count"),
                    item.get("watchers_count"),
                    item.get("forks_count"),
                ])

        # Move on to the next 12-hour window.
        since = until

    # Writing data into a .csv file and returning the table.
    # The name is computed once so the write and the read always agree.
    filename = f"{term}_{datetime.now().date()}.csv"
    # newline="" is required by the csv module to avoid blank rows on Windows.
    with open(filename, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        writer.writerow(columns)
        writer.writerows(rows)

    return pd.read_csv(filename, delimiter=";")

In [2]:
# Example: fetch every repository matching "python" created during the last day.
find_repo(term="python", day=1)

Unnamed: 0,id,name,url,language,created,stars,watch,forks
0,413226906,HacktoberFest2021-python-For-RUSL-Students,https://github.com/Priyasad1997/HacktoberFest2...,Python,2021-10-04T00:09:01Z,2,2,12
1,413151356,new-python-codes,https://github.com/DheerajMandvi9/new-python-c...,Python,2021-10-03T17:40:24Z,1,1,5
2,413271258,python_wikipedia,https://github.com/harinandanan2112/python_wik...,Python,2021-10-04T04:17:18Z,0,0,3
3,413151410,StreamlitModelApp,https://github.com/sonamehdi19/StreamlitModelApp,Python,2021-10-03T17:40:41Z,0,0,4
4,413147970,Hacktoberfest-python-code-bunch,https://github.com/Ankitkundu21/Hacktoberfest-...,Python,2021-10-03T17:26:25Z,0,0,2
...,...,...,...,...,...,...,...,...
1471,413374787,EEE3097S-Project-Repository,https://github.com/MikeMillard/EEE3097S-Projec...,,2021-10-04T10:28:37Z,0,0,0
1472,413393269,Prediction-of-Default-Customers-based-on-credi...,https://github.com/maheshk-DS/Prediction-of-De...,Jupyter Notebook,2021-10-04T11:31:14Z,0,0,0
1473,413455845,FolderOrganizer,https://github.com/arevish/FolderOrganizer,Python,2021-10-04T14:25:26Z,0,0,0
1474,413282742,talo-hacktoberfest2021,https://github.com/abhishek213-alb/talo-hackto...,,2021-10-04T05:14:34Z,0,0,0
