### Getting a tree from remote repo

In [6]:
from github import Github
import requests
import os
from dotenv import load_dotenv
import json

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("Github token is not set")
# Initialize the GitHub object using your token
g = Github(token)

# Get the repository object
repo = g.get_repo("minki-j/gitmeetup")

# Get the base tree object
base_tree = repo.get_git_tree(
    "ee85e2c53e0c97c523df2016e6488440fd324a55", recursive=True
)

In [42]:
from include.github_api_call.request import github_api_request
import pendulum
res = github_api_request(
    "GET",
    "https://api.github.com/search/users?",
    None,
    params={
        "q": f"location:ottawa created:>{pendulum.now().subtract(days=30).format('YYYY-MM-DD')}",
        "page": 1,
        "per_page": 100,
        "sort": "joined",
        "order": "desc",
    },
)
res.json()

Requesting https://api.github.com/search/users?


{'message': 'Validation Failed',
 'errors': [{'resource': 'Search', 'field': 'q', 'code': 'missing'}],
 'documentation_url': 'https://docs.github.com/v3/search',
 'status': '422'}

In [22]:
base_tree

GitTree(sha="ee85e2c53e0c97c523df2016e6488440fd324a55")

In [26]:
base_tree.tree[0]

GitTreeElement(sha="bd1d7af4f5338b838f545acd0cf802bf8127fc09", path=".gitignore")

In [31]:
from collections import defaultdict


def build_tree(paths):
    tree = lambda: defaultdict(tree)
    root = tree()
    for path in paths:
        parts = path.split("/")
        current_level = root
        for part in parts:
            current_level = current_level[part]
    return root


def tree_to_string(tree, indent=""):
    tree_str = ""
    for key, subtree in sorted(tree.items()):
        tree_str += f"{indent}├── {key}\n"
        if subtree:
            tree_str += tree_to_string(subtree, indent + "│ ")
    return tree_str

In [32]:
path_lists = [element.path for element in base_tree.tree]

tree = build_tree(path_lists)
tree_string = tree_to_string(tree)

with open("tree.txt", "w") as file:
    file.write(tree_string)

In [None]:
# Create a new tree
element = github.InputGitTreeElement(
    path="file.rb",
    mode="100644",
    type="blob",
    sha="44b4fc6d56897b048c772eb4087f854f46256132",
)

new_tree = repo.create_git_tree([element], base_tree)

# Print the SHA of the new tree
print(new_tree.sha)

In [None]:
from tree_generator import generate_tree

# Generate the tree structure
tree = generate_tree("./", output_file="directory_structure.txt")

print("Directory structure has been saved to directory_structure.txt")

In [8]:
import os


def create_project_structure(path, file_name):
    with open(file_name, "w") as f:
        for root, dirs, files in os.walk(path):
            level = root.replace(path, "").count(os.sep)
            indent = "  " * (level - 1)
            f.write("{}{}/\n".format(indent, os.path.basename(root)))
            sub_indent = "  " * level
            for file in files:
                f.write("{}{}\n".format(sub_indent, file))


project_path = "/Users/minkijung/Documents/2PetProjects/gitmeetup/airflow/"
output_file = "project_structure.txt"

create_project_structure(project_path, output_file)
print(f"Project structure saved to '{output_file}'")

Project structure saved to 'project_structure.txt'


In [13]:
import pendulum
pendulum.now(tz="UTC")

DateTime(2024, 7, 6, 10, 44, 0, 344441, tzinfo=Timezone('UTC'))

In [11]:
a = {"a": "b"}
a["a"]

'b'

In [10]:
from test import repos

print(len(repos))

ImportError: cannot import name 'repos' from 'test' (/Users/minkijung/Documents/2PetProjects/gitmeetup/airflow/test.py)

In [5]:
import pendulum

pendulum.now(tz="America/Montreal").to_datetime_string()

'2024-06-29 09:20:27'

In [1]:
GITHUB_USER_SCHEMA = {
    "login": str,
    "id": int,
    "node_id": str,
    "avatar_url": str,
    "gravatar_id": str,
    "url": str,
    "html_url": str,
    "followers_url": str,
    "following_url": str,
    "gists_url": str,
    "starred_url": str,
    "subscriptions_url": str,
    "organizations_url": str,
    "repos_url": str,
    "events_url": str,
    "received_events_url": str,
    "type": str,
    "site_admin": bool,
    "name": str,
    "company": str,
    "blog": str,
    "location": str,
    "email": str,
    "hireable": bool,
    "bio": str,
    "twitter_username": str,
    "public_repos": int,
    "public_gists": int,
    "followers": int,
    "following": int,
    "created_at": str,
    "updated_at": str,
}

In [3]:
type(GITHUB_USER_SCHEMA["login"])

type

In [1]:
list1 = [1, 2, 3]
list1.append(4)  # Adds 4 to the end of list1
# list1 is now [1, 2, 3, 4]

list2 = [5, 6]
list1.append(list2)  # Adds list2 as a single element to the end of list1
# list1 is now [1, 2, 3, 4, [5, 6]]

In [2]:
list1

[1, 2, 3, 4, [5, 6]]

In [2]:
import os
import modal
from dotenv import load_dotenv

load_dotenv()

client = modal.Client.from_credentials(
    token_id=os.environ["MODAL_TOKEN_ID"],
    token_secret=os.environ["MODAL_TOKEN_SECRET"],
)

In [6]:
app = modal.App(f"airflow")

with app.run(client=client):
    print("hi")

Output()

Output()

Output()

In [1]:
import json

with open("../data/users_in_montreal_deduplicated.json") as f:
    users = json.load(f)

len(users)

23502

In [4]:
from dags.utils.date_utils import generate_date_intervals
dates = generate_date_intervals(interval_days=60)
dates[0]

'2008-01-01..2008-02-01'

In [9]:
import json
with open("../data/github_accounts_202406250917.json") as f:
    github_accounts_from_airflow = json.load(f)
    print("length of github_accounts_from_airflow: ", len(github_accounts_from_airflow))

with open("../data/users_in_montreal_deduplicated.json") as f:
    users = json.load(f)
    print("length of users: ", len(users))

length of github_accounts_from_airflow:  10492
length of users:  23502


In [10]:
# Find users that is not in github_accounts_from_airflow but in users
ids_in_github_accounts_from_airflow = [account["id"] for account in github_accounts_from_airflow]

users_not_in_github_accounts_from_airflow = []
for user in users:
    if user["id"] not in ids_in_github_accounts_from_airflow:
        users_not_in_github_accounts_from_airflow.append(user)

len(users_not_in_github_accounts_from_airflow)

13237

In [12]:
len(users) - len(users_not_in_github_accounts_from_airflow)

10265

In [11]:
users_not_in_github_accounts_from_airflow[:3]

[{'login': 'vinokurov-nikola',
  'id': 173086762,
  'node_id': 'U_kgDOClEYKg',
  'avatar_url': 'https://avatars.githubusercontent.com/u/173086762?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/vinokurov-nikola',
  'html_url': 'https://github.com/vinokurov-nikola',
  'followers_url': 'https://api.github.com/users/vinokurov-nikola/followers',
  'following_url': 'https://api.github.com/users/vinokurov-nikola/following{/other_user}',
  'gists_url': 'https://api.github.com/users/vinokurov-nikola/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/vinokurov-nikola/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/vinokurov-nikola/subscriptions',
  'organizations_url': 'https://api.github.com/users/vinokurov-nikola/orgs',
  'repos_url': 'https://api.github.com/users/vinokurov-nikola/repos',
  'events_url': 'https://api.github.com/users/vinokurov-nikola/events{/privacy}',
  'received_events_url': 'https://api.github.com/users/vinokuro

In [1]:
import requests
import os
from dotenv import load_dotenv
import json

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("Github token is not set")
headers = {"Authorization": f"Bearer {token}"}
params = {"recursive": "true"}

res = requests.get(
    "https://api.github.com/repos/minki-j/GitMeetUp/git/trees",
    headers=headers,
    params=params
)
res.json()

{'message': 'Not Found',
 'documentation_url': 'https://docs.github.com/rest',
 'status': '404'}

In [20]:
res.headers.get("ETag")

'W/"1f6e94f0e0723d0e1d81464d2d1a48d8025436eeddba38746cc125d733d5459e"'

In [26]:
last_modified = res.headers.get("Last-Modified")
print(f"==>> last_modified: {last_modified}")

==>> last_modified: Fri, 28 Jun 2024 00:08:24 GMT


In [33]:
import requests
import os
from dotenv import load_dotenv
import json

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("Github token is not set")
headers = {
    "Authorization": f"Bearer {token}",
    "if-modified-since": "Fri, 28 Jun 2024 00:08:24 GMT",
}
res = requests.get("https://api.github.com/repos/minki-j/GitMeetUp", headers=headers)
if res.status_code == 304:
    print("Not modified since last time")
else:
    print("Modified since last time")
    print(res.json())
print("x-ratelimit-remaining:", res.headers.get("x-ratelimit-remaining"))

Not modified since last time
4990


Rate limit doesn't get deducted when I'm using if header properly

In [19]:
list(res.headers.keys())

['Server',
 'Date',
 'Content-Type',
 'Cache-Control',
 'Vary',
 'ETag',
 'Last-Modified',
 'X-OAuth-Scopes',
 'X-Accepted-OAuth-Scopes',
 'github-authentication-token-expiration',
 'X-GitHub-Media-Type',
 'x-github-api-version-selected',
 'X-RateLimit-Limit',
 'X-RateLimit-Remaining',
 'X-RateLimit-Reset',
 'X-RateLimit-Used',
 'X-RateLimit-Resource',
 'Access-Control-Expose-Headers',
 'Access-Control-Allow-Origin',
 'Strict-Transport-Security',
 'X-Frame-Options',
 'X-Content-Type-Options',
 'X-XSS-Protection',
 'Referrer-Policy',
 'Content-Security-Policy',
 'Content-Encoding',
 'x-envoy-upstream-service-time',
 'Transfer-Encoding',
 'X-GitHub-Request-Id']

In [18]:
import requests

res = requests.get("https://api.github.com/users/macournoyer")
type(res.json()["hireable"])

NoneType