![ga4](https://www.google-analytics.com/collect?v=2&tid=G-6VDTYWLKX6&cid=1&en=page_view&dt=tracking_github.ipynb&dl=statmike%2Fvertex-ai-mlops%2Farchitectures%2Ftracking)
# GitHub Traffic For Repository

## Setup

In [6]:
# Ask the gcloud CLI for the active project; the `!` shell capture returns the command's output as a list of lines.
project = !gcloud config get-value project
# Use the first captured line as the project id (assumes gcloud printed nothing before it — TODO confirm).
PROJECT_ID = project[0]
# Display the resolved project id as the cell output.
PROJECT_ID

'statmike-mlops-349915'

In [33]:
import requests
import json
import time
from google.cloud import bigquery

---
## Get Secret

You need to create a secret in Secret Manager to hold the personal access token (PAT) used to access the GitHub API. The code below reads the secret named `github_api`. More information on [creating a secret in Secret Manager](https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets#secretmanager-create-secret-console).


In [3]:
try:
    import google.cloud.secretmanager
except ImportError:
    print('You need to pip install google-cloud-secret-manager')
    !pip install google-cloud-secret-manager -q

You need to pip install google-cloud-secret-manager


In [4]:
from google.cloud import secretmanager

In [5]:
# Create the Secret Manager client that the following cells use to read the stored secret.
client = secretmanager.SecretManagerServiceClient()

In [7]:
# Read the latest version of the `github_api` secret from the current project.
secret = client.access_secret_version(
    request = {
        "name": f'projects/{PROJECT_ID}/secrets/github_api/versions/latest',
    }
)

In [10]:
# Decode the secret payload bytes into the PAT string.
# Strip surrounding whitespace: a secret stored with a trailing newline would otherwise
# end up inside the Authorization header value built from `pat` in the cells below.
pat = secret.payload.data.decode('utf-8').strip()

---
## GitHub Traffic API

- GitHub [traffic API](https://docs.github.com/en/rest/metrics/traffic#about-the-repository-traffic-api)
- Permissions the PAT will need are listed under [administration](https://docs.github.com/en/rest/overview/permissions-required-for-fine-grained-personal-access-tokens#administration)

In [13]:
## all three variants work; the Bearer-token header form is the active one:
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/clones',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# variant 2 - basic auth (user, PAT) only:
#response = requests.get('https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/clones', auth = ('statmike', f'{pat}'), headers = {'Accept': 'application/vnd.github+json'})
# variant 3 - basic auth together with the Bearer header:
#response = requests.get('https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/clones', auth = ('statmike', f'{pat}'), headers = {'Authorization': f'Bearer {pat}', 'Accept': 'application/vnd.github+json'})

In [14]:
# Show the Response object; its repr carries the HTTP status code of the request above.
response

<Response [200]>

In [15]:
# Show the raw response body as text, before any JSON parsing.
response.text

'{"count":49,"uniques":39,"clones":[{"timestamp":"2022-11-12T00:00:00Z","count":2,"uniques":2},{"timestamp":"2022-11-13T00:00:00Z","count":2,"uniques":1},{"timestamp":"2022-11-14T00:00:00Z","count":15,"uniques":11},{"timestamp":"2022-11-15T00:00:00Z","count":3,"uniques":3},{"timestamp":"2022-11-16T00:00:00Z","count":2,"uniques":2},{"timestamp":"2022-11-17T00:00:00Z","count":2,"uniques":2},{"timestamp":"2022-11-18T00:00:00Z","count":3,"uniques":3},{"timestamp":"2022-11-19T00:00:00Z","count":3,"uniques":3},{"timestamp":"2022-11-20T00:00:00Z","count":2,"uniques":2},{"timestamp":"2022-11-21T00:00:00Z","count":7,"uniques":5},{"timestamp":"2022-11-22T00:00:00Z","count":2,"uniques":1},{"timestamp":"2022-11-23T00:00:00Z","count":1,"uniques":1},{"timestamp":"2022-11-24T00:00:00Z","count":1,"uniques":1},{"timestamp":"2022-11-25T00:00:00Z","count":4,"uniques":3}]}'

### Review metrics

In [21]:
# Community profile of the repository (health percentage, description, community files).
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/community/profile',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

{'health_percentage': 42,
 'description': 'Google Cloud Platform Vertex AI end-to-end workflows for machine learning operations',
 'documentation': None,
 'files': {'code_of_conduct': None,
  'code_of_conduct_file': None,
  'contributing': None,
  'issue_template': None,
  'pull_request_template': None,
  'license': {'key': 'apache-2.0',
   'name': 'Apache License 2.0',
   'spdx_id': 'Apache-2.0',
   'url': 'https://api.github.com/licenses/apache-2.0',
   'node_id': 'MDc6TGljZW5zZTI=',
   'html_url': 'https://github.com/statmike/vertex-ai-mlops/blob/main/LICENSE'},
  'readme': {'url': 'https://api.github.com/repos/statmike/vertex-ai-mlops/contents/readme.md',
   'html_url': 'https://github.com/statmike/vertex-ai-mlops/blob/main/readme.md'}},
 'updated_at': None}

In [37]:
# Weekly rows of [week timestamp (12AM Sunday), additions, deletions].
# NOTE(review): the original comment said "last 52 weeks", but the output below starts at
# 1616889600, which looks like more than 52 weeks of history — confirm.
code_frequency_url = 'https://api.github.com/repos/statmike/vertex-ai-mlops/stats/code_frequency'
github_headers = {'Authorization': f'Bearer {pat}', 'Accept': 'application/vnd.github+json'}
max_polls = 10  # upper bound on retries so a persistent 202 cannot block the kernel forever

# timeout keeps a stalled connection from hanging the cell indefinitely
response = requests.get(code_frequency_url, headers = github_headers, timeout = 60)

# 202 means GitHub is still computing the statistics: wait, then ask again.
polls = 0
while response.status_code == 202 and polls < max_polls:
    time.sleep(30)
    response = requests.get(code_frequency_url, headers = github_headers, timeout = 60)
    polls += 1

if response.status_code == 202:
    raise TimeoutError(f'GitHub statistics still not ready after {max_polls} retries: {code_frequency_url}')

# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

[[1616889600, 2983, -547],
 [1617494400, 7461, -3499],
 [1618099200, 12394, -6314],
 [1618704000, 7904, -6179],
 [1619308800, 0, 0],
 [1619913600, 0, 0],
 [1620518400, 0, 0],
 [1621123200, 0, 0],
 [1621728000, 0, 0],
 [1622332800, 0, 0],
 [1622937600, 695, -899],
 [1623542400, 0, 0],
 [1624147200, 246, -359],
 [1624752000, 0, 0],
 [1625356800, 14349, -11041],
 [1625961600, 3422, -1542],
 [1626566400, 1582, -1716],
 [1627171200, 0, 0],
 [1627776000, 0, 0],
 [1628380800, 1693, -2389],
 [1628985600, 0, 0],
 [1629590400, 0, 0],
 [1630195200, 10777, -6390],
 [1630800000, 400423, -206756],
 [1631404800, 217627, -217787],
 [1632009600, 14216, -10514],
 [1632614400, 1370, -1244],
 [1633219200, 2047, -715],
 [1633824000, 2705, -2357],
 [1634428800, 37, -62],
 [1635033600, 207, -161],
 [1635638400, 0, 0],
 [1636243200, 0, 0],
 [1636848000, 0, 0],
 [1637452800, 134, -139],
 [1638057600, 3731, -3358],
 [1638662400, 287, -296],
 [1639267200, 0, 0],
 [1639872000, 0, 0],
 [1640476800, 0, 0],
 [164108

In [38]:
# commits per day [sunday, ..., saturday], total, week (timestamp)
commit_activity_url = 'https://api.github.com/repos/statmike/vertex-ai-mlops/stats/commit_activity'
github_headers = {'Authorization': f'Bearer {pat}', 'Accept': 'application/vnd.github+json'}
max_polls = 10  # upper bound on retries so a persistent 202 cannot block the kernel forever

# timeout keeps a stalled connection from hanging the cell indefinitely
response = requests.get(commit_activity_url, headers = github_headers, timeout = 60)

# 202 means GitHub is still computing the statistics: wait, then ask again.
polls = 0
while response.status_code == 202 and polls < max_polls:
    time.sleep(30)
    response = requests.get(commit_activity_url, headers = github_headers, timeout = 60)
    polls += 1

if response.status_code == 202:
    raise TimeoutError(f'GitHub statistics still not ready after {max_polls} retries: {commit_activity_url}')

# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

[{'days': [0, 1, 2, 1, 0, 1, 0], 'total': 5, 'week': 1638057600},
 {'days': [0, 0, 0, 3, 0, 0, 0], 'total': 3, 'week': 1638662400},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1639267200},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1639872000},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1640476800},
 {'days': [0, 16, 3, 1, 0, 1, 1], 'total': 22, 'week': 1641081600},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1641686400},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1642291200},
 {'days': [0, 0, 0, 2, 0, 0, 2], 'total': 4, 'week': 1642896000},
 {'days': [0, 0, 0, 1, 1, 1, 0], 'total': 3, 'week': 1643500800},
 {'days': [0, 5, 0, 3, 2, 1, 1], 'total': 12, 'week': 1644105600},
 {'days': [0, 2, 1, 0, 0, 2, 0], 'total': 5, 'week': 1644710400},
 {'days': [0, 1, 0, 0, 0, 1, 0], 'total': 2, 'week': 1645315200},
 {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1645920000},
 {'days': [0, 0, 0, 2, 1, 6, 1], 'total': 10, 'week': 1646524800},
 {'day

In [39]:
# list of dict for each author: total alltime, week list [week timestamp, additions, deletions, commits], author info
# NOTE(review): the original comment said "52 week data", but the weeks in the output below
# start at 1616889600, which looks like more than 52 weeks of history — confirm.
contributors_url = 'https://api.github.com/repos/statmike/vertex-ai-mlops/stats/contributors'
github_headers = {'Authorization': f'Bearer {pat}', 'Accept': 'application/vnd.github+json'}
max_polls = 10  # upper bound on retries so a persistent 202 cannot block the kernel forever

# timeout keeps a stalled connection from hanging the cell indefinitely
response = requests.get(contributors_url, headers = github_headers, timeout = 60)

# 202 means GitHub is still computing the statistics: wait, then ask again.
polls = 0
while response.status_code == 202 and polls < max_polls:
    time.sleep(30)
    response = requests.get(contributors_url, headers = github_headers, timeout = 60)
    polls += 1

if response.status_code == 202:
    raise TimeoutError(f'GitHub statistics still not ready after {max_polls} retries: {contributors_url}')

# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

[{'total': 1,
  'weeks': [{'w': 1616889600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1617494400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1618099200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1618704000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1619308800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1619913600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1620518400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1621123200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1621728000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1622332800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1622937600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1623542400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1624147200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1624752000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1625356800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1625961600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1626566400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1627171200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1627776000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1628380800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1628985600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 16

In [40]:
# Weekly commit totals for the last 52 weeks, for everyone ('all') and for the repository
# owner ('owner'); index 0 is the oldest week and the last entry is the most recent.
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/stats/participation',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)

# No 202 polling here, unlike the other /stats cells in this notebook.
# NOTE(review): confirm this endpoint never answers 202 before relying on a single request.
json.loads(response.text)

{'all': [5,
  3,
  0,
  0,
  0,
  21,
  1,
  0,
  2,
  6,
  11,
  6,
  2,
  0,
  9,
  14,
  9,
  0,
  13,
  4,
  2,
  0,
  1,
  0,
  1,
  12,
  3,
  0,
  0,
  2,
  3,
  3,
  0,
  0,
  10,
  9,
  15,
  10,
  9,
  12,
  2,
  22,
  34,
  30,
  29,
  45,
  17,
  11,
  13,
  13,
  7,
  6],
 'owner': [5,
  3,
  0,
  0,
  0,
  21,
  1,
  0,
  2,
  6,
  11,
  6,
  2,
  0,
  9,
  14,
  8,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  2,
  3,
  3,
  0,
  0,
  10,
  9,
  15,
  10,
  9,
  12,
  2,
  22,
  34,
  30,
  29,
  45,
  15,
  10,
  13,
  12,
  6,
  6]}

In [41]:
# Clone counts (total and unique) for the last 14 days, with a per-day breakdown.
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/clones',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

{'count': 49,
 'uniques': 39,
 'clones': [{'timestamp': '2022-11-12T00:00:00Z', 'count': 2, 'uniques': 2},
  {'timestamp': '2022-11-13T00:00:00Z', 'count': 2, 'uniques': 1},
  {'timestamp': '2022-11-14T00:00:00Z', 'count': 15, 'uniques': 11},
  {'timestamp': '2022-11-15T00:00:00Z', 'count': 3, 'uniques': 3},
  {'timestamp': '2022-11-16T00:00:00Z', 'count': 2, 'uniques': 2},
  {'timestamp': '2022-11-17T00:00:00Z', 'count': 2, 'uniques': 2},
  {'timestamp': '2022-11-18T00:00:00Z', 'count': 3, 'uniques': 3},
  {'timestamp': '2022-11-19T00:00:00Z', 'count': 3, 'uniques': 3},
  {'timestamp': '2022-11-20T00:00:00Z', 'count': 2, 'uniques': 2},
  {'timestamp': '2022-11-21T00:00:00Z', 'count': 7, 'uniques': 5},
  {'timestamp': '2022-11-22T00:00:00Z', 'count': 2, 'uniques': 1},
  {'timestamp': '2022-11-23T00:00:00Z', 'count': 1, 'uniques': 1},
  {'timestamp': '2022-11-24T00:00:00Z', 'count': 1, 'uniques': 1},
  {'timestamp': '2022-11-25T00:00:00Z', 'count': 4, 'uniques': 3}]}

In [42]:
# Top 10 most visited paths over the previous 14 days (path, title, count, uniques).
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/popular/paths',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

[{'path': '/statmike/vertex-ai-mlops',
  'title': 'statmike/vertex-ai-mlops: Google Cloud Platform Vertex AI end-to-end workflow...',
  'count': 435,
  'uniques': 185},
 {'path': '/statmike/vertex-ai-mlops/tree/main/02%20-%20Vertex%20AI%20AutoML',
  'title': 'vertex-ai-mlops/02 - Vertex AI AutoML at main · statmike/vertex-ai-mlops · Gi...',
  'count': 74,
  'uniques': 38},
 {'path': '/statmike/vertex-ai-mlops/tree/main/00%20-%20Setup',
  'title': 'vertex-ai-mlops/00 - Setup at main · statmike/vertex-ai-mlops · GitHub',
  'count': 57,
  'uniques': 33},
 {'path': '/statmike/vertex-ai-mlops/tree/main/05%20-%20TensorFlow',
  'title': 'vertex-ai-mlops/05 - TensorFlow at main · statmike/vertex-ai-mlops · GitHub',
  'count': 55,
  'uniques': 32},
 {'path': '/statmike/vertex-ai-mlops/blob/main/00%20-%20Setup/00%20-%20Environment%20Setup.ipynb',
  'title': 'vertex-ai-mlops/00 - Environment Setup.ipynb at main · statmike/vertex-ai-mlops',
  'count': 46,
  'uniques': 28},
 {'path': '/statmike/ver

In [43]:
# Top 10 referrers over the last 14 days (referrer, count, uniques).
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/popular/referrers',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

[{'referrer': 'youtube.com', 'count': 325, 'uniques': 75},
 {'referrer': 'github.com', 'count': 221, 'uniques': 38},
 {'referrer': 'Google', 'count': 146, 'uniques': 54},
 {'referrer': 'notebooks.githubusercontent.com', 'count': 25, 'uniques': 13},
 {'referrer': 'art-analytics.appspot.com', 'count': 7, 'uniques': 2},
 {'referrer': 'statics.teams.cdn.office.net', 'count': 4, 'uniques': 2},
 {'referrer': 'm.facebook.com', 'count': 3, 'uniques': 1}]

In [44]:
# Page views (total and unique) for the last 14 days, with a per-day breakdown.
response = requests.get(
    'https://api.github.com/repos/statmike/vertex-ai-mlops/traffic/views',
    headers = {
        'Authorization': f'Bearer {pat}',
        'Accept': 'application/vnd.github+json',
    },
)
# Parse the JSON body; as the last expression it is rendered as the cell output.
json.loads(response.text)

{'count': 1511,
 'uniques': 253,
 'views': [{'timestamp': '2022-11-12T00:00:00Z', 'count': 32, 'uniques': 9},
  {'timestamp': '2022-11-13T00:00:00Z', 'count': 45, 'uniques': 13},
  {'timestamp': '2022-11-14T00:00:00Z', 'count': 123, 'uniques': 35},
  {'timestamp': '2022-11-15T00:00:00Z', 'count': 150, 'uniques': 33},
  {'timestamp': '2022-11-16T00:00:00Z', 'count': 128, 'uniques': 32},
  {'timestamp': '2022-11-17T00:00:00Z', 'count': 166, 'uniques': 41},
  {'timestamp': '2022-11-18T00:00:00Z', 'count': 74, 'uniques': 31},
  {'timestamp': '2022-11-19T00:00:00Z', 'count': 89, 'uniques': 13},
  {'timestamp': '2022-11-20T00:00:00Z', 'count': 147, 'uniques': 20},
  {'timestamp': '2022-11-21T00:00:00Z', 'count': 141, 'uniques': 29},
  {'timestamp': '2022-11-22T00:00:00Z', 'count': 78, 'uniques': 20},
  {'timestamp': '2022-11-23T00:00:00Z', 'count': 74, 'uniques': 21},
  {'timestamp': '2022-11-24T00:00:00Z', 'count': 156, 'uniques': 34},
  {'timestamp': '2022-11-25T00:00:00Z', 'count': 108, '

---
## IDEA

- Cloud Scheduler > Pub/Sub > Cloud Function
    - Get Secret for PAT
    - Fetch from GitHub API
    - Store in BigQuery
    - Create Pub/Sub Topic
    - Create a Cloud Scheduler job that runs each night at 4AM to trigger the Pub/Sub Topic
    - Write Cloud Function subscribed to Pub/Sub Topic that updates tables each night: insert, append
    - Trigger DataForm ELT process

---
### Create BigQuery Tables