# GitHub API and SQlAlchemy
We will be using GitHub API to query GitHub repos, and store the data to a SQLite DB.

In [37]:
import requests
from requests.auth import HTTPBasicAuth
import json

In [38]:
# We will be searching two topics for both Python and JavaScript
search_topics = ['machine+learning', 'web+framework']
search_languages = ['python', 'javascript']

In [39]:
# Check out Github API documentation at https://developer.github.com/v3/search/#constructing-a-search-query
baseURL = 'https://api.github.com/search/repositories?q='

In [40]:
# Import sqlalchemy methods that we will need 
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

In [41]:
# your github username and password to authenticate 
user = 'justin.t.foust@gmail.com'
password = '7EfQ0485Uj74'

In [42]:
# Create Database Connection
# ----------------------------------
# Creates a connection to our DB which we will name 'gitdb' and it is a SQLite database
engine = create_engine('sqlite:///Data/gitdb.sqlite')

In [43]:
# Create a Base to utilize declarative base in SQLAlchemy
Base = declarative_base()

In [44]:
# Creates a Class which will serve as the anchor points for our Table
# name the class GitHubRepos and the table name to be 'gitrepos'
#
# List of columns:
#  id (this is the primary key), name, html_url, description, updated_at, created_at, git_url, stargazers_count,
#     watchers_count, language, forks, search_topic, search_language

class GitHubRepos(Base):
    __tablename__ = 'gitrepos'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    html_url = Column(String)
    description = Column(String)
    updated_at = Column(String)
    created_at = Column(String)
    git_url = Column(String)
    stargazers_count = Column(Integer)
    watchers_count = Column(Integer)
    language = Column(String)
    forks = Column(String)
    search_topic = Column(String)
    search_language = Column(String)
    # finish it up

In [45]:
# display tables in local metadata 
Base.metadata.tables

immutabledict({'gitrepos': Table('gitrepos', MetaData(bind=None), Column('id', Integer(), table=<gitrepos>, primary_key=True, nullable=False), Column('name', String(), table=<gitrepos>), Column('html_url', String(), table=<gitrepos>), Column('description', String(), table=<gitrepos>), Column('updated_at', String(), table=<gitrepos>), Column('created_at', String(), table=<gitrepos>), Column('git_url', String(), table=<gitrepos>), Column('stargazers_count', Integer(), table=<gitrepos>), Column('watchers_count', Integer(), table=<gitrepos>), Column('language', String(), table=<gitrepos>), Column('forks', String(), table=<gitrepos>), Column('search_topic', String(), table=<gitrepos>), Column('search_language', String(), table=<gitrepos>), schema=None)})

In [46]:
# Create a "Metadata" Layer That Abstracts our SQL Database. 
# Hint: this is when we reference engine to create the tables that currently reside localy in our code layer
Base.metadata.create_all(engine)

In [47]:
# Create a session that binds to our DB
session = Session(bind=engine)

In [90]:
# make requests and add rows into the DBs
# Hint: maybe you can leverage nested loops since we have two lists ?! 
# Hint: remember you need to write specific data to the DB based on our table structure
# Hint: remember how we parse our data when we make an API request 
# Hint: use the example URL below and check out where the variables are placed
#url = f'https://api.github.com/search/repositories?q={search_topics}+language:{search_languages}&sort=stars&order=desc'
# Hint: to pass authentication infomration 
#r = requests.get(url, auth=HTTPBasicAuth(user,password)).json()

for topic in search_topics:
    for lan in search_languages:
        url = f'https://api.github.com/search/repositories?q={topic}+language:{lan}&sort=stars&order=desc'
        r = requests.get(url, auth=HTTPBasicAuth(user,password))
        if r.ok:
            for i in r.json()['items']:
                                session.add(GitHubRepos(name=i['name'],
                                                       html_url=i['html_url'],
                                                       description=i['description'],
                                                       updated_at=i['updated_at'],
                                                       created_at=i['created_at'],
                                                       git_url=i['git_url'],
                                                       stargazers_count=i['stargazers_count'],
                                                       watchers_count=i['watchers_count'],
                                                       language=i['language'],
                                                       forks=i['forks'],
                                                       search_topic=topic,
                                                       search_language=lan))
                               
                                session.commit()

In [75]:
entry = GitHubRepos(name=r['items'][0]['name'], html_url=r['items'][0]['html_url'], description=r['items'][0]['description'], updated_at=r['items'][0]['updated_at'], created_at=r['items'][0]['created_at'], git_url=r['items'][0]['git_url'], stargazers_count=r['items'][0]['stargazers_count'], watchers_count=r['items'][0]['watchers_count'], language=r['items'][0]['language'], forks=r['items'][0]['forks'], search_topic='machine learning and Web Framework', search_language='Python and ')

KeyError: 'search_topic'

In [80]:
r

{'total_count': 2,
 'incomplete_results': False,
 'items': [{'id': 81451011,
   'node_id': 'MDEwOlJlcG9zaXRvcnk4MTQ1MTAxMQ==',
   'name': 'Detecting-Twitter-Bots-using-Machine-Learning',
   'full_name': 'emmethanratty/Detecting-Twitter-Bots-using-Machine-Learning',
   'private': False,
   'owner': {'login': 'emmethanratty',
    'id': 6748655,
    'node_id': 'MDQ6VXNlcjY3NDg2NTU=',
    'avatar_url': 'https://avatars1.githubusercontent.com/u/6748655?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/emmethanratty',
    'html_url': 'https://github.com/emmethanratty',
    'followers_url': 'https://api.github.com/users/emmethanratty/followers',
    'following_url': 'https://api.github.com/users/emmethanratty/following{/other_user}',
    'gists_url': 'https://api.github.com/users/emmethanratty/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/emmethanratty/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/emmethanratty/subscri

In [73]:
r['items'][0]

{'id': 81451011,
 'node_id': 'MDEwOlJlcG9zaXRvcnk4MTQ1MTAxMQ==',
 'name': 'Detecting-Twitter-Bots-using-Machine-Learning',
 'full_name': 'emmethanratty/Detecting-Twitter-Bots-using-Machine-Learning',
 'private': False,
 'owner': {'login': 'emmethanratty',
  'id': 6748655,
  'node_id': 'MDQ6VXNlcjY3NDg2NTU=',
  'avatar_url': 'https://avatars1.githubusercontent.com/u/6748655?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/emmethanratty',
  'html_url': 'https://github.com/emmethanratty',
  'followers_url': 'https://api.github.com/users/emmethanratty/followers',
  'following_url': 'https://api.github.com/users/emmethanratty/following{/other_user}',
  'gists_url': 'https://api.github.com/users/emmethanratty/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/emmethanratty/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/emmethanratty/subscriptions',
  'organizations_url': 'https://api.github.com/users/emmethanratty/orgs',
  'repos_

In [91]:
# to verify: Display first row,  all columns in dictionary format
session.query(GitHubRepos).first().__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x24ef04a6a58>,
 'language': 'Python',
 'stargazers_count': 42535,
 'created_at': '2014-07-15T19:11:19Z',
 'description': 'A curated list of awesome Machine Learning frameworks, libraries and software.',
 'name': 'awesome-machine-learning',
 'search_language': 'python',
 'forks': '10771',
 'watchers_count': 42535,
 'git_url': 'git://github.com/josephmisiti/awesome-machine-learning.git',
 'id': 1,
 'html_url': 'https://github.com/josephmisiti/awesome-machine-learning',
 'updated_at': '2019-11-16T10:30:38Z',
 'search_topic': 'machine+learning'}

In [None]:
# write you query to display first 5 reocrds and just display name and description
