# GitHub API and SQlAlchemy
We will be using GitHub API to query GitHub repos, and store the data to a SQLite DB.

In [1]:
import requests
from requests.auth import HTTPBasicAuth
import json
from githubinfo import password, username

In [2]:
search_topics = ['machine+learning', 'web+framework']
search_languages = ['python', 'javascript']

In [3]:
baseURL = 'https://api.github.com/search/repositories?q='

In [4]:
# Import sqlalchemy methods that we will need. 
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

In [8]:
# your github username and password
user = 'justin.t.foust@gmail.com'
password = '7EfQ0485Uj74'

In [9]:
# Create Database Connection
# ----------------------------------
# Creates a connection to our DB
engine = create_engine('sqlite:///gitdb.sqlite')

In [10]:
# Create a Base to utilize declarative base in SQL Alchemy
Base = declarative_base()

In [11]:
# Creates a Class which will serve as the anchor points for our Table
# name the class GitHubRepos and the table name to be 'gitrepos'
# here are the columns:
#  id, name, html_url, description, updated_at, created_at, git_url, stargazers_count,
#     watchers_count, language, forks, search_topic, search_language

class GitHubRepos(Base):
    __tablename__ = 'gitrepos'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    html_url = Column(String)
    description = Column(String)
    updated_at = Column(String)
    created_at = Column(String)
    git_url = Column(String)
    stargazers_count = Column(Integer)
    watchers_count = Column(Integer)
    language = Column(String)
    forks = Column(Integer)
    search_topic = Column(String)
    search_language = Column(String)

In [12]:
# display tables in local metadata
Base.metadata.tables

immutabledict({'gitrepos': Table('gitrepos', MetaData(bind=None), Column('id', Integer(), table=<gitrepos>, primary_key=True, nullable=False), Column('name', String(), table=<gitrepos>), Column('html_url', String(), table=<gitrepos>), Column('description', String(), table=<gitrepos>), Column('updated_at', String(), table=<gitrepos>), Column('created_at', String(), table=<gitrepos>), Column('git_url', String(), table=<gitrepos>), Column('stargazers_count', Integer(), table=<gitrepos>), Column('watchers_count', Integer(), table=<gitrepos>), Column('language', String(), table=<gitrepos>), Column('forks', Integer(), table=<gitrepos>), Column('search_topic', String(), table=<gitrepos>), Column('search_language', String(), table=<gitrepos>), schema=None)})

In [13]:
# Create a "Metadata" Layer That Abstracts our SQL Database 
Base.metadata.create_all(engine)

In [14]:
# Create a session that binds to our DB
session = Session(bind=engine)

In [15]:
# make requests and add rows into the DBs
# Hint: maybe you can leverage nested loops
# Hint: remember you need to write specific data to the DB based on our table structure
# Hint: remember how we parse our data when we make an API request 
for topic in search_topics:
    for lang in search_languages:
        url = f'https://api.github.com/search/repositories?q={topic}+language:{lang}&sort=stars&order=desc'
        r = requests.get(url, auth=HTTPBasicAuth(user,password))
        if r.ok:
            for i in r.json()['items']:
                session.add(GitHubRepos(name=i['name'],
                            html_url=i['html_url'],
                            description=i['description'],
                            updated_at=i['updated_at'],
                            created_at=i['created_at'],
                            git_url=i['git_url'],
                            stargazers_count=i['stargazers_count'],
                            watchers_count=i['watchers_count'],
                            language=i['language'],
                            search_topic=topic,
                            search_language=lang))
                session.commit()

In [16]:
# to verify: Display first row,  all columns in dictionary format
session.query(GitHubRepos).first().__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1e9e7823ef0>,
 'search_language': 'python',
 'forks': None,
 'watchers_count': 42535,
 'git_url': 'git://github.com/josephmisiti/awesome-machine-learning.git',
 'updated_at': '2019-11-16T10:30:38Z',
 'description': 'A curated list of awesome Machine Learning frameworks, libraries and software.',
 'name': 'awesome-machine-learning',
 'search_topic': 'machine+learning',
 'language': 'Python',
 'stargazers_count': 42535,
 'created_at': '2014-07-15T19:11:19Z',
 'html_url': 'https://github.com/josephmisiti/awesome-machine-learning',
 'id': 1}

In [17]:
# show first 5 reocrds and just display name and description
session.query(GitHubRepos.name, GitHubRepos.description).limit(5).all()

[('awesome-machine-learning',
  'A curated list of awesome Machine Learning frameworks, libraries and software.'),
 ('scikit-learn', 'scikit-learn: machine learning in Python'),
 ('ML-From-Scratch',
  'Machine Learning From Scratch. Bare bones NumPy implementations of machine learning models and algorithms with a focus on accessibility. Aims to cover everything from linear regression to deep learning.'),
 ('DeepFaceLab',
  'DeepFaceLab is a tool that utilizes machine learning to replace faces in videos. Includes prebuilt ready to work standalone Windows 7,8,10 binary (look readme.md).'),
 ('ChatterBot',
  'ChatterBot is a machine learning, conversational dialog engine for creating chat bots')]