# Exploring the GitHub Data inside the sqlite DB

In [1]:
from sqlalchemy import create_engine, func, desc, inspect
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import sqlalchemy

In [2]:
# Create the engine for the `gitdb.sqlite` database file.
# The three-slash SQLite URL is a relative path, so the file is looked up
# relative to the current working directory.
engine = create_engine("sqlite:///gitdb.sqlite")

In [3]:
# Declare an automap Base; its mapped classes are generated from the
# database schema once the Base is prepared against an engine.
Base = automap_base()

In [4]:
# Reflect the existing database tables through `engine` so automap can
# generate a mapped class for each of them.
# NOTE(review): SQLAlchemy 1.4+ deprecates `reflect=True` in favour of
# `Base.prepare(autoload_with=engine)` — confirm the installed version
# before switching.
Base.prepare(engine, reflect=True)

In [5]:
# List the names of all classes automap generated on the Base
# (as the last expression of the cell, the notebook displays the result).
Base.classes.keys()

['gitrepos']

In [6]:
# Create an inspector bound to the engine; it is used below to look up
# table and column metadata.
inspector = inspect(engine)

In [7]:
# Collect the names of the tables present in the database
# (displayed by the notebook as the cell's result).
inspector.get_table_names()

['gitrepos']

In [8]:
# Recall the layout of the `gitrepos` table: print every column's name
# next to its type, one column per line.
gitrepos_columns = inspector.get_columns('gitrepos')
for column in gitrepos_columns:
    print(column['name'], column['type'])

id INTEGER
name VARCHAR
html_url VARCHAR
description VARCHAR
updated_at VARCHAR
created_at VARCHAR
git_url VARCHAR
stargazers_count INTEGER
watchers_count INTEGER
language VARCHAR
forks INTEGER
search_topic VARCHAR
search_language VARCHAR


In [9]:
# Assign the automapped `gitrepos` class to a variable called `Git`.
# Mapped classes are exposed as attributes of `Base.classes`.

Git = Base.classes.gitrepos

In [10]:
# Create a session bound to the engine; every query below goes through it.
session = Session(engine)

In [11]:
# Display the first row's columns and data in dictionary format.
# Besides the column values, the instance dictionary also contains
# SQLAlchemy's internal `_sa_instance_state` entry.
# The statement must start at column 0: a stray leading space makes it an
# "unexpected indent" error when run as plain Python.
session.query(Git).first().__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x104980eb8>,
 'html_url': 'https://github.com/josephmisiti/awesome-machine-learning',
 'search_language': 'python',
 'forks': None,
 'id': 1,
 'stargazers_count': 42531,
 'created_at': '2014-07-15T19:11:19Z',
 'description': 'A curated list of awesome Machine Learning frameworks, libraries and software.',
 'name': 'awesome-machine-learning',
 'language': 'Python',
 'search_topic': 'machine+learning',
 'watchers_count': 42531,
 'git_url': 'git://github.com/josephmisiti/awesome-machine-learning.git',
 'updated_at': '2019-11-16T06:48:37Z'}

In [12]:
# Tally how many repositories were stored for each detected language
repos_per_language = (
    session.query(Git.language, func.count(Git.id))
    .group_by(Git.language)
)
repos_per_language.all()

[('JavaScript', 60), ('Python', 60)]

In [13]:
# For comparison with the detected language, tally the repositories by the
# search language that was used to find them
repos_per_search_language = (
    session.query(Git.search_language, func.count(Git.id))
    .group_by(Git.search_language)
)
repos_per_search_language.all()

[('javascript', 60), ('python', 60)]

In [14]:
# Ten most-starred repositories overall: repo name and stargazer count,
# highest count first
most_starred = (
    session.query(Git.name, Git.stargazers_count)
    .order_by(Git.stargazers_count.desc())
    .limit(10)
)
most_starred.all()

[('vue', 152240),
 ('bootstrap', 136856),
 ('flask', 47524),
 ('express', 46178),
 ('django', 45246),
 ('awesome-machine-learning', 42531),
 ('scikit-learn', 38010),
 ('scrapy', 35081),
 ('serverless', 32813),
 ('phaser', 26074)]

In [77]:
# Build one top-10 subquery per language, each ranked by stargazer count.
# Both select repo name, description, star count and language; they share a
# base query and differ only in the language filter.
repo_columns = (Git.name, Git.description, Git.stargazers_count, Git.language)
ranked_by_stars = session.query(*repo_columns).order_by(desc(Git.stargazers_count))

subq1 = ranked_by_stars.filter(Git.language == 'Python').limit(10).subquery()

subq2 = ranked_by_stars.filter(Git.language == 'JavaScript').limit(10).subquery()

In [78]:
# Combine the two per-language top-10 subqueries with UNION and list the
# merged rows from most to fewest stars.
python_rows = session.query(subq1.c.name, subq1.c.description,
                            subq1.c.stargazers_count, subq1.c.language)
javascript_rows = session.query(subq2.c.name, subq2.c.description,
                                subq2.c.stargazers_count, subq2.c.language)
combined_rows = python_rows.union(javascript_rows)
combined_rows.order_by(desc(subq1.c.stargazers_count)).all()


[('vue',
  '🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.',
  152240,
  'JavaScript'),
 ('bootstrap',
  'The most popular HTML, CSS, and JavaScript framework for developing responsive, mobile first projects on the web.',
  136856,
  'JavaScript'),
 ('flask',
  'The Python micro framework for building web applications.',
  47524,
  'Python'),
 ('express',
  'Fast, unopinionated, minimalist web framework for node.',
  46178,
  'JavaScript'),
 ('django',
  'The Web framework for perfectionists with deadlines.',
  45246,
  'Python'),
 ('awesome-machine-learning',
  'A curated list of awesome Machine Learning frameworks, libraries and software.',
  42531,
  'Python'),
 ('scikit-learn', 'scikit-learn: machine learning in Python', 38010, 'Python'),
 ('scrapy',
  'Scrapy, a fast high-level web crawling & scraping framework for Python.',
  35081,
  'Python'),
 ('serverless',
  'Serverless Framework â€“ Build web, mobile and IoT applicatio