In [17]:
from collections import namedtuple
import csv
import os
import sqlite3
import pandas as pd

import requests

# Remote location of the NBA CSV data; requested only when no local cache exists.
DATA_URL = 'https://query.data.world/s/ezwk64ej624qyverrw6x7od7co7ftm'
# Local file that, when present, is read instead of requesting DATA_URL.
DATA_CACHED = 'nba.data'
# Path of the SQLite database file this notebook connects to.
NBA_DB = 'nba.db'

In [18]:
# Remove any database file left over from a previous run so that every
# execution starts from an empty database.
if os.path.isfile(NBA_DB):
    os.remove(NBA_DB)

# One record per CSV row; the field order matches the column order of the
# `players` table.
Player = namedtuple('Player', [
    'name', 'year', 'first_year', 'team', 'college',
    'active', 'games', 'avg_min', 'avg_points',
])

# Connection and cursor shared by the rest of the notebook.
conn = sqlite3.connect(NBA_DB)
cur = conn.cursor()

In [19]:
def _get_csv_data():
    """GIVEN:
       Return the raw NBA CSV text.

       The local cache file ``DATA_CACHED`` is used when it exists; otherwise
       the data is downloaded from ``DATA_URL``.

       Returns:
           str: the complete CSV document, decoded as UTF-8.

       Raises:
           requests.HTTPError: the server answered with an error status.
           requests.RequestException: timeout or connection failure.
    """
    if os.path.isfile(DATA_CACHED):
        # Explicit encoding: the cached copy must decode exactly like the
        # download below, whatever the platform's default encoding is.
        with open(DATA_CACHED, encoding='utf-8') as f:
            return f.read()

    with requests.Session() as session:
        # requests never times out on its own; bound the wait so a stalled
        # server cannot hang the notebook indefinitely.
        response = session.get(DATA_URL, timeout=30)
        # Fail loudly on 4xx/5xx instead of handing an error page to the
        # CSV parser.
        response.raise_for_status()
        return response.content.decode('utf-8')


def load_data():
    """GIVEN:
       Lazily yield one Player namedtuple per row of the NBA CSV data.

       This is a generator: nothing is read until iteration starts. Each
       field holds the value produced by csv.DictReader, with no type
       conversion applied."""
    # Player field -> CSV column header, listed in Player field order.
    column_for_field = {
        'name': 'Player',
        'year': 'Draft_Yr',
        'first_year': 'first_year',
        'team': 'Team',
        'college': 'College',
        'active': 'Yrs',
        'games': 'Games',
        'avg_min': 'Minutes.per.Game',
        'avg_points': 'Points.per.Game',
    }
    csv_lines = _get_csv_data().splitlines()
    for record in csv.DictReader(csv_lines, delimiter=','):
        yield Player(**{field: record[column]
                        for field, column in column_for_field.items()})

# CODE HERE (tests under __main__):

In [20]:
# Create the `players` table. The columns are declared without types and are
# listed in the same order as the Player namedtuple fields, so Player rows
# can be inserted positionally.
cur.execute("""
    CREATE TABLE players (
        name, 
        year, 
        first_year, 
        team, 
        college,
        active, 
        games, 
        avg_min, 
        avg_points)""")

<sqlite3.Cursor at 0x6ee6ffa0>

In [21]:
# Materialise every Player row, then bulk-insert them with positional
# placeholders (one `?` per Player field).
players = list(load_data())
cur.executemany("""INSERT INTO players 
                          (name, year, first_year, team, college, active, games, avg_min, avg_points) 
                   VALUES (   ?,    ?,          ?,    ?,       ?,      ?,     ?,       ?,          ?)""", players)
# sqlite3 wraps INSERTs in an implicit transaction; without a commit the rows
# are only visible through this connection and are lost when it closes.
conn.commit()



<sqlite3.Cursor at 0x6ee6ffa0>

In [24]:
# Load the whole players table into a DataFrame and preview the first rows.
df = pd.read_sql(sql="SELECT * FROM players", con=conn)
df.head()

Unnamed: 0,name,year,first_year,team,college,active,games,avg_min,avg_points
0,Robert Parish,1976,0,GSW,Centenary College of Louisiana,21,1611,28.4,14.5
1,Sonny Parker,1976,0,GSW,Texas A&M University,6,452,24.2,9.9
2,Marshall Rogers,1976,0,GSW,University of Texas-Pan American,1,26,6.8,3.8
3,Jeff Fosnes,1976,0,GSW,Vanderbilt University,0,0,0.0,0.0
4,Carl Bird,1976,0,GSW,University of California,0,0,0.0,0.0


In [26]:
# Selecting a single column by label gives a pandas Series (see the output below).
type(df["name"])

pandas.core.series.Series

In [None]:
def import_to_db(players=None, cursor=None):
    """Create the ``players`` table in sqlite3 and import the players data.

    Any existing ``players`` table is dropped first, so the function can be
    called repeatedly (or after the table was already created elsewhere in
    the notebook) and always leaves exactly the given rows in the table.

    required table SQL:
    CREATE TABLE players (name, year, first_year, team, college,
                          active, games, avg_min, avg_points)

    Args:
        players: iterable of 9-item rows in Player field order (name, year,
            first_year, team, college, active, games, avg_min, avg_points).
            Defaults to everything produced by ``load_data()``.
        cursor: sqlite3 cursor to write through. Defaults to the
            module-level ``cur``.

    Returns:
        None. The inserted rows are committed before returning.
    """
    if players is None:
        players = list(load_data())
    db_cursor = cur if cursor is None else cursor

    # Start from a clean table so a second import cannot duplicate rows or
    # fail with "table players already exists".
    db_cursor.execute('DROP TABLE IF EXISTS players')
    db_cursor.execute('''CREATE TABLE players (name, year, first_year, team, college,
                                               active, games, avg_min, avg_points)''')
    db_cursor.executemany(
        'INSERT INTO players VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', players)
    # INSERTs run inside sqlite3's implicit transaction; commit to persist.
    db_cursor.connection.commit()


def player_with_max_points_per_game(cursor=None):
    """Return the name of the player with the highest average points per game.

    ``avg_points`` is CAST to a number in SQL so the comparison is numeric
    rather than a text comparison (as text, '9.9' would sort above '30.1').
    Ties are resolved by name in ascending order.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        The player's name, or None when the table holds no rows.
    """
    db_cursor = cur if cursor is None else cursor
    db_cursor.execute('''SELECT name
                           FROM players
                          ORDER BY CAST(avg_points AS REAL) DESC, name
                          LIMIT 1''')
    row = db_cursor.fetchone()
    return row[0] if row else None


def number_of_players_from_duke(cursor=None):
    """Return the number of players with college == Duke University.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        int: count of rows whose ``college`` is exactly 'Duke University'.
    """
    db_cursor = cur if cursor is None else cursor
    db_cursor.execute('SELECT COUNT(*) FROM players WHERE college = ?',
                      ('Duke University',))
    return db_cursor.fetchone()[0]


def percentage_of_players_first_year(cursor=None):
    """Return 2 digit percentage of players whose first year it is
       (first_year column).

    A player counts as "first year" when the ``first_year`` column, CAST to
    an integer, equals 1.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        float: percentage in the range 0-100, rounded to two decimals, or
        None when the table holds no rows (the share is undefined).
    """
    db_cursor = cur if cursor is None else cursor
    # 100.0 forces floating-point arithmetic; the comparison evaluates to
    # 1/0 per row, so SUM counts the first-year players.
    db_cursor.execute('''SELECT 100.0 * SUM(CAST(first_year AS INTEGER) = 1) / COUNT(*)
                           FROM players''')
    percentage = db_cursor.fetchone()[0]
    return None if percentage is None else round(percentage, 2)


def avg_years_active_players_stanford(cursor=None):
    """Return the average years that players from "Stanford University"
       are active ("active" column).

    ``active`` is CAST to a number in SQL before averaging.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        float: the unrounded average, or None when no player has
        college == 'Stanford University'.
    """
    db_cursor = cur if cursor is None else cursor
    db_cursor.execute('''SELECT AVG(CAST(active AS REAL))
                           FROM players
                          WHERE college = ?''', ('Stanford University',))
    return db_cursor.fetchone()[0]


def year_with_most_drafts(cursor=None):
    """Return the year with the most drafts, in SQL you can use GROUP BY.

    Rows are grouped by ``year`` and the group with the most rows wins. When
    several years share the highest count, the smallest ``year`` value is
    returned so the result is deterministic.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        The winning ``year`` value exactly as stored in the table (no type
        conversion is applied), or None when the table holds no rows.
    """
    db_cursor = cur if cursor is None else cursor
    db_cursor.execute('''SELECT year
                           FROM players
                          GROUP BY year
                          ORDER BY COUNT(*) DESC, year
                          LIMIT 1''')
    row = db_cursor.fetchone()
    return row[0] if row else None


def most_games_per_year_for_veterans(cursor=None):
    """Top 6 players that are > 10 years active, that have the
       highest # games / year.

    ``active`` and ``games`` are CAST to numbers in SQL so that both the
    "> 10" filter and the division are numeric; the division is done in
    floating point so rates are not truncated to whole games. Ties are
    resolved by name in ascending order.

    Args:
        cursor: sqlite3 cursor to query. Defaults to the module-level ``cur``.

    Returns:
        list[str]: up to six player names, best games-per-year rate first.
    """
    db_cursor = cur if cursor is None else cursor
    db_cursor.execute('''SELECT name
                           FROM players
                          WHERE CAST(active AS INTEGER) > 10
                          ORDER BY CAST(games AS REAL) / CAST(active AS REAL) DESC,
                                   name
                          LIMIT 6''')
    return [row[0] for row in db_cursor.fetchall()]


if __name__ == '__main__':
    import_to_db()

    # A. the import must have loaded every row of the data set
    def _verify_total_row_count_after_import():
        """Return how many rows the players table currently holds."""
        cur.execute('''SELECT COUNT(*) FROM players''')
        (total,) = cur.fetchone()
        return total

    assert _verify_total_row_count_after_import() == 3961

    # B. spot checks of the data analysis functions against known answers
    assert player_with_max_points_per_game() == 'Michael Jordan'

    assert number_of_players_from_duke() == 58

    assert round(avg_years_active_players_stanford(), 2) == 4.58

    assert round(percentage_of_players_first_year(), 2) == 1.51

    assert int(year_with_most_drafts()) == 1984

    # Compared after sorting, so only membership of the top 6 is checked,
    # not the ranking order.
    expected = [
        'A.C. Green',
        'Alex English',
        'Jack Sikma',
        'John Stockton',
        'Mark Eaton',
        'Terry Tyler',
    ]
    assert sorted(most_games_per_year_for_veterans()) == expected
