## Data structure

Here's what we want to be able to easily get: <br>
(1) List of players competing in a game <br>
(2) List of games a player has played in <br>
(3) List of games within a date range <br>
(4) All stats for each game <br>
(5) Summation of stats for a particular player's games over a particular date range <br>

Tables <br>
(1) GameOverallStat (game id, date, team) <br>
(2) SkaterGameStat (keyed on game_id, player_id) <br>
(3) GoalieGameStat (keyed on game_id, player_id) <br>
(4) Player (keyed on player_id; contains position, name) <br>
(5) Team

In [91]:
import datetime
import glob
import sys

import pandas as pd
import sqlalchemy
from IPython.core import display as ICD

# Directory prefix prepended to per-game player-stats file names
# (see get_playerstats_files).
PLAYERGAME_DIR = "./Games/"
# CSV of games that build_database loads.
GAMESFILE = "./Games/2015Games.csv"
# File-name components combined in get_playerstats_files as
# <gamename>_<PLAYER_STATS_FILE><HOME|AWAY>.csv
PLAYER_STATS_FILE = "PlayerStats"
HOME = "Home"
AWAY = "Away"

# SQLite database file; appended to 'sqlite:///' to form the engine URL.
DATABASE_LOC = "test3.db"

# Table written by build_database and queried by games_in_daterange.
TABLE_GAMEOVERALL = "GAME_OVERALL"
# NOTE(review): not referenced anywhere in the visible code; presumably the
# table for per-player, per-game stats -- confirm.
TABLE_PLAYERGAME = "PLAYER_GAME"

def get_playerstats_files(gamename):
    """Return the two player-stats CSV paths for ``gamename``.

    Both paths have the form
    ``PLAYERGAME_DIR + gamename + '_' + PLAYER_STATS_FILE + <side> + '.csv'``.

    Args:
        gamename: Game identifier string used as the file-name prefix.

    Returns:
        A ``(home_path, away_path)`` tuple, with the ``HOME`` file first and
        the ``AWAY`` file second.
    """
    # Everything up to the Home/Away marker is shared by both files.
    stem = PLAYERGAME_DIR + gamename + '_' + PLAYER_STATS_FILE
    return stem + HOME + '.csv', stem + AWAY + '.csv'

def gamefile_to_gamename(gamefile):
    """Derive a bare game name from a game CSV path.

    The directory part and a trailing ``.csv`` extension are removed. Both
    ``/`` and ``\\`` are treated as path separators, so the result is the
    same whether the path was produced on Windows (``./Games\\2015_0.csv``)
    or on a POSIX system (``./Games/2015_0.csv``).

    Args:
        gamefile: Path to a game file; any object whose ``str()`` is the
            path is accepted.

    Returns:
        The file name without directory and without the ``.csv`` suffix.
    """
    path = str(gamefile)
    # Normalise Windows separators so one split handles both path styles.
    filename = path.replace('\\', '/').rsplit('/', 1)[-1]
    # Only strip the extension at the end, never a '.csv' inside the name.
    if filename.endswith('.csv'):
        filename = filename[:-len('.csv')]
    return filename

def append_columns(game, team1, team2):
    """Add 'GameName' / 'Result' columns (currently non-functional).

    NOTE(review): the body never uses the ``game``, ``team1`` or ``team2``
    parameters. It reads ``d_game``, ``d_team1``, ``d_team2``, ``gamename``
    and ``TEAM1`` instead, none of which are defined in this function or in
    the visible part of the file, so a call would raise ``NameError`` unless
    those names exist as globals elsewhere. Presumably the parameters were
    meant to be the objects being modified -- confirm the intent before use.
    """
    # Assigns the same game name under the 'GameName' key of all three objects.
    d_game['GameName'] = gamename
    d_team1['GameName'] = gamename
    d_team2['GameName'] = gamename
    # NOTE(review): only d_team1 receives 'Result'; d_team2 is not given one.
    d_team1['Result'] = TEAM1
    
def get_gamename(game, season=2015):
    """Build the game identifier ``"<season>_<index>"``.

    Args:
        game: An indexable whose first element is the game's index, e.g. the
            ``(index, row)`` tuple yielded by ``DataFrame.iterrows()``.
        season: Value used as the identifier prefix. Defaults to 2015, the
            value that was previously hard-coded.

    Returns:
        The identifier string, e.g. ``"2015_0"``.
    """
    return "{}_{}".format(season, game[0])

def build_database(databasename):
    """Load the games CSV and write it to the ``TABLE_GAMEOVERALL`` table.

    Reads ``GAMESFILE``, adds a ``DateTimestamp`` column (integer POSIX
    timestamp parsed from the first column, which must hold ``%Y-%m-%d``
    strings) and a ``GameName`` column (from ``get_gamename``), drops the
    ``Notes`` column, converts ``OT`` to a boolean that is True only where
    the value equals ``"OT"``, replaces any existing table of the same name,
    and displays the first 40 rows.

    Args:
        databasename: SQLite database file; appended to ``'sqlite:///'`` to
            form the engine URL.

    Note:
        The parsed datetimes are naive, so ``timestamp()`` interprets them in
        the machine's local time zone; the stored values therefore depend on
        where this runs.
    """
    engine = sqlalchemy.create_engine('sqlite:///' + databasename)
    d_games = pd.read_csv(GAMESFILE)

    gamenames = []
    dates = []
    # iterrows() yields (index, row) tuples; get_gamename consumes the tuple.
    for game in d_games.iterrows():
        gamenames.append(get_gamename(game))
        # Explicit positional access: a bare integer key on a label-indexed
        # row relies on a deprecated positional fallback.
        dt = datetime.datetime.strptime(game[1].iloc[0], '%Y-%m-%d')
        dates.append(int(dt.timestamp()))

    d_games['DateTimestamp'] = dates
    d_games['GameName'] = gamenames
    # axis must be passed by keyword; the positional form was removed in
    # pandas 2.0.
    d_games = d_games.drop('Notes', axis=1)
    d_games['OT'] = d_games['OT'] == "OT"
    d_games.to_sql(TABLE_GAMEOVERALL, engine, if_exists='replace')
    ICD.display(d_games[0:40])
    
# Build the games table when this cell runs; an existing table is replaced.
build_database(DATABASE_LOC)

Unnamed: 0,Date,Visitor,G,Home,G.1,OT,Att.,LOG,DateTimestamp,GameName
0,2015-10-07,Vancouver Canucks,5,Calgary Flames,1,False,19289.0,2:32,1444194000,2015_0
1,2015-10-07,New York Rangers,3,Chicago Blackhawks,2,False,22104.0,2:28,1444194000,2015_1
2,2015-10-07,San Jose Sharks,5,Los Angeles Kings,1,False,18230.0,2:40,1444194000,2015_2
3,2015-10-07,Montreal Canadiens,3,Toronto Maple Leafs,1,False,19241.0,2:36,1444194000,2015_3
4,2015-10-08,Winnipeg Jets,6,Boston Bruins,2,False,17565.0,2:26,1444280400,2015_4
5,2015-10-08,Ottawa Senators,3,Buffalo Sabres,1,False,19070.0,2:37,1444280400,2015_5
6,2015-10-08,Minnesota Wild,5,Colorado Avalanche,4,False,18007.0,2:36,1444280400,2015_6
7,2015-10-08,Pittsburgh Penguins,0,Dallas Stars,3,False,18532.0,2:41,1444280400,2015_7
8,2015-10-08,Carolina Hurricanes,1,Nashville Predators,2,False,17204.0,2:25,1444280400,2015_8
9,2015-10-08,Edmonton Oilers,1,St. Louis Blues,3,False,19327.0,2:25,1444280400,2015_9


In [93]:
def games_in_daterange(date1, date2):
    """Return the games whose ``DateTimestamp`` lies in ``[date1, date2]``.

    Both bounds are inclusive. Each bound is converted with
    ``int(d.timestamp())`` before being compared against the
    ``DateTimestamp`` column of ``TABLE_GAMEOVERALL`` in ``DATABASE_LOC``.

    Args:
        date1: Lower bound; an object providing ``timestamp()``
            (e.g. ``datetime.datetime``).
        date2: Upper bound; same requirements as ``date1``.

    Returns:
        The result of ``pd.read_sql_query`` for the matching rows.
    """
    # Truncate both bounds to whole seconds, lower bound first.
    start_ts, end_ts = (int(bound.timestamp()) for bound in (date1, date2))
    engine = sqlalchemy.create_engine('sqlite:///' + DATABASE_LOC)
    query = 'SELECT * from {} WHERE DateTimestamp >= {} AND DateTimestamp <= {}'.format(
        TABLE_GAMEOVERALL, start_ts, end_ts
    )
    return pd.read_sql_query(query, engine)

# Example bound for a date-range query: midnight on 2015-10-07 (naive, no tzinfo).
date1 = datetime.datetime(2015, 10, 7)


Unnamed: 0,index,Date,Visitor,G,Home,1,OT,Unnamed: 8,LOG,DateTimestamp,GameName
0,0,2015-10-07,Vancouver Canucks,5,Calgary Flames,1,0,19289.0,2:32,1444194000,2015_0
1,1,2015-10-07,New York Rangers,3,Chicago Blackhawks,2,0,22104.0,2:28,1444194000,2015_1
2,2,2015-10-07,San Jose Sharks,5,Los Angeles Kings,1,0,18230.0,2:40,1444194000,2015_2
3,3,2015-10-07,Montreal Canadiens,3,Toronto Maple Leafs,1,0,19241.0,2:36,1444194000,2015_3
4,4,2015-10-08,Winnipeg Jets,6,Boston Bruins,2,0,17565.0,2:26,1444280400,2015_4
5,5,2015-10-08,Ottawa Senators,3,Buffalo Sabres,1,0,19070.0,2:37,1444280400,2015_5
6,6,2015-10-08,Minnesota Wild,5,Colorado Avalanche,4,0,18007.0,2:36,1444280400,2015_6
7,7,2015-10-08,Pittsburgh Penguins,0,Dallas Stars,3,0,18532.0,2:41,1444280400,2015_7
8,8,2015-10-08,Carolina Hurricanes,1,Nashville Predators,2,0,17204.0,2:25,1444280400,2015_8
9,9,2015-10-08,Edmonton Oilers,1,St. Louis Blues,3,0,19327.0,2:25,1444280400,2015_9
