In [1]:
import numpy as np
import sklearn as sk
import pandas as pd
import sqlite3

In [2]:
# Read the pbp-2016.csv play-by-play file into a DataFrame. The path is
# relative, so it resolves against the notebook's working directory.
nfl2016 = pd.read_csv('../data/pbp-2016.csv')

In [3]:
# Columns retained from the raw play-by-play frame; every other column is
# discarded by the selection below. The order here is the resulting column order.
var_list = [
    'GameDate', 'Quarter', 'Minute', 'Second',
    'OffenseTeam', 'DefenseTeam',
    'Down', 'ToGo', 'YardLine', 'Description',
    'SeasonYear', 'Yards', 'Formation', 'PlayType',
    'IsRush', 'IsPass', 'IsIncomplete', 'IsTouchdown',
    'PassType', 'IsSack',
]

# Raises KeyError if any listed column is missing from the file.
nfl2016 = nfl2016[var_list]

In [4]:
# Discard rows whose OffenseTeam is missing; the later per-team groupby keys
# on that column.
nfl2016 = nfl2016.dropna(subset=['OffenseTeam'])

In [2]:
def feat_pass_yrds(row):
    """Return ``row.IsPass * row.Yards`` for a single play.

    ``row`` is any object exposing ``IsPass`` and ``Yards`` attributes (a
    DataFrame row when used with ``apply(axis=1)``). Assuming ``IsPass`` is a
    0/1 flag, the result is the play's yardage on passes and 0 otherwise.
    """
    pass_flag = row.IsPass
    return pass_flag * row.Yards

def feat_rush_yrds(row):
    """Return ``row.IsRush * row.Yards`` for a single play.

    ``row`` is any object exposing ``IsRush`` and ``Yards`` attributes (a
    DataFrame row when used with ``apply(axis=1)``). Assuming ``IsRush`` is a
    0/1 flag, the result is the play's yardage on rushes and 0 otherwise.
    """
    rush_flag = row.IsRush
    return rush_flag * row.Yards

def feat_pass_tds(row):
    """Return ``row.IsPass * row.IsTouchdown`` for a single play.

    ``row`` is any object exposing ``IsPass`` and ``IsTouchdown`` attributes.
    Assuming both are 0/1 flags, the result is 1 only when the play is both a
    pass and a touchdown.
    """
    pass_flag = row.IsPass
    return pass_flag * row.IsTouchdown

def feat_rush_tds(row):
    """Return ``row.IsRush * row.IsTouchdown`` for a single play.

    ``row`` is any object exposing ``IsRush`` and ``IsTouchdown`` attributes.
    Assuming both are 0/1 flags, the result is 1 only when the play is both a
    rush and a touchdown.
    """
    rush_flag = row.IsRush
    return rush_flag * row.IsTouchdown

In [6]:
# Derived per-play columns, computed as whole-column products instead of four
# row-by-row apply(axis=1) passes. The values are the same products
# (flag * Yards, flag * IsTouchdown) but avoid a Python-level call per row.
nfl2016['PassYrds'] = nfl2016['IsPass'] * nfl2016['Yards']
nfl2016['RushYrds'] = nfl2016['IsRush'] * nfl2016['Yards']
nfl2016['PassTDs'] = nfl2016['IsPass'] * nfl2016['IsTouchdown']
nfl2016['RushTDs'] = nfl2016['IsRush'] * nfl2016['IsTouchdown']

In [7]:
# Columns of the per-team summary, in the order they should appear.
off_vars = [
    'SeasonYear', 'Yards', 'IsRush', 'IsPass', 'IsIncomplete',
    'PassYrds', 'RushYrds', 'PassTDs', 'RushTDs', 'IsTouchdown',
]

# One groupby object serves both the row count and the aggregation.
plays_by_team = nfl2016.groupby('OffenseTeam')
teams_off_counts = plays_by_team.size()

# SeasonYear keeps its first value per team; every other column is summed.
team_totals = plays_by_team.agg({
    'SeasonYear': 'first',
    'Yards': 'sum',
    'IsRush': 'sum',
    'IsPass': 'sum',
    'IsIncomplete': 'sum',
    'PassYrds': 'sum',
    'RushYrds': 'sum',
    'PassTDs': 'sum',
    'RushTDs': 'sum',
    'IsTouchdown': 'sum',
})

# Reorder to off_vars and wrap in a fresh DataFrame before adding a column.
teams_off = pd.DataFrame(team_totals[off_vars])
teams_off['Plays'] = teams_off_counts

# Move OffenseTeam from the index to a column, then apply the final names.
teams_off = teams_off.reset_index(level=0)
teams_off = teams_off.rename(columns={
    'OffenseTeam': 'Team',
    'SeasonYear': 'Season',
    'IsRush': 'Rushes',
    'IsPass': 'Passes',
    'IsIncomplete': 'Incompletes',
    'IsTouchdown': 'TDs',
})

In [3]:
def get_off_stats(filename):
    """Build per-team offensive totals from one play-by-play CSV file.

    Parameters
    ----------
    filename : str or path-like
        Passed straight to ``pandas.read_csv``. The file must contain every
        column listed in ``var_list`` below.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``OffenseTeam`` value, on a fresh 0..n-1 index,
        with columns ``Team``, ``Season``, ``Yards``, ``Rushes``, ``Passes``,
        ``Incompletes``, ``PassYrds``, ``RushYrds``, ``PassTDs``, ``RushTDs``,
        ``TDs`` and ``Plays``. ``Season`` is the first ``SeasonYear`` seen for
        the team, ``Plays`` is the number of rows for the team, and the rest
        are column sums.

    Raises
    ------
    KeyError
        If the file lacks any of the columns in ``var_list``.
    """
    var_list = ['GameDate', 'Quarter', 'Minute', 'Second', 'OffenseTeam',
                'DefenseTeam', 'Down', 'ToGo', 'YardLine', 'Description',
                'SeasonYear', 'Yards', 'Formation',
                'PlayType', 'IsRush', 'IsPass', 'IsIncomplete', 'IsTouchdown',
                'PassType', 'IsSack']

    # Rows without an offense team cannot be attributed to anyone, so drop them.
    nfl = pd.read_csv(filename)[var_list].dropna(subset=['OffenseTeam'])

    # Whole-column products instead of row-by-row apply(axis=1): same values,
    # no per-row Python call, and no dependency on helper functions defined
    # in other cells.
    nfl = nfl.assign(
        PassYrds=nfl['IsPass'] * nfl['Yards'],
        RushYrds=nfl['IsRush'] * nfl['Yards'],
        PassTDs=nfl['IsPass'] * nfl['IsTouchdown'],
        RushTDs=nfl['IsRush'] * nfl['IsTouchdown'],
    )

    off_vars = ['SeasonYear', 'Yards', 'IsRush', 'IsPass', 'IsIncomplete',
                'PassYrds', 'RushYrds', 'PassTDs', 'RushTDs', 'IsTouchdown']

    # Everything is summed per team except SeasonYear, which keeps its first value.
    how = {col: 'sum' for col in off_vars}
    how['SeasonYear'] = 'first'

    by_team = nfl.groupby('OffenseTeam')
    # [off_vars] pins the column order; assign() returns a new frame, so the
    # Plays column is never written into a selection of another frame.
    teams_off = by_team.agg(how)[off_vars].assign(Plays=by_team.size())

    # Move OffenseTeam from the index to a column, then apply the final names.
    teams_off = teams_off.reset_index().rename(
        columns={'OffenseTeam': 'Team', 'SeasonYear': 'Season',
                 'IsRush': 'Rushes', 'IsPass': 'Passes',
                 'IsIncomplete': 'Incompletes', 'IsTouchdown': 'TDs'})

    return teams_off

In [4]:
# Stack the per-team summaries for each season into one table.
# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every call); a single pd.concat builds the same result in one step.
# As with append, each season keeps its own 0..n-1 index labels, so labels
# repeat across seasons; use positional access (iloc) on the combined frame.
season_files = ['../data/pbp-2016.csv',
                '../data/pbp-2015.csv',
                '../data/pbp-2014.csv']
df = pd.concat([get_off_stats(path) for path in season_files])

In [5]:
# Open the SQLite database file 'fantasy.db' (relative to the working
# directory; sqlite3 creates the file if it does not exist yet) and get a
# cursor for issuing statements against it.
conn = sqlite3.connect('fantasy.db')
c = conn.cursor()

In [9]:
# Preview the first rows of the combined per-team table.
df.head()

Unnamed: 0,Team,Season,Yards,Rushes,Passes,Incompletes,PassYrds,RushYrds,PassTDs,RushTDs,TDs,Plays
0,ARI,2016,5803,365,647,255,4487,1627,27,19,50,1324
1,ATL,2016,6456,393,526,159,4847,1865,36,19,55,1240
2,BAL,2016,5789,345,658,222,4478,1551,21,9,31,1311
3,BUF,2016,5695,458,472,180,3245,2647,18,29,47,1250
4,CAR,2016,5718,426,563,252,4215,1760,26,16,43,1293


In [6]:
# Create the per-team, per-season offense table. IF NOT EXISTS makes the cell
# safe to re-run against an existing fantasy.db; a plain CREATE TABLE raises
# sqlite3.OperationalError the second time.
# UNIQUE (Team, Season) rejects a second row for the same team and season.
c.execute('''CREATE TABLE IF NOT EXISTS OffenseSeason
             (Team VARCHAR(3),
             Season CHARACTER(4),
             Yards SMALLINT,
             Rushes SMALLINT,
             Passes SMALLINT,
             Incompletes SMALLINT,
             PassYrds SMALLINT,
             RushYrds SMALLINT,
             PassTDs SMALLINT,
             RushTDs SMALLINT,
             TDs SMALLINT,
             Plays SMALLINT,
             UNIQUE (Team, Season))''')

<sqlite3.Cursor at 0x8bb6420>

In [26]:
# Insert one row per (team, season) into OffenseSeason.
for ndx in range(len(df)):
    record = df.iloc[ndx]
    # Every value is passed as text, exactly as before; the table's column
    # types decide how SQLite stores each one.
    values = tuple(record.astype('str'))
    print(str(values))
    try:
        c.execute('INSERT INTO OffenseSeason VALUES (?,?,?,?,?,?,?,?,?,?,?,?)', values)
    except sqlite3.IntegrityError:
        # Only a constraint violation (the UNIQUE (Team, Season) rule) means
        # "already in table". Any other error now propagates instead of being
        # misreported by a bare except.
        print(str(record['Team'])+' '+str(record['Season'])+' already in table')

# sqlite3 opens a transaction for INSERTs and does not commit on close, so
# without this the inserted rows are discarded when the connection is closed.
conn.commit()

('ARI', '2016', '5803', '365', '647', '255', '4487', '1627', '27', '19', '50', '1324')
ARI 2016 already in table
('ATL', '2016', '6456', '393', '526', '159', '4847', '1865', '36', '19', '55', '1240')
ATL 2016 already in table
('BAL', '2016', '5789', '345', '658', '222', '4478', '1551', '21', '9', '31', '1311')
BAL 2016 already in table
('BUF', '2016', '5695', '458', '472', '180', '3245', '2647', '18', '29', '47', '1250')
BUF 2016 already in table
('CAR', '2016', '5718', '426', '563', '252', '4215', '1760', '26', '16', '43', '1293')
CAR 2016 already in table
('CHI', '2016', '5918', '334', '573', '206', '4504', '1584', '19', '10', '31', '1163')
CHI 2016 already in table
('CIN', '2016', '5606', '400', '563', '197', '4216', '1664', '17', '15', '32', '1254')
CIN 2016 already in table
('CLE', '2016', '4567', '267', '557', '224', '3687', '1296', '14', '12', '29', '1172')
CLE 2016 already in table
('DAL', '2016', '6124', '479', '473', '153', '3779', '2503', '27', '25', '52', '1244')
DAL 2016 a

In [25]:
# Print every row currently visible in OffenseSeason through this cursor.
stored_rows = c.execute('SELECT * FROM OffenseSeason')
for row in stored_rows:
    print(row)

('ARI', b'\xe0\x07\x00\x00\x00\x00\x00\x00', b'\xab\x16\x00\x00\x00\x00\x00\x00', b'm\x01\x00\x00\x00\x00\x00\x00', b'\x87\x02\x00\x00\x00\x00\x00\x00', b'\xff\x00\x00\x00\x00\x00\x00\x00', b'\x87\x11\x00\x00\x00\x00\x00\x00', b'[\x06\x00\x00\x00\x00\x00\x00', b'\x1b\x00\x00\x00\x00\x00\x00\x00', b'\x13\x00\x00\x00\x00\x00\x00\x00', b'2\x00\x00\x00\x00\x00\x00\x00', b',\x05\x00\x00\x00\x00\x00\x00')
('ATL', b'\xe0\x07\x00\x00\x00\x00\x00\x00', b'8\x19\x00\x00\x00\x00\x00\x00', b'\x89\x01\x00\x00\x00\x00\x00\x00', b'\x0e\x02\x00\x00\x00\x00\x00\x00', b'\x9f\x00\x00\x00\x00\x00\x00\x00', b'\xef\x12\x00\x00\x00\x00\x00\x00', b'I\x07\x00\x00\x00\x00\x00\x00', b'$\x00\x00\x00\x00\x00\x00\x00', b'\x13\x00\x00\x00\x00\x00\x00\x00', b'7\x00\x00\x00\x00\x00\x00\x00', b'\xd8\x04\x00\x00\x00\x00\x00\x00')
('BAL', b'\xe0\x07\x00\x00\x00\x00\x00\x00', b'\x9d\x16\x00\x00\x00\x00\x00\x00', b'Y\x01\x00\x00\x00\x00\x00\x00', b'\x92\x02\x00\x00\x00\x00\x00\x00', b'\xde\x00\x00\x00\x00\x00\x00\x00', b'~\

In [28]:
# Release the database connection. Note that sqlite3 does not commit
# implicitly on close: changes not committed before this call are lost.
conn.close()