In [24]:
import pandas as pd
import numpy as np
import re

In [25]:
cdata_all = pd.read_csv("Data/Football-Scenarios-DFE-832307.csv")

In [26]:
cdata_all.head()

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,antecedent,antecedent:confidence,orig_antecedent,antecedent_gold,option1,option2,option3,option4,option5
0,831005673,False,finalized,5,11/20/15 20:20,kick a field goal,0.8092,It is first down and 10. The ball is on your o...,,punt,kick a field goal,run,pass,kneel down
1,831005674,False,finalized,5,11/18/15 21:59,kick a field goal,1.0,It is second down and inches. The ball is on y...,,punt,kick a field goal,run,pass,kneel down
2,831005675,False,finalized,5,11/20/15 22:43,kick a field goal,0.6211,It is second down and inches. The ball is on y...,,punt,kick a field goal,run,pass,kneel down
3,831005676,False,finalized,5,11/19/15 7:41,kick a field goal,0.8073,It is second down and inches. The ball is on y...,,punt,kick a field goal,run,pass,kneel down
4,831005677,False,finalized,5,11/21/15 8:01,kick a field goal,1.0,It is second down and inches. The ball is on y...,,punt,kick a field goal,run,pass,kneel down


In [27]:
len(cdata_all)

3730

In [28]:
cdata = cdata_all[cdata_all['_golden']==False]

In [29]:
len(cdata)

3706

In [30]:
cdata.orig_antecedent.str.split('.').head()


0    [It is first down and 10,  The ball is on your...
1    [It is second down and inches,  The ball is on...
2    [It is second down and inches,  The ball is on...
3    [It is second down and inches,  The ball is on...
4    [It is second down and inches,  The ball is on...
Name: orig_antecedent, dtype: object

In [31]:
split_scenarios = cdata.orig_antecedent.str.split('.').tolist()
split_scenarios = [scenario[0:-1] for scenario in split_scenarios]

 **down | yards to 1st down | field position | quarter | time left on the clock | score differential**

In [32]:
def convert_ordinal(value):
    if value == 'first':
        return 1
    elif value == 'second':
        return 2
    elif value == 'third':
        return 3
    elif value == 'fourth':
        return 4
    else:
        return value
    

In [38]:
def convert_clock(value):
    min = re.search('(.+?) minute', value)
    sec = re.search('(.+?) second', value)
    if min:
        return int(min.group(1)) * 60
    elif sec:
        return int(sec.group(1))
    else:
        return value

In [43]:
def convert_scoredelta(value):
    down_by = re.search('down by (.+?)', value)
    up_by = re.search('up by (.+?)', value)
    
    if down_by:
        return int(down_by.group(1)) * -1
    elif up_by:
        return int(up_by.group(1))
    else:
        return value

In [44]:
def convert_fieldpos(value):
    _list = value.split(' ')
    if len(_list) > 1:
        return int(_list[1])
    elif len(_list) == 1:
        return 100 - int(_list[0])
    else:
        return value

In [45]:
def extract(line):
    down = re.search('It is (.+?) down', line)
    ytg = re.search('down and (.+?). ',line)
    fieldpos = re.search("your (.+?) yardline", line)
    quarter = re.search('the (.+?) quarter', line)
    clock = re.search('There is (.+?) left', line)
    scoredelta = re.search('You are (.+?) points',line)
    
    extraction = (
        line,
        convert_ordinal(down.group(1)) if down else np.NaN,
        int(ytg.group(1).replace('inches','0')) if ytg else np.NaN,
        convert_fieldpos(fieldpos.group(1)) if fieldpos else np.NaN,
        convert_ordinal(quarter.group(1)) if quarter else np.NaN,
        convert_clock(clock.group(1)) if clock else np.NaN,
        convert_scoredelta(scoredelta.group(1)) if scoredelta else np.NaN
    )
    return extraction

In [46]:
data = [
    extract(scenario)
    for scenario in cdata.orig_antecedent
]

In [48]:
df = pd.DataFrame.from_records(data, columns=['scenario','down','ytg','fieldpos','quarter','clock','scoredelta'])
df

Unnamed: 0,scenario,down,ytg,fieldpos,quarter,clock,scoredelta
0,It is first down and 10. The ball is on your o...,1.0,10.0,20.0,2.0,3.0,-3.0
1,It is second down and inches. The ball is on y...,2.0,0.0,5.0,2.0,3.0,-3.0
2,It is second down and inches. The ball is on y...,2.0,0.0,20.0,2.0,3.0,-3.0
3,It is second down and inches. The ball is on y...,2.0,0.0,5.0,4.0,3.0,-3.0
4,It is second down and inches. The ball is on y...,2.0,0.0,20.0,4.0,3.0,-3.0
5,It is second down and inches. The ball is on y...,2.0,0.0,55.0,4.0,3.0,-7.0
6,It is second down and inches. The ball is on y...,2.0,0.0,80.0,4.0,3.0,-7.0
7,It is second down and 3. The ball is on your o...,2.0,3.0,5.0,4.0,3.0,-3.0
8,It is second down and 3. The ball is on your o...,2.0,3.0,20.0,4.0,3.0,-3.0
9,It is second down and 3. The ball is on your o...,2.0,3.0,20.0,2.0,3.0,-7.0


In [52]:
cdata.loc[:,['scenario','down','ytg','fieldpos','quarter','clock','scoredelta']]=df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [53]:
cdata

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,antecedent,antecedent:confidence,orig_antecedent,antecedent_gold,option1,...,option3,option4,option5,scenario,down,ytg,fieldpos,quarter,clock,scoredelta
0,831005673,False,finalized,5,11/20/15 20:20,kick a field goal,0.8092,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,20.0,2.0,3.0,-3.0
1,831005674,False,finalized,5,11/18/15 21:59,kick a field goal,1.0000,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,5.0,2.0,3.0,-3.0
2,831005675,False,finalized,5,11/20/15 22:43,kick a field goal,0.6211,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,20.0,2.0,3.0,-3.0
3,831005676,False,finalized,5,11/19/15 7:41,kick a field goal,0.8073,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,5.0,4.0,3.0,-3.0
4,831005677,False,finalized,5,11/21/15 8:01,kick a field goal,1.0000,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,20.0,4.0,3.0,-3.0
5,831005678,False,finalized,5,11/21/15 18:25,pass,1.0000,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,55.0,4.0,3.0,-7.0
6,831005679,False,finalized,5,11/20/15 20:31,pass,1.0000,It is second down and inches. The ball is on y...,,punt,...,run,pass,kneel down,It is second down and inches. The ball is on y...,2.0,0.0,80.0,4.0,3.0,-7.0
7,831005680,False,finalized,5,11/20/15 19:10,kick a field goal,0.7953,It is second down and 3. The ball is on your o...,,punt,...,run,pass,kneel down,It is second down and 3. The ball is on your o...,2.0,3.0,5.0,4.0,3.0,-3.0
8,831005681,False,finalized,5,11/20/15 18:20,kick a field goal,1.0000,It is second down and 3. The ball is on your o...,,punt,...,run,pass,kneel down,It is second down and 3. The ball is on your o...,2.0,3.0,20.0,4.0,3.0,-3.0
9,831005682,False,finalized,5,11/20/15 9:58,pass,0.5874,It is second down and 3. The ball is on your o...,,punt,...,run,pass,kneel down,It is second down and 3. The ball is on your o...,2.0,3.0,20.0,2.0,3.0,-7.0


In [19]:
cdata[(cdata.clock <= 10)&(cdata.down == 1)&(cdata.fieldpos)]

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,antecedent,antecedent:confidence,orig_antecedent,antecedent_gold,option1,...,option3,option4,option5,scenario,down,ytg,fieldpos,quarter,clock,scoredelta
0,831005673,False,finalized,5,11/20/15 20:20,kick a field goal,0.8092,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,20.0,2.0,3.0,-3.0
24,831005697,False,finalized,5,11/19/15 17:44,kick a field goal,0.5694,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,40.0,2.0,3.0,-3.0
25,831005698,False,finalized,5,11/19/15 17:05,pass,0.8155,It is first down and 10. The ball is on your 4...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your 4...,1.0,10.0,55.0,2.0,3.0,-3.0
26,831005699,False,finalized,5,11/19/15 2:55,pass,0.3762,It is first down and 10. The ball is on your 2...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your 2...,1.0,10.0,80.0,2.0,3.0,-3.0
39,831005712,False,finalized,5,11/20/15 20:00,kick a field goal,1.0,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,20.0,4.0,3.0,-3.0
40,831005713,False,finalized,5,11/20/15 21:31,pass,0.6238,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,40.0,4.0,3.0,-3.0
41,831005714,False,finalized,5,11/21/15 16:34,pass,0.8247,It is first down and 10. The ball is on your 4...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your 4...,1.0,10.0,55.0,4.0,3.0,-3.0
42,831005715,False,finalized,5,11/20/15 16:01,pass,0.8031,It is first down and 10. The ball is on your 2...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your 2...,1.0,10.0,80.0,4.0,3.0,-3.0
56,831005729,False,finalized,5,11/18/15 22:07,kick a field goal,1.0,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,20.0,2.0,3.0,-7.0
57,831005730,False,finalized,5,11/20/15 19:04,kick a field goal,0.5984,It is first down and 10. The ball is on your o...,,punt,...,run,pass,kneel down,It is first down and 10. The ball is on your o...,1.0,10.0,40.0,2.0,3.0,-7.0
