In [30]:
#USER INPUT HERE
modweight = 0.4 #change this value

#change the name of the filename being used as the mod_epv for the current matchup
from mod_epv import mod_epv as m_epv

In [31]:
#import statements
import numpy as np
import pandas as pd
#these two imports will always be the same:
from avg_epv import avg_epv as a_epv
from r_conv_pct import conv_pct as r_conv

In [32]:
r_conv = np.matrix(r_conv)

In [33]:
#fn for ceiling division
def ceildiv(a, b):
    return -(-a // b)

#fn for grouping rows into bins of size binsize and taking the average of each bin
def binaverage(matrix, binsize):
    lst = np.array(matrix)
    output = np.zeros((len(lst), (ceildiv(len(lst[0]),binsize))))
    for i in range(0, len(lst)):
        temp = []
        for j in range(0, len(lst[0]), binsize):
            temp2 = lst[i][j:j + binsize]
            temp2 = [x for x in temp2 if x is not None]
            temp.append(sum(temp2) / len(temp2))
        output[i] = np.array(temp)
    return output

In [34]:
#output the average conversion percentage independent of yardline100 bin
print('conversionbydist =', binaverage(r_conv, 10))
#not necessary but cool to look at if we want to use this for something

conversionbydist = [[ 0.724]
 [ 0.614]
 [ 0.532]
 [ 0.473]
 [ 0.432]
 [ 0.405]
 [ 0.386]
 [ 0.371]
 [ 0.355]]


In [35]:
#place EPVs into 10 yard bins to match conversion percentages
a_epv10 = binaverage(a_epv, 10)
m_epv10 = binaverage(m_epv, 10)

In [36]:
#print 10 yard bin averages
np.set_printoptions(precision=3, suppress = True)
print(a_epv10, '\n')
print(m_epv10)

[[ 3.059  2.534  1.869  1.322  0.775  0.228 -0.32  -0.867 -1.414 -1.715]
 [ 2.178  2.081  1.469  0.922  0.375 -0.172 -0.72  -1.267 -1.814 -2.142]
 [ 1.903  1.733  1.201  0.654  0.107 -0.441 -0.988 -1.535 -2.082 -2.438]
 [ 1.679  1.55   1.055  0.508 -0.039 -0.586 -1.134 -1.681 -2.228 -2.611]
 [ 1.492  1.381  0.921  0.373 -0.174 -0.721 -1.268 -1.815 -2.362 -2.773]
 [ 1.385  1.23   0.788  0.241 -0.306 -0.853 -1.401 -1.948 -2.495 -2.933]
 [ 1.338  1.105  0.681  0.134 -0.414 -0.961 -1.508 -2.055 -2.602 -3.067]
 [ 1.259  1.04   0.598  0.05  -0.497 -1.044 -1.591 -2.138 -2.685 -3.178]
 [ 1.252  0.982  0.515 -0.033 -0.58  -1.127 -1.674 -2.221 -2.768 -3.288]] 

[[ 3.705  2.557  1.63   0.711 -0.209 -1.128 -2.047 -2.966 -3.885 -4.391]
 [ 3.783  2.7    1.785  0.866 -0.054 -0.973 -1.892 -2.811 -3.73  -4.282]
 [ 3.812  2.794  1.868  0.949  0.029 -0.89  -1.809 -2.728 -3.647 -4.245]
 [ 3.825  2.863  1.933  1.014  0.095 -0.825 -1.744 -2.663 -3.582 -4.226]
 [ 3.831  2.918  1.989  1.07   0.151 -0.768 -1.6

In [37]:
#create matrix compare fn with multiple use cases
def matrixcompare(m1, m2, operation):
    assert np.matrix(m1).shape == np.matrix(m2).shape, 'matrix size mismatch, cannot compare matrices of different sizes'
    lst1 = np.array(m1)
    lst2 = np.array(m2)
    output = np.zeros((len(lst1), len(lst1[0])))
    for i in range(0, len(lst1)):
        for j in range(0, len(lst1[0])):
            if operation == 'normeddiff':
                #calculate the difference between the mod and the average (how much bigger is the mod epv compared to average?)
                output[i][j] = lst2[i][j] - lst1[i][j]
            elif operation == 'combine':
                output[i][j] = (lst1[i][j] * lst2[i][j]) + lst2[i][j]
            else:
                break
        if operation == 'normeddiff':
            #normalize differences by row (normalized to be a percentage between - modweight and modweight)
            romi = -14 #these two values can be adjusted, but essentially they represent the max differences possible in 2 epvs. 
            romx = 14 #the most 2 epvs can be different is 14 or negative 14, which are the floor and ceiling of m_epv-a_epv
            output[i] = ((2 * (1 + modweight))*((np.array(output[i]) - romi) / (romx - romi)) - (1 + modweight))      
    return output

In [38]:
#compare mod epv with avg epv to create a percent of average epv that represents how much different the modded epv is from the norm
epv_diff = matrixcompare(a_epv10, m_epv10, 'normeddiff') #average EPV *FIRST*. 

#multiply this with our average conversion percentages to modify conversion percentage
m_conv = matrixcompare(epv_diff, r_conv, 'combine')

In [39]:
r_conv = np.array(r_conv)
r_conv100 = pd.Series(np.repeat([j for i in r_conv for j in i ], 10))
r_conv = np.array(m_conv)
m_conv100 = pd.Series(np.repeat([j for i in m_conv for j in i ], 10))

In [40]:
#insert output statement here
np.savetxt("avg_conv_pct.csv", r_conv100, fmt = '%10.5f')
np.savetxt("mod_conv_pct.csv", m_conv100, fmt = '%10.5f')

In [41]:
#print values for testing purposes:

print('\n raw:\n')
print(r_conv)
print('\n adjusted:\n')
print(m_conv)


 raw:

[[ 0.749  0.698  0.694  0.67   0.654  0.631  0.612  0.58   0.566  0.542]
 [ 0.603  0.646  0.623  0.609  0.592  0.596  0.577  0.55   0.519  0.461]
 [ 0.494  0.594  0.566  0.552  0.537  0.554  0.531  0.508  0.471  0.385]
 [ 0.448  0.542  0.525  0.503  0.492  0.512  0.485  0.464  0.429  0.324]
 [ 0.444  0.502  0.501  0.468  0.462  0.477  0.446  0.427  0.397  0.287]
 [ 0.457  0.471  0.487  0.444  0.441  0.449  0.418  0.399  0.373  0.272]
 [ 0.464  0.447  0.474  0.428  0.424  0.425  0.4    0.382  0.351  0.278]
 [ 0.442  0.429  0.453  0.415  0.404  0.404  0.393  0.376  0.327  0.304]
 [ 0.36   0.416  0.417  0.403  0.376  0.386  0.4    0.384  0.297  0.349]]

 adjusted:

[[ 0.749  0.698  0.694  0.67   0.654  0.631  0.612  0.58   0.566  0.542]
 [ 0.603  0.646  0.623  0.609  0.592  0.596  0.577  0.55   0.519  0.461]
 [ 0.494  0.594  0.566  0.552  0.537  0.554  0.531  0.508  0.471  0.385]
 [ 0.448  0.542  0.525  0.503  0.492  0.512  0.485  0.464  0.429  0.324]
 [ 0.444  0.502  0.501  0.468