In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pymc3 as pm
from scipy import stats
import matplotlib.pyplot as plt
pd.plotting.register_matplotlib_converters()
plt.style.use('seaborn-darkgrid')

import seaborn as sns

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Recursively walk the Kaggle input directory and print the full path of
# every file found beneath it.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

#Data
#    GameId - a unique game identifier
#    PlayId - a unique play identifier
#    Team - home or away
#    X - player position along the long axis of the field. See figure below.
#    Y - player position along the short axis of the field. See figure below.
#    S - speed in yards/second
#    A - acceleration in yards/second^2
#    Dis - distance traveled from prior time point, in yards
#    Orientation - orientation of player (deg)
#    Dir - angle of player motion (deg)
#    NflId - a unique identifier of the player
#    DisplayName - player's name
#    JerseyNumber - jersey number
#    Season - year of the season
#    YardLine - the yard line of the line of scrimmage
#    Quarter - game quarter (1-5, 5 == overtime)
#    GameClock - time on the game clock
#    PossessionTeam - team with possession
#    Down - the down (1-4)
#    Distance - yards needed for a first down
#    FieldPosition - which side of the field the play is happening on
#    HomeScoreBeforePlay - home team score before play started
#    VisitorScoreBeforePlay - visitor team score before play started
#    NflIdRusher - the NflId of the rushing player
#    OffenseFormation - offense formation
#    OffensePersonnel - offensive team positional grouping
#    DefendersInTheBox - number of defenders lined up near the line of scrimmage, spanning the width of the offensive line
#    DefensePersonnel - defensive team positional grouping
#    PlayDirection - direction the play is headed
#    TimeHandoff - UTC time of the handoff
#    TimeSnap - UTC time of the snap
#    Yards - the yardage gained on the play (you are predicting this)
#    PlayerHeight - player height (ft-in)
#    PlayerWeight - player weight (lbs)
#    PlayerBirthDate - birth date (mm/dd/yyyy)
#    PlayerCollegeName - where the player attended college
#    HomeTeamAbbr - home team abbreviation
#    VisitorTeamAbbr - visitor team abbreviation
#    Week - week into the season
#    Stadium - stadium where the game is being played
#    Location - city where the game is being played
#    StadiumType - description of the stadium environment
#    Turf - description of the field surface
#    GameWeather - description of the game weather
#    Temperature - temperature (deg F)
#    Humidity - humidity
#    WindSpeed - wind speed in miles/hour
#    WindDirection - wind direction

# Training data is in the competition dataset as usual
# Load the competition training set from the Kaggle input directory.
train_df = pd.read_csv(
    '/kaggle/input/nfl-big-data-bowl-2020/train.csv',
    low_memory=False,
)

# Play level: keep only the rows whose player is the rusher of the play.
play_df = train_df.loc[train_df['NflId'] == train_df['NflIdRusher']]

# Game level: keep the first rusher row encountered for each GameId.
game_df = play_df.drop_duplicates(subset=['GameId'], keep='first')

# Histogram bin edges for the yards plot: wide outer bins at both ends,
# unit-wide bins from -3 up to 15.
bins = [-10, -3, -2, -1, *range(0, 16), 100]

In [None]:
# Yards distribution: histogram + KDE of the rushers' yards with a fitted
# Normal Inverse Gaussian (NIG) density overlaid.
f = plt.figure(figsize=(15, 10))
# Draw in the upper-left cell of a 2x2 grid. The axes is created directly on
# the figure so that no unused full-size axes is left behind the plot.
ax = f.add_subplot(221)

# `fit=stats.norminvgauss` makes distplot fit the distribution to the data and
# draw its density on top of the histogram / KDE.
# NOTE(review): distplot is reported as deprecated in recent seaborn releases;
# confirm against the seaborn version in use before upgrading.
sns.distplot(play_df['Yards'], hist=True, bins=bins, kde=True,
             fit=stats.norminvgauss, ax=ax)

# Get the fitted NIG parameters for the rushers. `play_df` already holds only
# the rusher rows, so it is used directly: re-masking it with a boolean Series
# built on `train_df` (a different index) is redundant and forces pandas to
# re-align the mask.
# The four fitted values are kept under the original names; the last one is
# the distribution's scale parameter even though it is stored as `shape`.
(a, b, loc, shape) = stats.norminvgauss.fit(play_df['Yards'])
#print ("Param: a={0}, b={1}, loc={2}, shape={3}".format(a, b, loc ,shape))


In [None]:
def train_my_model(train_df):
    """Fit a Normal Inverse Gaussian distribution to the rushers' yardage.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with ``NflId``, ``NflIdRusher`` and ``Yards`` columns. Only the
        rows where ``NflId == NflIdRusher`` are used for the fit.

    Returns
    -------
    tuple
        The four fitted parameters ``(a, b, loc, scale)`` exactly as returned
        by ``stats.norminvgauss.fit``.

    Raises
    ------
    ValueError
        If ``train_df`` contains no row where ``NflId == NflIdRusher``.
    """
    # Select the rusher rows from the frame that was passed in. The function
    # must not read the notebook-level `play_df`: doing so ties the result to
    # hidden global state and indexes one frame with a mask built on another.
    rusher_yards = train_df.loc[train_df['NflId'] == train_df['NflIdRusher'], 'Yards']
    if rusher_yards.empty:
        raise ValueError("train_df contains no rows where NflId == NflIdRusher")

    a, b, loc, scale = stats.norminvgauss.fit(rusher_yards)
    return (a, b, loc, scale)

#def make_my_predictions(test_df, sample_prediction_df,param):
#    CDF=stats.norminvgauss.cdf(np.linspace(-99,99,199),param[0],param[1],param[2],param[3])
#    return pd.DataFrame(data=[CDF], columns= sample_prediction_df.columns)

In [None]:
# You can only iterate through a result from `env.iter_test()` once
# so be careful not to lose it once you start iterating.
#from kaggle.competitions import nflrush
#env = nflrush.make_env()
#iter_test = env.iter_test()
#(test_df, sample_prediction_df) = next(iter_test)
#pred=pd.DataFrame(data=[CDF], columns= sample_prediction_df.columns)
#env.predict(pred)


In [None]:
from kaggle.competitions import nflrush

env = nflrush.make_env()

# Fit the distribution once on the training data, then evaluate its CDF on the
# 199 evenly spaced points from -99 to 99.
param = train_my_model(train_df)
CDF = stats.norminvgauss.cdf(np.linspace(-99, 99, num=199), *param)

# The same CDF row is submitted for every batch yielded by the environment;
# its columns are labelled with the sample prediction frame's columns.
for test_df, sample_prediction_df in env.iter_test():
    predictions_df = pd.DataFrame([CDF], columns=sample_prediction_df.columns)
    env.predict(predictions_df)

env.write_submission_file()

# List the CSV files now present in the working directory to confirm that the
# submission file was written.
import os
print([entry for entry in os.listdir('/kaggle/working') if '.csv' in entry])