![NFL](https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/steelers/xk5boplpxmrobtfa0u0r)

In [None]:
# Data handling
import pandas as pd
from datetime import datetime, date

# Plotly: `px` for one-line charts, `go` + `make_subplots` for hand-built figures
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.offline as offline
import plotly.graph_objs as go
# Initialise Plotly's notebook mode before any figure is shown
offline.init_notebook_mode(connected = True)

# Suppress every warning for the rest of the notebook.
# NOTE(review): this also hides pandas deprecation / chained-assignment warnings.
import warnings
warnings.filterwarnings("ignore")

## Dataset Description 

### Games Data
> The games file ```games.csv``` includes information about the teams playing in each of the <b>764 games</b>.

📝 The key variable is gameId.


| Name                       | Description                                                                                                                                                                                                                                                                                                                    |
|----------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <b>gameId</b>                    | Consists of unique Id (numeric) |
| <b>gameDate</b>                  | Consists of Game Date |
| <b>gameTimeEastern</b>               | Starting time of the game                                                                                                                                                                                                                                                                                                  |
| <b>homeTeamAbbr</b>      | Home Team                                                                                                                                                                                                                                                                                                   |
| <b>visitorTeamAbbr</b>                  | Visitors Team                                                                                                                                                                                                                                                                                  |
| <b>week</b> | Week when the game was played |
| <b>season</b> | Season in which the game was played |
| <b>month</b> | Month when the game was played |

                                                                                         

> The idea of the table was taken [from](https://www.kaggle.com/ruchi798/covid-19-impact-on-digital-learning-eda-w-b) 


In [None]:
# Games data: read the schedule, parse the game date and derive its calendar month
df_game = (
    pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')
    .assign(gameDate=lambda games: pd.to_datetime(games['gameDate']))
    .assign(month=lambda games: games['gameDate'].dt.month)
)
df_game.head(2)

### Players Data
> The players file ```players.csv``` includes information about the <b>2731</b> players playing in games. 


| Name                       | Description                                                |
|----------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <b>nflId</b>                      | Player's identification number (unique per player) |
| <b>height</b>                     | Player's height |
| <b>weight</b>                     | Player's weight |
| <b>birthDate</b>                  | Player's birth date |
| <b>collegeName</b>                | Player's college name |
| <b>Position</b> | Player's position in the game |
| <b>displayName</b> | Player's name |
| <b>age</b> | Player's age |
| <b>agegroup</b> | Player's age group |
|                            |                                                                                                                                                                                

In [None]:
# Players data, extended with two derived columns: `age` and `agegroup`

# Load the roster and parse birth dates
df_player = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
df_player['birthDate'] = pd.to_datetime(df_player['birthDate'])

# `age`: completed years as of the moment this cell runs
today = datetime.today()


def _completed_years(born):
    """Years elapsed between `born` and `today`, minus one if the birthday is still ahead this year."""
    birthday_still_ahead = (today.month, today.day) < (born.month, born.day)
    return today.year - born.year - birthday_still_ahead


df_player['age'] = df_player['birthDate'].apply(lambda born: _completed_years(born))

# `agegroup`: five-year, left-closed buckets [21, 26), [26, 31), ... [46, 51)
bins = [float(edge) for edge in range(21, 52, 5)]
labels = [f'{start}-{start + 4}' for start in range(21, 50, 5)]
df_player['agegroup'] = pd.cut(df_player['age'], bins=bins, labels=labels, right=False)
df_player.head(2)

In [None]:
# Plays data: load the play-by-play table and preview the first two rows
df_plays = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')
df_plays.head(2)

In [None]:
# Tracking data (2018 season): load the table and preview the first two rows
df_trac2018 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2018.csv')
df_trac2018.head(2)

# Visualization

In [None]:
# data
def _counts_frame(values, key):
    """Return the value counts of `values` as a frame with columns [`key`, 'count'].

    The key and count columns are named explicitly instead of renaming the
    default 'index' / series-name columns: pandas 2.0 changed those defaults,
    and the old rename then produces two 'count' columns and no key column.
    Row order is the `value_counts()` order (most frequent first).
    """
    return values.value_counts().rename_axis(key).reset_index(name='count')


def _games_per_month(season, base_color, highlight_color='#496595'):
    """Games per calendar month for one season, in month order, with bar colours.

    Parameters
    ----------
    season : value matched against `df_game['season']`.
    base_color : colour used for every bar except the last one.
    highlight_color : colour of the last (latest-month) bar.

    Returns
    -------
    DataFrame with columns 'month' (label such as '9M'), 'count' and 'color'.
    """
    months = df_game.loc[df_game['season'] == season, 'month']
    per_month = _counts_frame(months, 'month').sort_values(by='month')
    # Labels come from the data, so a season with a different set of months
    # no longer fails with a length mismatch against a hard-coded list.
    per_month['month'] = per_month['month'].astype(int).astype(str) + 'M'
    # Build the colour column in one assignment; chained `df['color'][3:] = ...`
    # is not guaranteed to write back to the frame.
    bar_colors = [base_color] * len(per_month)
    if bar_colors:
        bar_colors[-1] = highlight_color
    per_month['color'] = bar_colors
    return per_month


df_sea = _counts_frame(df_game['season'], 'season')

df_gd2018 = _games_per_month(2018, '#c6ccd8')
df_gd2019 = _games_per_month(2019, '#a8b1c3')
df_gd2020 = _games_per_month(2020, '#99a3b9')

# chart: one horizontal bar panel per season plus a donut with the season totals
fig = make_subplots(rows=1, cols=4, column_widths=[0.26,0.26,0.26,0.2],
                    specs=[[{"type": "bar"},{"type": "bar"},{"type": "bar"},{"type": "pie"}]],
                    subplot_titles=("2018 Season", "2019 Season", "2020 Season"))
season_panels = (('2018', df_gd2018), ('2019', df_gd2019), ('2020', df_gd2020))
for panel_col, (season_name, season_frame) in enumerate(season_panels, start=1):
    fig.add_trace(go.Bar(x=season_frame['count'], y=season_frame['month'], name=season_name,
                         marker=dict(color=season_frame['color']), orientation='h'),
                  row=1, col=panel_col)
fig.add_trace(go.Pie(values=df_sea['count'], labels=df_sea['season'], name='Season',
                    marker=dict(colors=['#99a3b9','#a8b1c3','#c6ccd8']), hole=0.7,
                    rotation=90, hoverinfo='label+percent', textinfo='label'),
              row=1, col=4)
fig.update_yaxes(showgrid=False, ticksuffix=' ')
fig.update_xaxes(visible=False)
fig.update_layout(height=550, bargap=0.2,
                  margin=dict(b=0,r=0),
                  hovermode="y unified",
                  title_text="Most played Games: Season wiseMonth",
                  plot_bgcolor='#fff', paper_bgcolor='#fff',
                  title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
                  font=dict(color='#8a8d93'),
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
                  showlegend=False
                 )
fig.show()

In [None]:
# Histogram of games per schedule week, with one tick per week (1..17)
fig = px.histogram(
    df_game,
    x='week',
    title='Distribution of Week',
    height=350,
    template='plotly_white',
    color_discrete_sequence=['#85a1c1'],
)
fig.update_yaxes(visible=False)
# Default hover template and no bar outline
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
fig.update_layout(
    margin=dict(b=0, l=20, r=30),
    hovermode="x unified",
    bargap=0.2,
    xaxis_title=" ",
    title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
    font=dict(color='#8a8d93'),
    hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 18)),
        ticktext=list(range(1, 18)),
    ),
)
fig.show()

### Interpret
___
Most games were played in November of the 2020 season.<br>
Games are generally played from September to January.
___

In [None]:
# Games per team and season, counted separately for the visiting and the home side.
# After the transpose each row is a team and each season becomes a column.
df_s_vta = pd.crosstab(df_game['season'], df_game['visitorTeamAbbr']).T.reset_index()
df_s_hta = pd.crosstab(df_game['season'], df_game['homeTeamAbbr']).T.reset_index()
df_s_vta.columns = ['visitorTeamAbbr'] + [year + 'V' for year in ('2018', '2019', '2020')]
df_s_hta.columns = ['homeTeamAbbr'] + [year + 'H' for year in ('2018', '2019', '2020')]

# One panel per season; within a panel the Home trace is added before the Visitor trace
fig = make_subplots(
    rows=1, cols=3, shared_yaxes=True, horizontal_spacing=0,
    subplot_titles=tuple(year + " Season" for year in ('2018', '2019', '2020')),
)
for panel_col, year in enumerate(('2018', '2019', '2020'), start=1):
    home_bar = go.Bar(
        y=df_s_hta['homeTeamAbbr'], x=df_s_hta[year + 'H'],
        marker=dict(color='#c6ccd8'), name='Home', orientation='h',
    )
    visitor_bar = go.Bar(
        y=df_s_vta['visitorTeamAbbr'], x=df_s_vta[year + 'V'],
        marker=dict(color='#496595'), name='Visitor', orientation='h',
    )
    fig.add_trace(home_bar, row=1, col=panel_col)
    fig.add_trace(visitor_bar, row=1, col=panel_col)

fig.update_xaxes(visible=False)
fig.update_yaxes(ticksuffix=' ')
# The title doubles as the legend: each word is coloured like its bars
title = "No of Games for every Season: <span style='color:#c6ccd8'>Home <span style='color:#8a8d93'>& <span style='color:#496595'>Visitors"
fig.update_layout(
    title=title,
    height=800,
    bargap=0.3,
    hovermode="y unified",
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
    font=dict(color='#8a8d93'),
    hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
    margin=dict(t=100, b=0, l=80, r=40),
    showlegend=False,
)
fig.show()

In [None]:
# Kick-off hour (Eastern) of every game: the part of `gameTimeEastern` before the first ':'
df_gt = df_game.sort_values(by='gameTimeEastern')
df_gt['hour'] = df_gt['gameTimeEastern'].str.split(':').str[0].astype(int)

fig = px.histogram(df_gt, x='hour', height = 350, color_discrete_sequence=['#85a1c1'],
                   title='Distribution of Game Time(Hour)',
                   template='plotly_white'
                  )
fig.update_yaxes(visible=False)
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
fig.update_layout(hovermode="x unified", xaxis_title=" ", bargap=0.2,
                  title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
                  font=dict(color='#8a8d93'),
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
                  margin=dict(b=0),
                  xaxis = dict(tickmode = 'array',
                               tickvals = list(range(9, 23)),
                               ticktext = list(range(9, 23)))
                 )

# Shaded bands with a label for the hour ranges annotated as having 0 games.
# Each entry: (band start, band end, label x position, label text), all x values in hours.
empty_hour_bands = [
    (9.7, 11.4, 10.4, "<b>0</b> Hour"),
    (13.6, 14.4, 14, "<b>0</b><br>Hour"),
    (17.6, 18.4, 18, "<b>0</b><br>Hour"),
]
for band_start, band_end, label_x, label_text in empty_hour_bands:
    # No xref/yref overrides: the band limits are hour values on the x axis.
    # xref="paper" would read them as 0-1 fractions of the plot width (off-plot),
    # and yref="y2" names an axis this single-panel figure does not have.
    fig.add_vrect(x0=band_start, x1=band_end,
                  fillcolor="#e9cf87",
                  opacity=0.25,
                  line_width=0)
    fig.add_annotation(x=label_x, y=100,
                       text=label_text,
                       showarrow=False)
fig.show()

In [None]:
# Number of players per position, as horizontal bars ordered by total (ascending)
fig = px.histogram(df_player, y='Position', height=600, template='plotly_white',
                   color_discrete_sequence=['#85a1c1'], title='Players count by position')
fig.update_xaxes(visible=False)
fig.update_yaxes(showgrid=False, categoryorder='total ascending', ticksuffix=' ', showline=False)
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
fig.update_layout(hovermode="y unified", yaxis_title=" ",
                  title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
                  font=dict(color='#8a8d93'),
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
                  showlegend=False,
                  margin=dict(b=10)
                 )
# Shaded band over the top rows of the chart; y0/y1 are positions on the
# categorical y axis. No xref/yref overrides: the figure has a single y axis
# (there is no "y2"), and add_hrect spans the full plot width by default.
# NOTE(review): the 19.6-25.4 range and the "50%" label are hard-coded - confirm against the data.
fig.add_hrect(y0=19.6, y1=25.4,
              fillcolor= "#e9cf87",
              opacity=0.25,
              line_width=0
)
fig.add_annotation(x=300, y=21.5,
            text="<b>50%</b> of Players<br>lies here",
            showarrow=False)
fig.show()

In [None]:
# Colleges with the most players.
# The number of colleges shown and the chart title are driven by the same
# constant, so they cannot disagree (the title used to say "Top 10" while
# 20 colleges were plotted).
top_n_colleges = 20

# Name the key and count columns explicitly rather than renaming the defaults:
# pandas 2.0 changed the default names produced by value_counts().reset_index().
df_cn = (
    df_player['collegeName']
    .value_counts()
    .head(top_n_colleges)
    .rename_axis('collegeName')
    .reset_index(name='count')
)
fig = px.histogram(df_cn, x='count', y='collegeName', color_discrete_sequence=['#85a1c1'],
                   title=f'Top {top_n_colleges} Colleges having highest players')
fig.update_xaxes(visible=False)
fig.update_yaxes(showgrid=False, categoryorder='total ascending', ticksuffix=' ', showline=False)
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
fig.update_layout(hovermode="y unified", yaxis_title=" ",
                  plot_bgcolor='#fff', paper_bgcolor='#fff',
                  title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
                  font=dict(color='#8a8d93'),
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
                  showlegend=False,
                  margin=dict(b=10)
                 )
fig.show()

In [None]:
# Players per position, split by age group; rows missing either value are dropped
df_pa = df_player.loc[:, ['Position', 'agegroup']].dropna()
fig = px.histogram(
    df_pa,
    y='Position',
    color='agegroup',
    height=550,
    title='Distribution of Players Age vs Position',
    color_discrete_sequence=['#c6ccd8', '#a8b1c3', '#99a3b9', '#85a1c1', '#222'],
)
fig.update_xaxes(visible=False)
fig.update_yaxes(showgrid=False, categoryorder='total ascending', ticksuffix=' ', showline=False)
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
# The legend stays visible here: it is the key to the age-group colours
fig.update_layout(
    hovermode="y unified",
    yaxis_title=" ",
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
    font=dict(color='#8a8d93'),
    bargap=0.1,
    hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
    margin=dict(b=0),
)
fig.show()

## If you like the plotly charts do upvote
I still have a lot of things to analyze, and I will keep updating this notebook.