---
# <U>***Importing librabries***

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

---
* ***Creating SanFran data frame for the past 3 years to current.***

In [3]:
df = pd.read_excel("nfl.xlsx")
df.set_index("Date", inplace=True)
df_years = df.loc["2017":"2020"]
df_san_fran = df_years.loc[(df_years["Home Team"] == "df_San Francisco 49ers") | (df_years["Away Team"] == "San Francisco 49ers")]
df_san_fran = df_san_fran[["Home Team", "Away Team", "Home Score", "Away Score", "Total Score Open"]]
df_san_fran["Home Team"].loc["2019-10-20"] = "Washington Football Team"
df_san_fran["Home Team"].loc["2017-10-15"] = "Washington Football Team"
df_san_fran.tail()

Unnamed: 0_level_0,Home Team,Away Team,Home Score,Away Score,Total Score Open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-10-29,Philadelphia Eagles,San Francisco 49ers,33,10,47.5
2017-10-15,Washington Football Team,San Francisco 49ers,26,24,46.5
2017-10-08,Indianapolis Colts,San Francisco 49ers,26,23,43.0
2017-10-01,Arizona Cardinals,San Francisco 49ers,18,15,44.5
2017-09-17,Seattle Seahawks,San Francisco 49ers,12,9,43.0


---
* ***Creating data frame with the days, and time of day that they played.***

In [4]:
df_time_day = pd.read_html("https://www.sbnation.com/2017/4/20/15376922/2017-nfl-schedule-released-dates-times-highlights")
df_time_day = df_time_day[0]
df_time_day = df_time_day.drop(columns=(["Date", "Away Team", "Week", "TV"]))
df_time_day.tail()

Unnamed: 0,Home Team,Time (ET)
253,Los Angeles Chargers,4:25 p.m.
254,Los Angeles Rams,4:25 p.m.
255,Seattle Seahawks,4:25 p.m.
256,TBD,8:30 p.m.
257,Home Team,Time (ET)


---
* ***Combing, and cleaning, data frames***

In [5]:
df_stadium = pd.read_html("https://en.wikipedia.org/wiki/List_of_current_National_Football_League_stadiums")
df_stadium = df_stadium[1]
df_stadium = df_stadium.drop(columns=(["Image", "Capacity", "Opened", "Ref(s)"]))
df_stadium = df_stadium.append(df_stadium.iloc[[20,25]])
df_stadium.reset_index(inplace=True,drop=True)
# Chaning the names of a few rows
df_stadium["Team(s)"].loc[20] = "New York Giants"
df_stadium["Team(s)"].loc[30] = "New York Jets"
df_stadium["Team(s)"].loc[25] = "Los Angeles Rams"
df_stadium["Team(s)"].loc[31] = "Los Angeles Chargers"

In [6]:
df_san_fran = df_san_fran.join(df_stadium.set_index("Team(s)"), on = "Home Team")

In [7]:
df_san_fran = df_san_fran.join(df_time_day.set_index("Home Team"), on="Home Team")

In [8]:
df_san_fran["Time (ET)"] = df_san_fran["Time (ET)"].str.replace("p.m.", " ")
df_san_fran["Time (ET)"] = df_san_fran["Time (ET)"].str.replace("a.m.", " ")

---
EXTRA??

In [9]:
years = ['2017', '2018', '2019','2020']
list_of_dfs = []
for i in years:
    list_of_dfs.append(pd.read_html(f'https://www.pro-football-reference.com/teams/sfo/{i}.htm'))

In [10]:
yr_2017 = list_of_dfs[0][1]
yr_2018 = list_of_dfs[1][1]
yr_2019 = list_of_dfs[2][1]
yr_2020 = list_of_dfs[3][1]
#yr_2017.set_index("Unnamed: 2_level_0", inplace=True)
#yr_2018.set_index("Unnamed: 2_level_0", inplace=True)
#yr_2019.set_index("Unnamed: 2_level_0", inplace=True)
#yr_2020.set_index("Unnamed: 2_level_0", inplace=True)

In [11]:
from dateparser import parse

In [12]:
parse("September 10, 2017")

datetime.datetime(2017, 9, 10, 0, 0)

In [13]:
parse("September 10, 2017").strftime('%Y-%m-%d')

'2017-09-10'

In [14]:
yr_2017["Unnamed: 2_level_0"]

Unnamed: 0,Date
0,September 10
1,September 17
2,September 21
3,October 1
4,October 8
5,October 15
6,October 22
7,October 29
8,November 5
9,November 12


In [33]:
def custom_date_parser(row, year):
    try:
        x = parse(row + f", {year}")
        print(x)
        dt = parse(x).strftime('%Y-%m-%d')
        print(dt)
    except:
        return None


In [34]:
yr_2017['dt'] = yr_2017["Unnamed: 2_level_0"].apply(lambda row: custom_date_parser(row, '2017'))

In [25]:
yr_2017['dt']

0     NaN
1     NaN
2     NaN
3     NaN
4     NaN
5     NaN
6     NaN
7     NaN
8     NaN
9     NaN
10    NaN
11    NaN
12    NaN
13    NaN
14    NaN
15    NaN
16    NaN
Name: dt, dtype: object

In [15]:
yr_2017['dt'] = yr_2017["Unnamed: 2_level_0"].apply(lambda row: parse(str(row) + " , 2017").strftime('%Y-%m-%d'))

AttributeError: 'NoneType' object has no attribute 'strftime'

In [None]:
yr_2017["dt"]

In [None]:
yr_2017.head(1)