In [None]:
def get_wins(team_name, games=None, start=None, end=None):
    """
    Returns a dataframe of victories of a given team
    Data from boydsworld.com

    Parameter team_name: team to return victories of 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games. To select only games from one year, leave  
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """        
    if games is None: 
        games = get_games(team_name, start, end)
    wins = games[(games["team_1"] == team_name) & (games["team_1_score"] > games["team_2_score"])]
    return wins

In [None]:
def get_losses(team_name, games=None, start=None, end=None):
    """
    Returns a dataframe of losses of a given dataframe 
    Data from boydsworld.com

    Parameter team_name: team to return losses of 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games. To select only games from one year, leave  
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """  
    if games is None: 
        games = get_games(team_name, start, end)
    losses = games[(games["team_2"] == team_name) & (games["team_1_score"] > games["team_2_score"])]
    return losses

In [None]:
def calculate_runs_scored(team_name, games=None, start=None, end=None):
    """
    Returns: int equal to total number of runs scored by team_name over games
    Data from boydsworld.com

    Parameter team_name: team to calculate runs scored for 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games.
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        games = get_games(team_name, start, end)
    wins = get_wins(team_name, games)
    losses = get_losses(team_name, games)
    wins["winner_runs_scored"] = wins["team_1_score"]
    losses["loser_runs_scored"] = losses["team_2_score"] 
    sum_runs_scored_in_wins = wins["winner_runs_scored"].sum()
    sum_runs_scored_in_losses = losses["loser_runs_scored"].sum()
    runs_scored = sum_runs_scored_in_wins+sum_runs_scored_in_losses
    return runs_scored

In [None]:
def calculate_runs_allowed(team_name, games=None, start=None, end=None):
    """
    Returns: int equal to total number of runs allowed by team_name over games
    Data from boydsworld.com

    Parameter team_name: team to calculate runs allowed for 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        games = get_games(team_name,start,end)
    wins = get_wins(team_name, games)
    losses = get_losses(team_name, games)
    wins["winner_runs_allowed"] = wins["team_2_score"]
    losses["loser_runs_allowed"] = losses["team_1_score"] 
    sum_runs_allowed_in_wins = wins["winner_runs_allowed"].sum()
    sum_runs_allowed_in_losses = losses["loser_runs_allowed"].sum()
    runs_allowed = sum_runs_allowed_in_wins + sum_runs_allowed_in_losses
    return runs_allowed

In [None]:
def calculate_run_difference(team_name, games=None, start=None, end=None):
    """
    Returns: The total run difference across a set of games 
    Data from boydsworld.com

    Parameter team_name: team to return run difference of
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        games = get_games(team_name, start, end)
    runs_scored = calculate_runs_scored(team_name, games)
    runs_allowed = calculate_runs_allowed(team_name, games)
    run_difference = runs_scored - runs_allowed
    return run_difference, runs_scored, runs_allowed

In [None]:
def calculate_cumulative_run_difference(team_name, games=None, start=None, end=None):
    """
    Returns copy with a given dataframe with new column containing the
    run difference for each game for a given team.
    Data from boydsworld.com

    Parameter team_name: team to return run difference of
    Precondition: team_name is a string format ex. "Cornell," "Colgate"
    Parameter games: Games to filter 
    Precondition: games is a DataFrame returned by get_games() function
    Parameter start: the start year of games
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None:
        games = get_games(team_name, start, end)
    df_copy = games.copy()
    dif_in_wins = df_copy["team_1_score"] - df_copy["team_2_score"]
    dif_in_losses = df_copy["team_2_score"] - df_copy["team_1_score" ]
    df_copy["run_difference"] = np.where(((df_copy["team_1"] == team_name) & (df_copy["team_1_score"] > df_copy["team_2_score"])), dif_in_wins, dif_in_losses)
    df_copy["opponent"] = np.where(((df_copy["team_1"] == team_name) & (df_copy["team_1_score"] > df_copy["team_2_score"])), df_copy["team_2"], df_copy["team_1"])
    df_copy["cumulative_rd"] = df_copy["run_difference"].cumsum()
    return df_copy

In [None]:
def get_rd_data(team_name, games=None, start=None, end=None):
    """
    Returns: Dataframe with run_difference, opponent, and cumulative_rd for every game of given team
    in given seasons. 

    Parameter team_name: team to return games for
    Precondtion: team_name is a string
    Parameter start: start year, inclusive
    Precondition: start is a int 
    Parameter end: end year, inclusive
    Preconditon: end is an int
    """
    if games is None: 
        games = get_games(team_name, start=start, end=end)
    games["date"] =  pd.to_datetime(games["date"])
    games = games.sort_values(by="date")
    games = games.reset_index(drop="True")

    games = add_run_difference_column(team_name, games)
    games = games.drop(columns=["team_1","team_1_score","team_2","team_2_score","field"])
    games["date"] = games["date"].dt.strftime('%Y-%m-%d')
    return games

In [206]:
def get_intra_ivy(team_name, games=None, start=None, end=None):
    """
    Returns a dataframe of in-conference Ivy League games for team_name
    Data from boydsworld.com

    Parameter team_name: team to return victories of 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter start: the start year of games. To select only games from one year, leave  
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        return get_games(team_name, start, end, ivy_only=True)
    else: 
        teams = ["Brown",
                 "Cornell",
                 "Columbia",
                 "Dartmouth",
                 "Harvard",
                 "Pennsylvania",
                 "Princeton",
                 "Yale"]
        teams.remove(team_name)
        wins = get_wins(team_name, games)
        losses = get_losses(team_name, games)
        wins["intra_ivy"] = [1 if x in teams else 0 for x in wins["team_2"]]
        losses["intra_ivy"] = [1 if x in teams else 0 for x in losses["team_1"]]
        new_df = pd.concat([wins,losses])
        df = new_df[new_df["intra_ivy"] == 1]
        return df 

In [None]:
def calculate_pythagenpat_win_percentage(team_name, games=None, start=None, end=None):
    """
    Returns: The PythagenPat winning percentage expectation of a given team over given games. 

    W% = R^x/(R^x + RA^x)
    where x = (RPG)^.287
    Developed by David Smyth and Patriot
   
    Parameter team_name: team to return expected winning % for 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter games: games over which to calculate
    Precondition: games is a DataFrame returned by getGames() function
    Parameter start: the start year of games
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        games = get_games(team_name, start, end)
    runs_scored = calculate_runs_scored(team_name, games)
    runs_allowed = calculate_runs_allowed(team_name, games)    
    num_games = len(games.index)
    if not num_games == 0:
        runs_per_game = runs_scored / num_games
    else:
        runs_per_game = 0
    x = runs_per_game ** 0.287
    res  = (runs_scored ** x) / ((runs_scored ** x) + (runs_allowed ** x))
    return res

In [None]:
def calculate_actual_win_percentage(team_name, games=None, start=None, end=None):
    """
    Returns: The actual (i.e. experimental) winning percentage of a given team over given games. 

    winning_percentage = games_won / games_plated
   
    Parameter team_name: team to return actual winning % for 
    Preconditions: team_name is a string format ex. "Cornell," "Colgate"
    Parameter games: games over which to calculate
    Precondition: games is a DataFrame returned by getGames() function
    Parameter start: the start year of games
    Precondition: start is an int
    Parameter end: the end year of games
    Precondition: end is an int
    """
    if games is None: 
        games = get_games(team_name, start, end)     
    wins = get_wins(team_name, games)
    losses = get_losses(team_name,games)
    num_wins = len(wins.index)
    num_losses = len(losses.index)
    actual_win_percentage = num_wins / (num_wins + num_losses)
    return res, num_wins, num_losses

In [None]:
# Pythagenpat intra-conference win %'s by team for the Ivy League
def generate_ivy_pythags(start, end):
    """
    Returns: Dataframe of actual and expected winning percentages of Ivy League for a given set of seasons. 
    Parameter start: start year, inclusive
    Precondition: start is an int YYYY
    Parameter end: end year, inclusive
    Precondtion: end is an int YYYY
    """
    ivy_pythagenpat = {"team_name":[], "pythagenpat_pct":[], "actual_pct":[], "deviation":[]};
    for i in ["Brown", "Columbia", "Cornell", "Dartmouth", "Harvard", "Pennsylvania", "Princeton", "Yale"]:
        games = get_games(i, start=start, end=end, ivy_only=False)
        pythagenpat_pct = calculate_pythag_win_percentage(i,games)  
        actual_pct = calculate_actual_win_percentage(i,games)  
        deviation = round(pythagenpat_pct - actual_pct,3)
        wins =
        ivy_pythagenpat["team_name"].append(i)
        ivy_pythagenpat["pythagenpat_pct"].append(round(pythagenpat_pct,3))
        ivy_pythagenpat["actual_pct"].append(round(actual_pct,3))
        ivy_pythagenpat["deviation"].append(deviation)
        res = pd.DataFrame(ivy_pythagenpat)
    return res

In [None]:
# load and cache game result data for Ivy League Teams 
for team_name in ["Brown", "Columbia", "Cornell", "Dartmouth", "Harvard", "Pennsylvania", "Princeton", "Yale"]:
    games = get_games(team_name, start=1992, end=2020)
    games.to_csv("data/boydsworld/"+team_name.lower()+"_game_results_1992_to_2020", index=False)

In [1]:
def get_games(team_1, start, end=None, team_2=None):
    """
    Returns: a dataframe of all games played for a given team inclusive of given start & end year
    Data from boydsworld.com
    
    Parameter team_name: team whose games to select 
    Precondition: team_name is a lowercase string
    Parameter start: the start year of games. To select only games from one year, leave  
    Precondition: start is an int >= 1992
    Parameter end: the end year of games
    Precondition: end is an int <= 2020
    """
    if end is None:
        stop = start+1
    else: 
        stop = end
    if team_2 is None: 
        team_2 = "all"
    payload = {"team1":team_1,"firstyear":str(start),"team2":team_2,"lastyear":str(stop),"format":"Text","submit":"Fetch"}
    url = "http://www.boydsworld.com/cgi/scores.pl?" 
    s = requests.Session()
    r = requests.get(url, params=payload)
    response = r.text 
    if (len(response) < 10):
        return pd.DataFrame
    else:
        df = pd.read_fwf(StringIO(response), encoding = "utf8", header=None)
        col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
        col_bools = df.isnull().any(axis=0)
        bad_cols = col_bools[col_bools == True].index.to_series()
        bad_col_indexes = []
        for index, value in bad_cols.iteritems():
            bad_col_indexes.append(index)
        bad_col_indexes.sort()
        for col_index in bad_col_indexes:
            print(str(col_index)+" is wrong")
            
        if len(df.columns) < len(col_names):
            if len(df.columns) == 4: 
                # Known occurances: Yale 1992
                # In this case, read_fwf seems to append team 2's score to column 3. We correct by creating a new column with correct value
                df.insert(4, 3, 0, allow_duplicates=True)
                df.columns = col_names
                df["team_2_score"] = df["team_2"].str[-2:].str.strip()
                df["team_2_score"] = df["team_2_score"].astype(int)
                df["team_2"] = df["team_2"].str[:-2].str.strip()
            elif len(df.columns) == 3: 
                print(df.head())
                # Known occurances: Harvard 1993
                # In this case, no games were found. 
                res = pd.DataFrame
                col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
                res.columns = col_names
                return res 
                
        elif len(df.columns > len(col_names)):
            df = df.drop(columns = bad_col_indexes)
#             if 8 in bad_col_indexes: 
#                 # There is an extra column added because of a three word name of a school
#                 # Known instances: Yale 1992?
#                 df = df.drop(8, axis=1)
#             if 7 in bad_col_indexes: 
#                   # case where read_fwf adds an additional column
#                 # seems to happen when team_1 or team_2 is a three word name
#                 # in this case, grab the third word from row without a NaN in added column and append to associated name
#                 # Known occurances: Cornell 2015, Brown 2015
#                 # There is an extra column added because of a three word name of a school
#                 # Known instances: Yale 1992?
#                 df = df.drop(7, axis=1)
#             if 6 in bad_col_indexes: 
#                 # There is an extra column added because of a three word name of a school
#                 # Known instances: Yale 1992?
#                 df = df.drop(6, axis=1)     
#             if 4 in bad_col_indexes: 
#                 # There is an extra column added because of a three word name of a school
#                 # Known instances: Yale 1992?
#                 df = df.drop(4, axis=1)     
#             if 2 in bad_col_indexes: 
#                 # There is an extra column added because of a three word name of a school
#                 # Known instances: Yale 1992?
#                 df = df.drop(2, axis=1)     
    
        df.columns = col_names
        wins = df[(df["team_1"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
        losses = df[(df["team_2"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
        wins["opponent"] = wins["team_2"]
        losses["opponent"] = losses["team_1"]
        wins["runs_scored"] = wins["team_1_score"]
        wins["runs_allowed"] = wins["team_2_score"]
        losses["runs_scored"] = losses["team_2_score"]
        losses["runs_allowed"] = losses["team_1_score"]       
        df = pd.concat([wins,losses])
        df["run_difference"] = df["runs_scored"] - df["runs_allowed"]
        # setting dtypes
        df["run_difference"] = df["run_difference"].astype(int)
        df["runs_allowed"] = df["runs_allowed"].astype(int)
        df["runs_scored"] = df["runs_scored"].astype(int)
        df["date"] = df["date"].astype("datetime64[ns]")
        df = df.drop(columns=["team_1","team_1_score","team_2","team_2_score"])
        if end is None: 
            df = df[df["date"].dt.year == start]
        df = df.sort_values(by="date")
        return df

In [4]:
def handle_errors(data):
    """
    rtype: DataFrame
    """
    col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
    col_bools = df.isnull().any(axis=0)
    bad_cols = col_bools[col_bools == True].index.to_series()
    bad_col_indexes = []
    for index, value in bad_cols.iteritems():
        bad_col_indexes.append(index)
        bad_col_indexes.sort()
    for col_index in bad_col_indexes:
        print(str(col_index)+" is wrong")

    if len(df.columns) < len(col_names):
        if len(df.columns) == 4: 
    # Known occurances: Yale 1992
    # In this case, read_fwf seems to append team 2's score to column 3. We correct by creating a new column with correct value
            df.insert(4, 3, 0, allow_duplicates=True)
            df.columns = col_names
            df["team_2_score"] = df["team_2"].str[-2:].str.strip()
            df["team_2_score"] = df["team_2_score"].astype(int)
            df["team_2"] = df["team_2"].str[:-2].str.strip()
        elif len(df.columns) == 3: 
            print(df.head())
    # Known occurances: Harvard 1993
            # In this case, no games were found. 
            res = pd.DataFrame
            col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
            res.columns = col_names
            return res 

    elif len(df.columns > len(col_names)):
        df = df.drop(columns = bad_col_indexes)
    if 8 in bad_col_indexes: 
        # There is an extra column added because of a three word name of a school
        # Known instances: Yale 1992?
        df = df.drop(8, axis=1)
    if 7 in bad_col_indexes: 
    # case where read_fwf adds an additional column
        # seems to happen when team_1 or team_2 is a three word name
        # in this case, grab the third word from row without a NaN in added column and append to associated name
        # Known occurances: Cornell 2015, Brown 2015
        # There is an extra column added because of a three word name of a school
        # Known instances: Yale 1992?
        df = df.drop(7, axis=1)
    if 6 in bad_col_indexes: 
        # There is an extra column added because of a three word name of a school
        # Known instances: Yale 1992?
        df = df.drop(6, axis=1)     
    if 4 in bad_col_indexes: 
        # There is an extra column added because of a three word name of a school
        # Known instances: Yale 1992?
        df = df.drop(4, axis=1)     
    if 2 in bad_col_indexes: 
        # There is an extra column added because of a three word name of a school
        # Known instances: Yale 1992?
        df = df.drop(2, axis=1)   
    return df

In [5]:

def get_games(team_1, start, end=None, team_2=None):
    """
    Returns: a dataframe of all games played for a given team inclusive of given start & end year
    Data from boydsworld.com
    
    Parameter team_name: team whose games to select 
    Precondition: team_name is a lowercase string
    Parameter start: the start year of games. To select only games from one year, leave  
    Precondition: start is an int >= 1992
    Parameter end: the end year of games
    Precondition: end is an int <= 2020
    """
    if end is None:
        end = start
    if team_2 is None: 
        team_2 = "all"
    payload = {"team1":team_1,"firstyear":str(start),"team2":team_2,"lastyear":str(end),"format":"Text","submit":"Fetch"}
    url = "http://www.boydsworld.com/cgi/scores.pl?" 
    s = requests.Session()
    r = requests.get(url, params=payload)
    response = r.text 
    if (len(response) < 10):
        return pd.DataFrame
    else:
        df = pd.read_fwf(StringIO(response), encoding = "utf8", header=None)
        col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
        if len(df.columns) == 7:
            # case where read_fwf adds an additional column
            # seems to happen when team_1 or team_2 is a three word name
            # in this case, grab the third word from row without a NaN in added column and append to associated name
            # Known occurances: Cornell 2015, Brown 2015
            col_bools = df.isnull().any(axis=0)
            bad_cols = col_bools[col_bools == True].index.to_series()
            bad_col_indexes = []
            for index, value in bad_cols.iteritems():
                bad_col_indexes.append(index)
            bad_rows = df[~df.isnull().any(axis=1)]
            for col_index in bad_col_indexes: 
                if col_index == 6: 
                    # Known instances: Yale 1992?
                    df = df.drop(col_index, axis=1)
                if col_index == 2 or col_index == 4:
                    affected_col = col_index-1
                    bad_rows.iloc[:,affected_col] = bad_rows.iloc[:,affected_col]+" "+bad_rows.iloc[:,col_index]
                    corrected_rows = bad_rows.drop(col_index, axis=1)
                    # drop bad column
                    df = df.drop(col_index, axis=1)
                    # drop bad rows
                    df = df.drop(bad_rows.index, axis=0)
                    # concatenate corrected rows
                    df = pd.concat([df, corrected_rows])   
        elif len(df.columns) == 5:
            # Known occurances: Yale 1992
            # In this case, read_fwf seems to append team 2's score to column 3. We correct by creating a new column with correct value
            df.insert(4, 3, 0, allow_duplicates=True)
            df.columns = col_names
            df["team_2_score"] = df["team_2"].str[-2:].str.strip()
            df["team_2_score"] = df["team_2_score"].astype(int)
            df["team_2"] = df["team_2"].str[:-2].str.strip()
        df.columns = col_names
        wins = df[(df["team_1"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
        losses = df[(df["team_2"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
        wins["opponent"] = wins["team_2"]
        losses["opponent"] = losses["team_1"]
        wins["runs_scored"] = wins["team_1_score"]
        wins["runs_allowed"] = wins["team_2_score"]
        losses["runs_scored"] = losses["team_2_score"]
        losses["runs_allowed"] = losses["team_1_score"]       
        df = pd.concat([wins,losses])
        df["run_difference"] = df["runs_scored"] - df["runs_allowed"]
        # setting dtypes
        df["run_difference"] = df["run_difference"].astype(int)
        df["runs_allowed"] = df["runs_allowed"].astype(int)
        df["runs_scored"] = df["runs_scored"].astype(int)
        df["date"] = pd.to_datetime(df["date"])
        df["date"] = df["date"].dt.strftime('%Y-%m-%d')
        df = df.drop(columns=["team_1","team_1_score","team_2","team_2_score"])
        df = df.sort_values(by="date")
        return df

In [3]:
team_1 = "Yale"
start = 1992
end = 2004

if end is None:
    stop = start+1
else: 
    stop = end
if team_2 is None: 
    team_2 = "all"
payload = {"team1":team_1,"firstyear":str(start),"team2":team_2,"lastyear":str(stop),"format":"Text","submit":"Fetch"}
url = "http://www.boydsworld.com/cgi/scores.pl?" 
s = requests.Session()
r = requests.get(url, params=payload)
response = r.text 
if (len(response) < 10):
    print("oof")
else:
    df = pd.read_fwf(StringIO(response), encoding = "utf8", header=None)
    col_names = ["date", "team_1", "team_1_score", "team_2", "team_2_score", "field"]
#         if len(df.columns) == 7:
#             # case where read_fwf adds an additional column
#             # seems to happen when team_1 or team_2 is a three word name
#             # in this case, grab the third word from row without a NaN in added column and append to associated name
#             # Known occurances: Cornell 2015, Brown 2015
#             col_bools = df.isnull().any(axis=0)
#             bad_cols = col_bools[col_bools == True].index.to_series()
#             bad_col_indexes = []
#             for index, value in bad_cols.iteritems():
#                 bad_col_indexes.append(index)
#             bad_rows = df[~df.isnull().any(axis=1)]
#             for col_index in bad_col_indexes: 
#                 if col_index == 6: 
#                     # Known instances: Yale 1992?
#                     df = df.drop(col_index, axis=1)
#                 if col_index == 2 or col_index == 4:
#                     affected_col = col_index-1
#                     bad_rows.iloc[:,affected_col] = bad_rows.iloc[:,affected_col]+" "+bad_rows.iloc[:,col_index]
#                     corrected_rows = bad_rows.drop(col_index, axis=1)
#                     # drop bad column
#                     df = df.drop(col_index, axis=1)
#                     # drop bad rows
#                     df = df.drop(bad_rows.index, axis=0)
#                     # concatenate corrected rows
#                     df = pd.concat([df, corrected_rows])   
#         elif len(df.columns) == 5:
#             # Known occurances: Yale 1992
#             # In this case, read_fwf seems to append team 2's score to column 3. We correct by creating a new column with correct value
#             df.insert(4, 3, 0, allow_duplicates=True)
#             df.columns = col_names
#             df["team_2_score"] = df["team_2"].str[-2:].str.strip()
#             df["team_2_score"] = df["team_2_score"].astype(int)
#             df["team_2"] = df["team_2"].str[:-2].str.strip()
#         df.columns = col_names
#         wins = df[(df["team_1"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
#         losses = df[(df["team_2"] == team_1) & (df["team_1_score"] > df["team_2_score"])]
#         wins["opponent"] = wins["team_2"]
#         losses["opponent"] = losses["team_1"]
#         wins["runs_scored"] = wins["team_1_score"]
#         wins["runs_allowed"] = wins["team_2_score"]
#         losses["runs_scored"] = losses["team_2_score"]
#         losses["runs_allowed"] = losses["team_1_score"]       
#         df = pd.concat([wins,losses])
#         df["run_difference"] = df["runs_scored"] - df["runs_allowed"]
#         # setting dtypes
#         df["run_difference"] = df["run_difference"].astype(int)
#         df["runs_allowed"] = df["runs_allowed"].astype(int)
#         df["runs_scored"] = df["runs_scored"].astype(int)
#         df["date"] = df["date"].astype("datetime64[ns]")
#         df = df.drop(columns=["team_1","team_1_score","team_2","team_2_score"])
#         if end is None: 
#             df = df[df["date"].dt.year == start]
#         df = df.sort_values(by="date")


NameError: name 'team_2' is not defined