This code block handles the import statements and data loading, to keep the notebook clean.

In [1]:
import pandas as pd
import kagglehub

#Philadelphia 2025 Crime Statistics
phil_2025 = pd.read_csv("/Users/pwax/Desktop/mcdermid_waxman_final_project/data/incidents_part1_part2.csv")
path_nfl = kagglehub.dataset_download("keonim/nfl-game-scores-dataset-2017-2023")
game_dates = pd.read_csv(f"{path}/Season_Scores/2024_scores.csv")


In [109]:
#game_date takes the team and year, and returns home_game_dates, away_game_dates, game_result.
def nfl_game_date(year, team):
    #load the specific year
    df = pd.read_csv(f"{path_nfl}/Season_Scores/{year}_scores.csv")
    df = df.dropna(subset=['Date'])

    
    #subsets the df to games where the team is playing
    df_away = df[(df["AwayTeam"] == team)]
    df_home = df[(df["HomeTeam"] == team)]
        
    home_dates_with_year = df_home['Date'].apply(
        lambda x: f"{x}/{year+1}" if int(str(x).split('/')[0]) <= 6 
        else f"{x}/{year}")    
    away_dates_with_year = df_away['Date'].apply(
        lambda x: f"{x}/{year+1}" if int(str(x).split('/')[0]) <= 6 
        else f"{x}/{year}")
    
    # Now convert to datetime
    home_game_dates = pd.to_datetime(home_dates_with_year)
    away_game_dates = pd.to_datetime(away_dates_with_year)

    #Gets the result of the game
    home_game_win = df_home['HomeWin'].to_list()
    away_game_win = df_away['AwayWin'].to_list()

    #Convert to boolean
    home_game_win = [bool(x) for x in home_game_win]
    away_game_win = [bool(x) for x in away_game_win]

    #Creates an away, home df, with the pairs for results and tags the Location
    home = pd.DataFrame(list(zip(home_game_dates, home_game_win)))
    away = pd.DataFrame(list(zip(away_game_dates, away_game_win)))
    home["Location"] = "Home"
    away["Location"] = "Away"

    #puts the two together and sorts them by date
    season = pd.concat([home, away], ignore_index=True) 
    season.columns = ["Date", "Result", "Location"]
    season = season.sort_values('Date').reset_index(drop=True)    
    
    return season

In [112]:
path_nba = kagglehub.dataset_download("nathanlauga/nba-games")

print("Path to dataset files:", path_nba)

Path to dataset files: /Users/pwax/.cache/kagglehub/datasets/nathanlauga/nba-games/versions/10


In [150]:
def nba_game_date(year, team):
    #first get teamId for this dataset
    team_df = pd.read_csv(f"{path_nba}/teams.csv")

    #searches the dataset for the ID, returns nothing if not found
    team_id = team_df[team_df['NICKNAME'] == team]['TEAM_ID']
    if not team_id.empty:
        team_id = team_id.iloc[0]
    else:
        print("Team not found in dataset, look at the documentation.")
        return
    print(team_id)
    #use team_id and year to sort games wanted
    df = pd.read_csv(f"{path_nba}/games.csv")
    df['SEASON'] = df['SEASON'].astype('int')
    df_home = df[(df["HOME_TEAM_ID"] == team_id) & (df["SEASON"] == int(year))]
    df_away = df[(df["VISITOR_TEAM_ID"] == team_id) & (df["SEASON"] == int(year))]

    
    return df_home, df_away

In [151]:
print(nba_game_date(2023,"Wizards"))

1610612764
(Empty DataFrame
Columns: [GAME_DATE_EST, GAME_ID, GAME_STATUS_TEXT, HOME_TEAM_ID, VISITOR_TEAM_ID, SEASON, TEAM_ID_home, PTS_home, FG_PCT_home, FT_PCT_home, FG3_PCT_home, AST_home, REB_home, TEAM_ID_away, PTS_away, FG_PCT_away, FT_PCT_away, FG3_PCT_away, AST_away, REB_away, HOME_TEAM_WINS]
Index: []

[0 rows x 21 columns], Empty DataFrame
Columns: [GAME_DATE_EST, GAME_ID, GAME_STATUS_TEXT, HOME_TEAM_ID, VISITOR_TEAM_ID, SEASON, TEAM_ID_home, PTS_home, FG_PCT_home, FT_PCT_home, FG3_PCT_home, AST_home, REB_home, TEAM_ID_away, PTS_away, FG_PCT_away, FT_PCT_away, FG3_PCT_away, AST_away, REB_away, HOME_TEAM_WINS]
Index: []

[0 rows x 21 columns])
