In [52]:
import pandas as pd
import sqlite3

con = sqlite3.connect("data/nba.sqlite")
games = pd.read_sql_query("SELECT * FROM game", con)
active_players = pd.read_sql_query("SELECT full_name FROM player WHERE is_active=TRUE", con)

#QUESTION 1: WHICH TEAMS IN THE PAST 5 YEARS CONSISTENTLY SCORE THE HIGHEST IN THEIR GAMES? 

#INTRODUCTION: This question asks for teams who have scored the most in their games from the last five years. Teams who can effectively score high points consistently are likely to find
# potential success in the long term. 

#RISING ACTION: To find these teams, we will pull info from all games from the past 5 years, and then calculate the stats for the home games and the away games. 

games_past_5yrs = pd.read_sql_query("""
	SELECT * FROM game  
	WHERE game_date BETWEEN '2019-01-01 00:00:00' AND '2023-12-31 23:59:59'
    ORDER BY game_date DESC
""", con)

# Calculate statistics for home and away teams
home_stats = games_past_5yrs.groupby(['team_id_home', 'team_name_home'])['pts_home'].agg(['mean', 'std', 'count'])
away_stats = games_past_5yrs.groupby(['team_id_away', 'team_name_away'])['pts_away'].agg(['mean', 'std', 'count'])

home_stats.index.names = ['team_id', 'team_name']
away_stats.index.names = ['team_id', 'team_name']

combined_stats = pd.concat([home_stats, away_stats])
score_stats_all = combined_stats.groupby(['team_id', 'team_name']).mean()
score_stats_all['total_games'] = combined_stats.groupby(['team_id', 'team_name'])['count'].sum() 

# Filter teams with at least 100 games
score_stats_all = score_stats_all[score_stats_all['total_games'] >= 100]

# Display top 25 teams by average score
print("Teams with highest average scores:")
print(score_stats_all.sort_values(by='mean', ascending=False).head(25))

#CLIMAX: This leaves us with the 25 NBA teams with the highest average scores within the past 5 years. Every team included in this list has played at least 100 games total, this is to eliminate
# inactive or disbanded teams. This leaves us with these teams as our top 3 choices:

# 1) Milwaukee Bucks with 116.4 avg points and 434 total games played
# 2) Utah Jazz with 114.1 avg points and 393 total games played
# 3) Atlanta Hawks with 114 avg points and 390 total games played

#FALLING ACTION: These three teams have the highest average score in the past 5 years with at least 100 games played. The standout team is the Milwaukee Bucks who have the highest average of scores 
# while also having the 3rd highest number of games in the league behind the Denver Nuggets and the Boston Celtics. 

#CONCLUSION: The Milwaukee Bucks look particularly promising for their high average of points scored and the relatively high number of games played. While the Utah Jazz and Atlanta Hawks are great
# secondary choices based on their average scores alone. 

Teams with highest average scores:
                                         mean        std  count  total_games
team_id    team_name                                                        
1610612749 Milwaukee Bucks         116.435716  12.726194  217.0          434
1610612762 Utah Jazz               114.124126  11.353901  196.5          393
1610612737 Atlanta Hawks           114.082051  12.425118  195.0          390
1610612750 Minnesota Timberwolves  114.046378  11.995903  184.0          368
1610612758 Sacramento Kings        113.760321  12.404773  186.5          373
1610612744 Golden State Warriors   113.553368  12.502250  207.5          415
1610612751 Brooklyn Nets           113.466575  12.269783  195.5          391
1610612740 New Orleans Pelicans    113.429579  11.357125  184.5          369
1610612763 Memphis Grizzlies       113.219401  12.564816  196.5          393
1610612756 Phoenix Suns            113.174049  11.591735  205.5          411
1610612743 Denver Nuggets          112.98

In [54]:
#QUESTION 2: WHICH TEAMS SCORED BELOW AVERAGE THE MOST CONSECUTIVELY? 

#INTRODUCTION: This question asks for the teams that have performed poorly the most consecutively. In essence, we are looking for teams that we steer clear of
# due to poor scoring statistics. 

#RISING ACTION: We will first begin by calculate the average points per season, including home games and away games. These scores will then be combined into a single dataset
# where we will then filter for teams that scored below this average. 

# Filter for teams with the lowest average scores across unique seasons
season_avg = pd.concat([games_past_5yrs['pts_home'], games_past_5yrs['pts_away']]).mean()

home_games = games_past_5yrs[['season_id', 'team_id_home', 'team_name_home', 'pts_home']].rename(columns={
    'team_id_home': 'team_id',
    'team_name_home': 'team_name',
    'pts_home': 'points'
})
away_games = games_past_5yrs[['season_id', 'team_id_away', 'team_name_away', 'pts_away']].rename(columns={
    'team_id_away': 'team_id',
    'team_name_away': 'team_name',
    'pts_away': 'points'
})

# Combine home and away games into a single dataset
all_games = pd.concat([home_games, away_games])

# Calculate average points per team per season
team_season_avg = all_games.groupby(['season_id', 'team_id', 'team_name'])['points'].mean().reset_index()

# Filter for unique teams with the lowest average scores
lowest_avg_teams = team_season_avg.groupby('team_name')['points'].mean().sort_values().reset_index()

print("Teams with the Lowest Average Scores:")
print(lowest_avg_teams.head(11))

#CLIMAX: This leaves us with the 10 teams who have the lowest average scores. The Ra'anana Maccabi Ra'anana isn't apart of the NBA, so for the purposes of this question, we won't be including this team
# in the decision making process. The top 5 lowest scoring teams are as follows:

# 1) Detroit Pistons with 103.78 points
# 2) Cleveland Cavaliers with 104.16 points
# 3) Orlando Magic with 105.37 points
# 4) New York Knicks with 106.40 points
# 5) Charlotte Hornets with 106.41 points

#FALLING ACTION: These 5 teams have the lowest average scores out of all the teams in the league. It can be inferred that these 5 teams have the weakest offensive capabilites in the league and should be
#avoided when making decisions regarding investment. It is important to keep in mind that because of their poor performances, these teams might be the cheapest to invest in as well. 

#CONCLUSION: These teams would require the most work when it comes to their offensive capabilites. Investing into these teams might be potentially risky as their success isn't guaranteed. 

Teams with the Lowest Average Scores:
                    team_name      points
0   Ra'anana Maccabi Ra'anana   87.666667
1             Detroit Pistons  103.785668
2         Cleveland Cavaliers  104.168146
3               Orlando Magic  105.371162
4             New York Knicks  106.405505
5           Charlotte Hornets  106.416098
6              Indiana Pacers  107.782670
7                  Miami Heat  107.845605
8           San Antonio Spurs  109.226521
9          Los Angeles Lakers  109.257752
10         Washington Wizards  109.353388


In [58]:
#QUESTION 3: WHICH TEAMS HAVE HAD THE MOST CONSISTENT APPEARENCES IN THE PLAYOFFS?

#INTRODUCTION: Playoffs are the gateway to the championship. This question asks for the teams who have most consistently made it to this stage of play. In other words, this question asks for
# the teams who are most likely to win a championship based on the frequency of their playoff appearances. 

#RISING ACTION: We calculate the number of playoff appearances of each teams over the last 5 years. We then sort our created dataframe by teams who had the most consistent appearances. 

# Analyze consistent playoff appearances
playoff_games = games_recent_5yrs[games_recent_5yrs['season_type'] == 'Playoffs']

# Get distinct playoff appearances per team per season
playoff_home_seasons = playoff_games[['season_id', 'team_name_home']].rename(columns={'team_name_home': 'team_name'}).drop_duplicates()
playoff_away_seasons = playoff_games[['season_id', 'team_name_away']].rename(columns={'team_name_away': 'team_name'}).drop_duplicates()
all_playoff_seasons = pd.concat([playoff_home_seasons, playoff_away_seasons]).drop_duplicates()

# Count playoff appearances by team
playoff_appearances = all_playoff_seasons.groupby('team_name').size().sort_values(ascending=False)

# Create a dataframe for playoff appearances
consistent_playoff_teams = playoff_appearances.reset_index()
consistent_playoff_teams.columns = ['team_name', 'appearances']

# Sort teams by the most consistent playoff appearances
consistent_playoff_teams = consistent_playoff_teams.sort_values(by='appearances', ascending=False)

# Display results
print("\nTeams with the most consistent playoff appearances (2019-2023):")
print(consistent_playoff_teams)

#CLIMAX: The teams with the most playoff apearances are as follows:

# Boston Celtics, Brooklyn Nets, Denver Nuggets, Philadelphia 76ers, and Milwaukee Bucks: all with 5 total playoff appearances

#FALLING ACTION: The Brooklyn Nets, Denver Nuggets, Philadelphia 76ers, and Milwaukee Bucks are the teams that made it to the playoffs most consistently in the past 5 years.  

#CONCLUSION: These teams with the most appearances in the playoffs could also be seen as the teams that are most likely to win a championship title in the coming seasons. 
# simply put, these are the most competitive teams in the league, and choice options for investmen


Teams with the most consistent playoff appearances (2019-2023):
                 team_name  appearances
0           Boston Celtics            5
1            Brooklyn Nets            5
2           Denver Nuggets            5
3       Philadelphia 76ers            5
4          Milwaukee Bucks            5
5               Miami Heat            4
6                Utah Jazz            4
7              LA Clippers            4
8             Phoenix Suns            3
9       Los Angeles Lakers            3
10        Dallas Mavericks            3
11   Golden State Warriors            3
12           Atlanta Hawks            3
13       Memphis Grizzlies            3
14  Portland Trail Blazers            3
15         Toronto Raptors            3
16  Minnesota Timberwolves            2
17          Indiana Pacers            2
18           Orlando Magic            2
19   Oklahoma City Thunder            2
20         Houston Rockets            2
21         New York Knicks            2
22         Detr

In [59]:
#QUESTION 4: WHICH TEAMS HAVE IMPROVED THE MOST IN THE LAST 5 YEARS?

#INTRODUCTION: This question asks for the teams who have made the most improvement when it comes to their offensive capabilities in the last five years. These teams are important to keep an eye out for
# as they have the potential to be successful in future seasons. 

#RISING ACTION: We calculate scoring improvements made within the last 5 years by looking at average points scored and identifying the teams who have made the biggest strides.

# Extract year and filter for regular season games
games_recent_5yrs['game_year'] = pd.to_datetime(games_recent_5yrs['game_date']).dt.year
regular_games = games_recent_5yrs[games_recent_5yrs['season_type'] == 'Regular Season']

# Combine home and away data for regular games
reg_all_games = pd.concat([
    regular_games[['team_name_home', 'game_year', 'pts_home']].rename(columns={'team_name_home': 'team_name', 'pts_home': 'points'}),
    regular_games[['team_name_away', 'game_year', 'pts_away']].rename(columns={'team_name_away': 'team_name', 'pts_away': 'points'})
])

# Calculate average points by team and year
team_year_avg = reg_all_games.groupby(['team_name', 'game_year'])['points'].mean().reset_index()

# Calculate total improvement for each team
team_improvement = team_year_avg.groupby('team_name')['points'].apply(lambda x: x.diff().sum()).reset_index()
team_improvement.columns = ['team_name', 'total_improvement']
team_improvement = team_improvement.sort_values(by='total_improvement', ascending=False)

# Display top teams by improvement
print("Teams with the greatest improvement over the past five years:")


#CLIMAX: The three teams that have seen the most improvement over the past 5 years are the New York Knicks, the Sacremento Kings, and the Memphis Grizzlies. 

#FALLING ACTION: These teams, particularly the New York Knicks and the Sacremento Kings, are underdogs who are proving themselves to be rising stars to look out for. 

#CONCLUSION: These 3 teams have the most potential to become top teams given their performance continues to improve at this rate. The Knicks in particular were actually the 4th worst team in terms of 
# average points scored. An investment here may be risky, but it may also prove to be fruitful. 

Teams with the greatest improvement over the past five years:


In [57]:
#QUESTION 5: WHICH TEAMS HAVE THE BEST DEFENSE? (BLOCKS/STEALS)

#INTRODUCTION: This question brings the importance of defensive stats into light. A good team must have a great offense but the best teams will have a 
# defense that is potentially greater. 

#RISING ACTION: By caluculating metrics like blocks, steals, and winrates, we will be able to rank the teams by defenseive effectiveness. 

# Extract and combine defensive stats from home and away games
defensive_stats = pd.concat([
    regular_games[['team_name_home', 'blk_home', 'stl_home', 'wl_home']].rename(
        columns={'team_name_home': 'team_name', 'blk_home': 'blocks', 'stl_home': 'steals', 'wl_home': 'win_loss'}),
    regular_games[['team_name_away', 'blk_away', 'stl_away', 'wl_away']].rename(
        columns={'team_name_away': 'team_name', 'blk_away': 'blocks', 'stl_away': 'steals', 'wl_away': 'win_loss'})
])

# Group by team and calculate defensive performance metrics
team_defense = defensive_stats.groupby('team_name').agg(
    total_blocks=('blocks', 'sum'),
    total_steals=('steals', 'sum'),
    win_rate=('win_loss', lambda x: (x == 'W').mean())
).reset_index()

# Compute defensive effectiveness
team_defense['defensive_effectiveness'] = (team_defense['total_blocks'] + team_defense['total_steals']) / team_defense['win_rate']

# Sort teams by defensive effectiveness in descending order
team_defense_sorted = team_defense.sort_values(by=['defensive_effectiveness', 'win_rate'], ascending= [False, False])

# Display teams by defensive effectiveness in order from best to worst
print("Teams Ranked by Defensive Effectiveness (Best to Worst):")
print(team_defense_sorted.head(10))

#CLIMAX: The top 3 teams with the highest defensive effectiveness in order are:
# 1) Detroit Pistons
# 2) Houston Rockets
# 3) Orlando Magic

#FALLING ACTION: These teams have the highest defense in the league. While their offensive capabilities might not be the strongest, with these defenses, these teams will be the least likely to give over
# critical points in their games. 

#CONCLUSION: When looking at NBA statistics, it is easy to simply focus on teams who score the most. It is also extremely important to take defensive stats into consideration as well. Well rounded teams
# have the potential to perform well in a number of different categories. 

Teams Ranked by Defensive Effectiveness (Best to Worst):
                 team_name  total_blocks  total_steals  win_rate  \
8          Detroit Pistons        1562.0        2562.0  0.300000   
10         Houston Rockets        1703.0        2794.0  0.381356   
21           Orlando Magic        1713.0        2553.0  0.383099   
17  Minnesota Timberwolves        1862.0        2906.0  0.431884   
11          Indiana Pacers        1974.0        2737.0  0.457627   
3        Charlotte Hornets        1657.0        2657.0  0.423631   
29      Washington Wizards        1619.0        2548.0  0.416431   
26       San Antonio Spurs        1697.0        2486.0  0.421652   
18    New Orleans Pelicans        1573.0        2777.0  0.440341   
20   Oklahoma City Thunder        1614.0        2736.0  0.440678   

    defensive_effectiveness  
8              13746.666667  
10             11792.133333  
21             11135.514706  
17             11040.000000  
11             10294.407407  
3             