# In [None]:
import pandas as pd
import sqlite3

# Connect to the SQLite database
con = sqlite3.connect("data/nba.sqlite")


class _SampleStdev:
    """SQLite aggregate computing the sample standard deviation.

    SQLite has no built-in STDEV function, so the team score query would
    fail with "no such function: STDEV" unless one is registered on the
    connection. Uses Welford's single-pass update; NULL inputs are skipped,
    mirroring how SQLite's own aggregates treat NULL. The n - 1 denominator
    matches pandas' default ``Series.std()``.
    """

    def __init__(self):
        self.count = 0
        self.mean = 0.0
        self.m2 = 0.0  # running sum of squared deviations from the mean

    def step(self, value):
        """Fold one row's value into the running statistics."""
        if value is None:
            return
        self.count += 1
        delta = value - self.mean
        self.mean += delta / self.count
        self.m2 += delta * (value - self.mean)

    def finalize(self):
        """Return the sample standard deviation, or NULL for < 2 values."""
        if self.count < 2:
            return None
        # Clamp guards against a tiny negative from floating-point rounding.
        return (max(self.m2, 0.0) / (self.count - 1)) ** 0.5


# SQLite function names are case-insensitive, so this serves STDEV(...) below.
con.create_aggregate("STDEV", 1, _SampleStdev)

# Load active players
active_players = pd.read_sql_query("SELECT full_name FROM player WHERE is_active=TRUE", con)

# Question 1: Which teams have most consistently had the highest scores?
# Per team: mean score (how high), standard deviation (how consistent) and
# number of games; teams with 10 or fewer games are excluded as too noisy.
team_scores_sql = """
SELECT team_id,
       AVG(score) AS avg_score,
       STDEV(score) AS score_stddev,
       COUNT(*) AS game_count
FROM game
GROUP BY team_id
HAVING COUNT(*) > 10
"""
team_scores = pd.read_sql_query(team_scores_sql, con)

# Question 2: Which team has had below-average scores for the most consecutive seasons?
# SQL command to calculate league average score per season
season_avg_sql = """
SELECT season, AVG(score) AS season_avg
FROM game
GROUP BY season
"""
season_avg_scores = pd.read_sql_query(season_avg_sql, con)

# Attach each game's league-wide season average, then flag the games whose
# score falls strictly below it.
games = pd.read_sql_query("SELECT * FROM game", con)
games = games.merge(season_avg_scores, on='season')
games['below_avg'] = games['score'] < games['season_avg']

# Calculate below-average seasons streak
def calculate_below_avg_streaks(df):
    """Return the length of the longest run of consecutive below-average rows.

    Args:
        df: DataFrame with a boolean ``below_avg`` column, already ordered
            the way the streak should be read (e.g. chronologically).

    Returns:
        int: size of the longest unbroken run of ``True`` values in
        ``below_avg``; 0 when there is none or ``df`` is empty.
    """
    flags = df['below_avg'].astype(bool)
    if flags.empty:
        return 0
    # A new run starts wherever the flag differs from the previous row, so
    # the cumulative sum of those change points labels each run uniquely.
    run_ids = flags.ne(flags.shift()).cumsum()
    # Summing the flag inside each run gives its length for True runs and 0
    # for False runs, so the max is the longest below-average streak.
    return int(flags.groupby(run_ids).sum().max())

# Roll the per-game rows up to one row per team and season so the streak is
# measured in seasons rather than individual games, and sort so consecutive
# rows are consecutive seasons.
# NOTE(review): assumes `season` values sort chronologically - confirm.
team_seasons = (
    games.groupby(['team_id', 'season'], as_index=False)
    .agg(team_avg=('score', 'mean'), season_avg=('season_avg', 'first'))
    .sort_values(['team_id', 'season'])
)
team_seasons['below_avg'] = team_seasons['team_avg'] < team_seasons['season_avg']
below_avg_seasons = team_seasons.groupby('team_id').apply(calculate_below_avg_streaks)

# Question 3: Which team has consistently made it to the playoffs?
# SQL command to calculate playoff appearance frequency by team
# NOTE(review): this averages `playoffs` over game rows, not seasons; verify
# that matches how the column is populated.
playoff_appearances_sql = """
SELECT team_id, AVG(playoffs) AS playoff_frequency
FROM game
GROUP BY team_id
"""
playoff_appearances = pd.read_sql_query(playoff_appearances_sql, con)

# Question 4: Does a higher proportion of three-point field goals increase the likelihood of winning?
# Fetch the raw columns; the proportion itself is computed in pandas below.
three_point_sql = """
SELECT three_pointers, score, won
FROM game
"""
three_point_data = pd.read_sql_query(three_point_sql, con)
three_point_data['three_point_proportion'] = three_point_data['three_pointers'] / three_point_data['score'].replace(0, 1)  # Avoid division by zero
# Grouping on the raw float ratio would give one group per distinct value,
# so bucket it into deciles to get a win rate backed by many games each.
three_point_bins = pd.qcut(three_point_data['three_point_proportion'], q=10, duplicates='drop')
three_point_stats = three_point_data.groupby(three_point_bins, observed=False)['won'].mean()

# Question 5: How does the frequency of turnovers in a game affect the likelihood of winning?
# SQL command to calculate average turnovers in games won vs lost
turnover_stats_sql = """
SELECT won, AVG(turnovers) AS avg_turnovers
FROM game
GROUP BY won
"""
turnover_stats = pd.read_sql_query(turnover_stats_sql, con)

# Close the database connection; this was the last query, everything below
# works on data already loaded into pandas.
con.close()

# Bin turnovers and calculate win rates. The open-ended top bin keeps games
# with more than 25 turnovers instead of silently dropping them.
turnover_bins = [-1, 5, 10, 15, 20, 25, float('inf')]
turnover_binned = games.groupby(pd.cut(games['turnovers'], bins=turnover_bins), observed=False)['won'].mean()

# Results summary
print("Question 1 - High Score Consistency:\n", team_scores)
print("\nQuestion 2 - Below Average Streaks:\n", below_avg_seasons)
print("\nQuestion 3 - Playoff Frequency:\n", playoff_appearances)
print("\nQuestion 4 - Three-Point Proportion and Win Rate:\n", three_point_stats)
print("\nQuestion 5 - Turnover Frequency and Win Rate:\n", turnover_stats)
print("\nTurnover Bins and Win Rate:\n", turnover_binned)