# SCOPE - Data Visualization
## Premier League Corner Statistics Explorer

This notebook provides interactive visualizations for exploring Premier League corner data:
- **Data Overview**: Distributions and summary statistics
- **By-Year Analysis**: Season trends and Over/Under rates
- **By-Team Analysis**: Configurable team comparisons and rankings
- **Feature Exploration**: Correlations and relationships

In [1]:
# =============================================================================
# CELL 1: Setup & Configuration
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings

# Silence every warning category for the rest of the session so library
# notices do not clutter cell output.
# NOTE(review): this also hides deprecation warnings - confirm that is intended.
warnings.filterwarnings('ignore')

# Plot styling
# The matplotlib style sheet is applied first and the seaborn palette second.
# NOTE(review): presumably this order keeps the 'husl' palette from being
# replaced by the style sheet's own colour cycle - confirm before reordering.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Plotly template
# Shared template name passed as `template=` to the Plotly figures below.
PLOTLY_TEMPLATE = 'plotly_white'

# Colour scheme: named hex colours shared by the figures below
# (generic roles plus one colour each for the home and away sides).
COLORS = dict(
    primary='#1f77b4',
    secondary='#ff7f0e',
    success='#2ca02c',
    danger='#d62728',
    home='#3498db',
    away='#e74c3c',
)

# =============================================================================
# CONFIGURABLE: Team Filter
# Leave the list empty to include all teams.
# Example: ['Arsenal', 'Chelsea', 'Liverpool'] restricts the team views to those clubs.
# =============================================================================
TEAM_FILTER = list()  # Empty = all teams

print("Setup complete")

Setup complete


In [2]:
# =============================================================================
# CELL 2: Data Download & Loading
# =============================================================================

# Season configurations: display name -> season code used in the download URL.
# Each code is the two-digit start year followed by the two-digit end year,
# e.g. '2020-21' -> '2021'. The last season listed is the test season (ongoing).
SEASONS = {
    name: name[2:4] + name[5:7]
    for name in ('2020-21', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26')
}

# URL template; `{code}` is filled with a value from SEASONS
BASE_URL = 'https://www.football-data.co.uk/mmz4281/{code}/E0.csv'

# Columns to use, grouped by what the later cells do with them
COLS = (
    ['Date', 'HomeTeam', 'AwayTeam']      # match identity
    + ['FTHG', 'FTAG']                    # presumably full-time home/away goals - TODO confirm
    + ['HS', 'AS', 'HST', 'AST']          # shots and shots on target (home/away)
    + ['HF', 'AF']                        # fouls (home/away)
    + ['HC', 'AC']                        # corners (home/away)
    + ['HY', 'AY']                        # yellow cards (home/away)
    + ['HR', 'AR']                        # presumably red cards (home/away) - TODO confirm
)

def load_season(season_name, season_code):
    """Download one season's match file and trim it to the columns of interest.

    Parameters
    ----------
    season_name : str
        Label stored in the returned frame's 'Season' column and used in the
        progress message.
    season_code : str
        Value substituted for `{code}` in BASE_URL.

    Returns
    -------
    pandas.DataFrame or None
        The season's matches restricted to whichever COLS entries the file
        contains, plus a 'Season' column; None when loading fails for any
        reason (the failure is printed rather than raised).
    """
    url = BASE_URL.format(code=season_code)
    try:
        raw = pd.read_csv(url, encoding='utf-8')
        # Some files may lack a few of the requested columns; keep what exists,
        # in COLS order.
        present = [name for name in COLS if name in raw.columns]
        season_df = raw[present].copy()
        season_df['Season'] = season_name
        print(f"  {season_name}: {len(season_df)} matches")
        return season_df
    except Exception as err:
        # Best effort: report the problem and let the caller skip this season.
        print(f"  {season_name}: Failed to load - {err}")
        return None

# Load all seasons, keeping only those that loaded successfully
# (load_season returns None on failure).
print("Loading data from Football-Data.co.uk...\n")
dfs = []
for season_name, season_code in SEASONS.items():
    season_df = load_season(season_name, season_code)
    if season_df is not None:
        dfs.append(season_df)

# Fail with an actionable message when every download failed; pd.concat would
# otherwise raise a generic "No objects to concatenate" ValueError.
if not dfs:
    raise RuntimeError(
        "No season data could be loaded from Football-Data.co.uk; "
        "check the network connection and the SEASONS codes."
    )

# Combine all seasons
df = pd.concat(dfs, ignore_index=True)

# Parse dates (day comes first in the source files); unparseable values become NaT
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')

# Create target variable
df['TotalCorners'] = df['HC'] + df['AC']

# Create Over/Under flags for common thresholds (1 = total strictly above the line).
# Rows whose corner counts are missing compare as False and are therefore flagged 0.
for threshold in [8.5, 9.5, 10.5, 11.5]:
    df[f'Over_{threshold}'] = (df['TotalCorners'] > threshold).astype(int)

# Mark train vs test: the 2025-26 season is held out as the test split
df['Split'] = df['Season'].apply(lambda x: 'Test' if x == '2025-26' else 'Train')

# Sort by date
df = df.sort_values('Date').reset_index(drop=True)

print(f"\n{'='*50}")
print(f"Total matches loaded: {len(df)}")
print(f"Date range: {df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}")
print(f"Training matches: {len(df[df['Split']=='Train'])}")
print(f"Test matches: {len(df[df['Split']=='Test'])}")
print(f"{'='*50}")

Loading data from Football-Data.co.uk...

  2020-21: 380 matches
  2021-22: 380 matches
  2022-23: 380 matches
  2023-24: 380 matches
  2024-25: 380 matches
  2025-26: 200 matches

Total matches loaded: 2100
Date range: 2020-09-12 to 2026-01-04
Training matches: 1900
Test matches: 200


In [3]:
# Quick data preview: first 10 rows of the combined frame, which the loading
# cell sorted by date, so these are the earliest matches.
df.head(10)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,HS,AS,HST,AST,HF,...,AY,HR,AR,Season,TotalCorners,Over_8.5,Over_9.5,Over_10.5,Over_11.5,Split
0,2020-09-12,Fulham,Arsenal,0,3,5,13,2,6,12,...,2,0,0,2020-21,5,0,0,0,0,Train
1,2020-09-12,Crystal Palace,Southampton,1,0,5,9,3,5,14,...,1,0,0,2020-21,10,1,1,0,0,Train
2,2020-09-12,Liverpool,Leeds,4,3,22,6,6,3,9,...,0,0,0,2020-21,9,1,0,0,0,Train
3,2020-09-12,West Ham,Newcastle,0,2,15,15,3,2,13,...,2,0,0,2020-21,15,1,1,1,1,Train
4,2020-09-13,West Brom,Leicester,0,3,7,13,1,7,12,...,1,0,0,2020-21,7,0,0,0,0,Train
5,2020-09-13,Tottenham,Everton,0,1,9,15,5,4,15,...,0,0,0,2020-21,8,0,0,0,0,Train
6,2020-09-14,Brighton,Chelsea,1,3,13,10,3,5,8,...,0,0,0,2020-21,7,0,0,0,0,Train
7,2020-09-14,Sheffield United,Wolves,0,2,9,11,2,4,13,...,1,0,0,2020-21,17,1,1,1,1,Train
8,2020-09-19,Everton,West Brom,5,2,17,6,7,4,9,...,0,0,1,2020-21,12,1,1,1,1,Train
9,2020-09-19,Leeds,Fulham,4,3,10,14,7,6,13,...,2,0,0,2020-21,8,0,0,0,0,Train


---
## Section 1: Data Overview

In [4]:
# =============================================================================
# CELL 3: Distribution of Total Corners
# =============================================================================

# Mean of the target, drawn as a dashed reference line on the histogram
mean_corners = df['TotalCorners'].mean()

histogram_options = dict(
    x='TotalCorners',
    nbins=20,
    title='Distribution of Total Corners per Match',
    labels={'TotalCorners': 'Total Corners', 'count': 'Number of Matches'},
    template=PLOTLY_TEMPLATE,
    color_discrete_sequence=[COLORS['primary']],
)
fig = px.histogram(df, **histogram_options)

fig.add_vline(
    x=mean_corners,
    line_dash='dash',
    line_color=COLORS['danger'],
    annotation_text=f'Mean: {mean_corners:.1f}',
)

# Small gap between bars so individual bins stay distinguishable
fig.update_layout(bargap=0.1)
fig.show()

In [5]:
# =============================================================================
# CELL 4: Summary Statistics Table
# =============================================================================

# Overall stats: describe() for the three corner columns under readable headers
corner_headers = {
    'TotalCorners': 'Total Corners',
    'HC': 'Home Corners',
    'AC': 'Away Corners',
}
overall_stats = (
    df[list(corner_headers)]
    .describe()
    .round(2)
    .rename(columns=corner_headers)
)
print("Overall Statistics:")
print("="*50)
display(overall_stats)

# Per-season stats: named aggregation yields the final column headers directly
print("\n\nPer-Season Statistics:")
print("="*50)
season_stats = df.groupby('Season').agg(**{
    'Matches': ('TotalCorners', 'count'),
    'Avg Corners': ('TotalCorners', 'mean'),
    'Std': ('TotalCorners', 'std'),
    'Min': ('TotalCorners', 'min'),
    'Max': ('TotalCorners', 'max'),
    'Avg Home': ('HC', 'mean'),
    'Avg Away': ('AC', 'mean'),
}).round(2)
display(season_stats)

Overall Statistics:


Unnamed: 0,Total Corners,Home Corners,Away Corners
count,2100.0,2100.0,2100.0
mean,10.32,5.63,4.68
std,3.41,3.08,2.82
min,2.0,0.0,0.0
25%,8.0,3.0,3.0
50%,10.0,5.0,4.0
75%,13.0,8.0,6.0
max,24.0,17.0,19.0




Per-Season Statistics:


Unnamed: 0_level_0,Matches,Avg Corners,Std,Min,Max,Avg Home,Avg Away
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-21,380,10.19,3.41,2,24,5.56,4.63
2021-22,380,10.42,3.38,2,20,5.6,4.82
2022-23,380,10.11,3.26,2,21,5.64,4.47
2023-24,380,10.84,3.52,2,21,6.12,4.71
2024-25,380,10.3,3.47,2,19,5.43,4.87
2025-26,200,9.8,3.32,3,19,5.28,4.52


In [6]:
# =============================================================================
# CELL 5: Home vs Away Corner Comparison
# =============================================================================

# Reshape to long form for the violin plot: one row per (match, side) with the
# side's corner count in 'Corners' and the side label in 'Type'.
home_data = df[['HC', 'Season']].rename(columns={'HC': 'Corners'})
home_data['Type'] = 'Home'

away_data = df[['AC', 'Season']].rename(columns={'AC': 'Corners'})
away_data['Type'] = 'Away'

corner_data = pd.concat([home_data, away_data])

side_colors = {'Home': COLORS['home'], 'Away': COLORS['away']}
fig = px.violin(
    corner_data,
    x='Type',
    y='Corners',
    color='Type',
    color_discrete_map=side_colors,
    box=True,
    title='Home vs Away Corners Distribution',
    template=PLOTLY_TEMPLATE,
)
fig.show()

# Print summary (mean and standard deviation per side)
home_corners, away_corners = df['HC'], df['AC']
print(f"\nHome corners: mean = {home_corners.mean():.2f}, std = {home_corners.std():.2f}")
print(f"Away corners: mean = {away_corners.mean():.2f}, std = {away_corners.std():.2f}")


Home corners: mean = 5.63, std = 3.08
Away corners: mean = 4.68, std = 2.82


---
## Section 2: By-Year (Season) Analysis

In [7]:
# =============================================================================
# CELL 6: Season Trends - Average Corners per Season
# =============================================================================

# Mean total / home / away corners for each season
season_avg = df.groupby('Season')[['TotalCorners', 'HC', 'AC']].mean().reset_index()

# One line per series: (column, legend label, line style, marker size).
# The total is drawn solid and thicker; home/away are dotted.
trend_series = [
    ('TotalCorners', 'Total Corners', dict(color=COLORS['primary'], width=3), 10),
    ('HC', 'Home Corners', dict(color=COLORS['home'], width=2, dash='dot'), 8),
    ('AC', 'Away Corners', dict(color=COLORS['away'], width=2, dash='dot'), 8),
]

fig = go.Figure()
for column, legend_label, line_style, marker_size in trend_series:
    fig.add_trace(go.Scatter(
        x=season_avg['Season'],
        y=season_avg[column],
        mode='lines+markers',
        name=legend_label,
        line=line_style,
        marker=dict(size=marker_size),
    ))

fig.update_layout(
    title='Average Corners per Season',
    xaxis_title='Season',
    yaxis_title='Average Corners',
    template=PLOTLY_TEMPLATE,
    hovermode='x unified',
)
fig.show()

In [8]:
# =============================================================================
# CELL 7: Over/Under Rates by Season
# =============================================================================

# Share of matches (in %) that finished over each corner line, per season.
# The Over_* columns are 0/1 flags, so their mean is the over rate.
thresholds = [8.5, 9.5, 10.5, 11.5]
over_columns = [f'Over_{t}' for t in thresholds]
over_rates = (df.groupby('Season')[over_columns].mean() * 100).reset_index()

# Long form for plotting: one row per (season, threshold)
over_rates_long = over_rates.melt(
    id_vars='Season',
    var_name='Threshold',
    value_name='Over Rate (%)',
)
# Turn the column names into legend labels, e.g. 'Over_8.5' -> 'Over 8.5'
over_rates_long['Threshold'] = over_rates_long['Threshold'].str.replace('Over_', 'Over ')

fig = px.bar(
    over_rates_long,
    x='Season',
    y='Over Rate (%)',
    color='Threshold',
    barmode='group',
    title='Over/Under Rates by Season',
    template=PLOTLY_TEMPLATE,
)

# 50% reference line: bars above it mean "over" landed more often than "under"
fig.add_hline(y=50, line_dash='dash', line_color='gray', annotation_text='50%')

fig.update_layout(yaxis_range=[0, 100])
fig.show()

In [9]:
# =============================================================================
# CELL 8: Box Plot - Corner Distribution by Season
# =============================================================================

# One box per season; colour already encodes the season shown on the x-axis,
# so the legend is redundant and hidden.
box_options = dict(
    x='Season',
    y='TotalCorners',
    color='Season',
    title='Corner Distribution by Season',
    template=PLOTLY_TEMPLATE,
)
fig = px.box(df, **box_options)
fig.update_layout(showlegend=False)
fig.show()

In [10]:
# =============================================================================
# CELL 9: Monthly Patterns Heatmap
# =============================================================================

# Extract month from date
df['Month'] = df['Date'].dt.month
df['MonthName'] = df['Date'].dt.strftime('%b')

# Create pivot table: Season x Month (mean total corners per cell)
monthly_avg = df.pivot_table(
    values='TotalCorners',
    index='Season',
    columns='Month',
    aggfunc='mean'
).round(1)

# A season runs across the new year, so plain calendar order (Jan..Dec) would put
# the second half of each season before its first half. Order the columns from
# the season's first month instead so each row reads chronologically.
SEASON_START_MONTH = 8  # August
season_ordered_months = sorted(
    monthly_avg.columns,
    key=lambda month: (month - SEASON_START_MONTH) % 12
)
monthly_avg = monthly_avg.reindex(columns=season_ordered_months)

# Rename columns to month names
month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
               7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
monthly_avg.columns = [month_names.get(m, m) for m in monthly_avg.columns]

fig = px.imshow(
    monthly_avg,
    title='Average Corners by Month and Season',
    labels=dict(x='Month', y='Season', color='Avg Corners'),
    template=PLOTLY_TEMPLATE,
    color_continuous_scale='RdYlGn',
    aspect='auto',
    text_auto=True
)
fig.show()

---
## Section 3: By-Team Analysis

In [11]:
# =============================================================================
# CELL 10: Prepare Team Statistics
# =============================================================================

# Every club that appears as either the home or the away side, alphabetically
home_names = set(df['HomeTeam'].unique())
away_names = set(df['AwayTeam'].unique())
all_teams = sorted(home_names.union(away_names))
print(f"Total teams in dataset: {len(all_teams)}")
print(f"Teams: {', '.join(all_teams)}")

# Calculate team stats
def calculate_team_stats(data):
    """Calculate corner statistics for each team that appears in `data`.

    The team list is derived from `data` itself (home and away sides, sorted
    by name), so the function does not rely on any notebook-level variable
    and can be applied to any subset of matches, e.g. a single season.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per match with the columns 'HomeTeam', 'AwayTeam', 'HC',
        'AC' and 'TotalCorners'.

    Returns
    -------
    pandas.DataFrame
        One row per team with the columns:
        Team, HomeGames, AwayGames, TotalGames,
        HomeCornersFor / HomeCornersAgainst (means over the team's home games),
        AwayCornersFor / AwayCornersAgainst (means over the team's away games),
        AvgCornersFor / AvgCornersAgainst / AvgMatchCorners (per game, home and
        away combined). A home-only or away-only mean is 0 when the team has
        no game of that kind. When `data` holds no matches the frame is empty
        but still carries all of these columns.
    """
    columns = [
        'Team', 'HomeGames', 'AwayGames', 'TotalGames',
        'HomeCornersFor', 'HomeCornersAgainst',
        'AwayCornersFor', 'AwayCornersAgainst',
        'AvgCornersFor', 'AvgCornersAgainst', 'AvgMatchCorners',
    ]

    # Missing team names are dropped so sorting never mixes NaN with strings.
    teams = sorted(
        set(data['HomeTeam'].dropna().unique())
        | set(data['AwayTeam'].dropna().unique())
    )

    stats = []
    for team in teams:
        home_games = data[data['HomeTeam'] == team]
        away_games = data[data['AwayTeam'] == team]
        n_home = len(home_games)
        n_away = len(away_games)
        # Every team comes from `data`, so it has at least one game and the
        # combined per-game averages below never divide by zero.
        n_total = n_home + n_away

        stats.append({
            'Team': team,
            'HomeGames': n_home,
            'AwayGames': n_away,
            'TotalGames': n_total,
            # At home the team's own corners are HC and the opponent's are AC ...
            'HomeCornersFor': home_games['HC'].mean() if n_home > 0 else 0,
            'HomeCornersAgainst': home_games['AC'].mean() if n_home > 0 else 0,
            # ... and away from home the roles of the two columns swap.
            'AwayCornersFor': away_games['AC'].mean() if n_away > 0 else 0,
            'AwayCornersAgainst': away_games['HC'].mean() if n_away > 0 else 0,
            'AvgCornersFor': (home_games['HC'].sum() + away_games['AC'].sum()) / n_total,
            'AvgCornersAgainst': (home_games['AC'].sum() + away_games['HC'].sum()) / n_total,
            'AvgMatchCorners': (
                home_games['TotalCorners'].sum() + away_games['TotalCorners'].sum()
            ) / n_total,
        })

    # Passing `columns` keeps the schema stable even when `stats` is empty.
    return pd.DataFrame(stats, columns=columns)

# All-season team table plus the per-match corner differential (for minus against)
team_stats = calculate_team_stats(df)
team_stats['NetCorners'] = team_stats['AvgCornersFor'].sub(team_stats['AvgCornersAgainst'])

# Apply team filter if set; an empty TEAM_FILTER keeps every team
if not TEAM_FILTER:
    team_stats_filtered = team_stats
else:
    keep_rows = team_stats['Team'].isin(TEAM_FILTER)
    team_stats_filtered = team_stats[keep_rows]
    print(f"\nFiltered to {len(TEAM_FILTER)} teams: {', '.join(TEAM_FILTER)}")

display(team_stats_filtered.round(2))

Total teams in dataset: 28
Teams: Arsenal, Aston Villa, Bournemouth, Brentford, Brighton, Burnley, Chelsea, Crystal Palace, Everton, Fulham, Ipswich, Leeds, Leicester, Liverpool, Luton, Man City, Man United, Newcastle, Norwich, Nott'm Forest, Sheffield United, Southampton, Sunderland, Tottenham, Watford, West Brom, West Ham, Wolves


Unnamed: 0,Team,HomeGames,AwayGames,TotalGames,HomeCornersFor,HomeCornersAgainst,AwayCornersFor,AwayCornersAgainst,AvgCornersFor,AvgCornersAgainst,AvgMatchCorners,NetCorners
0,Arsenal,105,105,210,6.71,3.31,5.3,4.1,6.01,3.71,9.72,2.3
1,Aston Villa,105,105,210,5.79,4.53,5.08,5.53,5.43,5.03,10.47,0.4
2,Bournemouth,67,67,134,5.82,5.12,5.0,6.25,5.41,5.69,11.1,-0.28
3,Brentford,86,86,172,4.72,5.24,4.33,6.47,4.52,5.85,10.38,-1.33
4,Brighton,105,105,210,6.29,3.84,4.62,5.28,5.45,4.56,10.01,0.9
5,Burnley,67,67,134,4.99,5.37,4.15,7.3,4.57,6.34,10.9,-1.77
6,Chelsea,105,105,210,6.5,4.08,5.21,4.64,5.86,4.36,10.21,1.5
7,Crystal Palace,105,105,210,4.7,4.74,4.28,5.81,4.49,5.28,9.77,-0.79
8,Everton,105,105,210,4.98,5.48,3.88,6.49,4.43,5.98,10.41,-1.55
9,Fulham,86,86,172,5.37,4.83,4.62,5.26,4.99,5.04,10.03,-0.05


In [12]:
# =============================================================================
# CELL 11: Team Corner Rankings - Horizontal Bar Chart
# =============================================================================

# Sort by corners won; ascending order puts the highest value at the top of a
# horizontal bar chart.
sorted_stats = team_stats_filtered.sort_values('AvgCornersFor', ascending=True)

# (column, legend label, bar colour) for the two grouped bars per team
bar_series = [
    ('AvgCornersFor', 'Corners For', COLORS['success']),
    ('AvgCornersAgainst', 'Corners Against', COLORS['danger']),
]

fig = go.Figure()
for column, legend_label, bar_color in bar_series:
    fig.add_trace(go.Bar(
        y=sorted_stats['Team'],
        x=sorted_stats[column],
        name=legend_label,
        orientation='h',
        marker_color=bar_color,
    ))

# Height grows with the number of teams so labels do not overlap (25 px per team, at least 400 px)
chart_height = max(400, len(sorted_stats) * 25)
fig.update_layout(
    title='Average Corners For vs Against by Team',
    xaxis_title='Average Corners per Match',
    yaxis_title='Team',
    barmode='group',
    template=PLOTLY_TEMPLATE,
    height=chart_height,
)
fig.show()

In [13]:
# =============================================================================
# CELL 12: Corners For vs Against Scatter (Quadrant Analysis)
# =============================================================================

# Reference lines: mean of the per-team averages over the teams being shown
# (i.e. after TEAM_FILTER has been applied).
avg_for = team_stats_filtered['AvgCornersFor'].mean()
avg_against = team_stats_filtered['AvgCornersAgainst'].mean()

fig = px.scatter(
    team_stats_filtered,
    x='AvgCornersFor',
    y='AvgCornersAgainst',
    text='Team',
    title='Team Corner Profile: For vs Against',
    labels={
        'AvgCornersFor': 'Avg Corners Won (per match)',
        'AvgCornersAgainst': 'Avg Corners Conceded (per match)'
    },
    template=PLOTLY_TEMPLATE
)

# Add quadrant lines
fig.add_hline(y=avg_against, line_dash='dash', line_color='gray')
fig.add_vline(x=avg_for, line_dash='dash', line_color='gray')

# Annotations for quadrants.
# Plotly renders annotation text as a small HTML subset, so a line break must be
# written as <br>; a '\n' would not split the label onto two lines.
# Bottom-right: many corners won, few conceded.
fig.add_annotation(x=avg_for + 0.5, y=avg_against - 0.3, text='Strong Attack<br>Strong Defense',
                   showarrow=False, font=dict(color='green', size=10))
# Top-left: few corners won, many conceded.
fig.add_annotation(x=avg_for - 0.5, y=avg_against + 0.3, text='Weak Attack<br>Weak Defense',
                   showarrow=False, font=dict(color='red', size=10))

fig.update_traces(textposition='top center', marker=dict(size=12))
fig.show()

In [14]:
# =============================================================================
# CELL 13: Team Corner Trends Over Seasons
# =============================================================================

# Calculate per-season stats for each team
def get_team_season_stats(data):
    """Build the team statistics table separately for every season and stack them.

    Parameters
    ----------
    data : pandas.DataFrame
        Match rows carrying a 'Season' column plus whatever columns
        calculate_team_stats needs.

    Returns
    -------
    pandas.DataFrame
        The per-season outputs of calculate_team_stats concatenated with a
        fresh index; each row is tagged with its season in a 'Season' column.
    """
    per_season_tables = [
        calculate_team_stats(data[data['Season'] == season_label]).assign(Season=season_label)
        for season_label in data['Season'].unique()
    ]
    return pd.concat(per_season_tables, ignore_index=True)

team_season_stats = get_team_season_stats(df)

# Choose which teams to draw: the configured filter when one is set, otherwise
# the six teams with the highest all-season average of corners won.
if not TEAM_FILTER:
    top_teams = team_stats.nlargest(6, 'AvgCornersFor')['Team'].tolist()
    teams_to_plot = top_teams
else:
    teams_to_plot = TEAM_FILTER

is_plotted = team_season_stats['Team'].isin(teams_to_plot)
plot_data = team_season_stats[is_plotted]

group_label = "Selected" if TEAM_FILTER else "Top 6"
fig = px.line(
    plot_data,
    x='Season',
    y='AvgCornersFor',
    color='Team',
    markers=True,
    title=f'Corner Trends Over Seasons ({group_label} Teams)',
    labels={'AvgCornersFor': 'Avg Corners Won', 'Season': 'Season'},
    template=PLOTLY_TEMPLATE,
)
fig.update_traces(line=dict(width=2), marker=dict(size=8))
fig.show()

In [15]:
# =============================================================================
# CELL 14: All Teams x Seasons Heatmap
# =============================================================================

# Team x Season grid of average corners won, rounded for display.
# Rows are ordered by each team's mean across its seasons (highest first);
# the helper 'Overall' column exists only for that sort and is removed again.
team_season_pivot = (
    team_season_stats
    .pivot_table(values='AvgCornersFor', index='Team', columns='Season', aggfunc='mean')
    .round(1)
    .assign(Overall=lambda grid: grid.mean(axis=1))
    .sort_values('Overall', ascending=False)
    .drop('Overall', axis=1)
)

heatmap_options = dict(
    title='Average Corners Won by Team and Season',
    labels=dict(x='Season', y='Team', color='Avg Corners'),
    template=PLOTLY_TEMPLATE,
    color_continuous_scale='Blues',
    aspect='auto',
    text_auto=True,
)
fig = px.imshow(team_season_pivot, **heatmap_options)
# 20 px per team row, but never shorter than 500 px
fig.update_layout(height=max(500, len(team_season_pivot) * 20))
fig.show()

---
## Section 4: Feature Exploration

In [16]:
# =============================================================================
# CELL 15: Correlation Heatmap
# =============================================================================

# Candidate numeric columns; only those present in the frame are correlated,
# since the loader keeps just the columns each source file provides.
numeric_cols = [
    'TotalCorners', 'HC', 'AC',
    'HS', 'AS', 'HST', 'AST',
    'HF', 'AF',
    'HY', 'AY', 'HR', 'AR',
]
available_numeric = [name for name in numeric_cols if name in df.columns]

corr_matrix = df[available_numeric].corr()

# Symmetric colour range so that zero correlation sits at the scale's midpoint
fig = px.imshow(
    corr_matrix,
    zmin=-1,
    zmax=1,
    color_continuous_scale='RdBu_r',
    text_auto='.2f',
    title='Feature Correlation Matrix',
    template=PLOTLY_TEMPLATE,
)
fig.update_layout(height=600, width=700)
fig.show()

In [17]:
# =============================================================================
# CELL 16: Top Correlated Features with Total Corners
# =============================================================================

# Correlation of every other feature with TotalCorners, strongest (by absolute
# value) first; the trivial self-correlation is removed.
target_column = corr_matrix['TotalCorners']
corner_corr = target_column.drop('TotalCorners').sort_values(key=abs, ascending=False)

fig = px.bar(
    x=corner_corr.values,
    y=corner_corr.index,
    color=corner_corr.values,
    color_continuous_scale='RdBu_r',
    orientation='h',
    title='Feature Correlations with Total Corners',
    labels={'x': 'Correlation Coefficient', 'y': 'Feature'},
    template=PLOTLY_TEMPLATE,
)
# Zero line separates positive from negative correlations
fig.add_vline(x=0, line_color='black', line_width=1)
fig.show()

print("\nCorrelation values:")
for feat in corner_corr.index:
    corr = corner_corr[feat]
    print(f"  {feat}: {corr:.3f}")


Correlation values:
  HC: 0.627
  AC: 0.526
  HS: 0.193
  AS: 0.136
  HST: 0.112
  AF: -0.097
  HF: -0.069
  AST: 0.065
  HR: -0.038
  AY: 0.024
  AR: 0.023
  HY: -0.004


In [18]:
# =============================================================================
# CELL 17: Shot-Corner Relationship
# =============================================================================

# Match-level shot totals (home + away)
df['TotalShots'] = df['HS'] + df['AS']
df['TotalShotsOnTarget'] = df['HST'] + df['AST']

# One scatter panel per shot feature, both against total corners:
# (panel column, feature column, trace name, marker colour)
shot_panels = [
    (1, 'TotalShots', 'Shots', COLORS['primary']),
    (2, 'TotalShotsOnTarget', 'Shots on Target', COLORS['secondary']),
]

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Total Shots vs Total Corners', 'Shots on Target vs Total Corners'),
)

for panel_col, feature, trace_name, marker_color in shot_panels:
    fig.add_trace(
        go.Scatter(
            x=df[feature],
            y=df['TotalCorners'],
            mode='markers',
            marker=dict(color=marker_color, opacity=0.5),
            name=trace_name,
        ),
        row=1, col=panel_col,
    )

fig.update_layout(
    title='Shot-Corner Relationship',
    template=PLOTLY_TEMPLATE,
    showlegend=False,
    height=400,
)
fig.update_xaxes(title_text='Total Shots', row=1, col=1)
fig.update_xaxes(title_text='Shots on Target', row=1, col=2)
# Only the left panel gets a y-axis title
fig.update_yaxes(title_text='Total Corners', row=1, col=1)
fig.show()

# Print correlations
shots_corr = df['TotalShots'].corr(df['TotalCorners'])
on_target_corr = df['TotalShotsOnTarget'].corr(df['TotalCorners'])
print(f"Correlation: Total Shots vs Total Corners: {shots_corr:.3f}")
print(f"Correlation: Shots on Target vs Total Corners: {on_target_corr:.3f}")

Correlation: Total Shots vs Total Corners: 0.304
Correlation: Shots on Target vs Total Corners: 0.143


In [19]:
# =============================================================================
# CELL 18: Foul-Corner Relationship
# =============================================================================

# Match-level foul and yellow-card totals (home + away)
df['TotalFouls'] = df['HF'] + df['AF']
df['TotalYellows'] = df['HY'] + df['AY']

fig = make_subplots(rows=1, cols=2, subplot_titles=(
    'Total Fouls vs Total Corners',
    'Corners by Yellow Card Count'
))

# Scatter: Fouls vs Corners
fig.add_trace(
    go.Scatter(
        x=df['TotalFouls'],
        y=df['TotalCorners'],
        mode='markers',
        marker=dict(color=COLORS['danger'], opacity=0.5),
        name='Fouls'
    ),
    row=1, col=1
)

# Box: Corners by Yellow card buckets.
# Bins are right-inclusive: (-1, 2], (2, 4], (4, 6], (6, inf). The last edge is
# open-ended so the '7+' bucket really contains every match with seven or more
# yellows; a finite upper edge would turn larger counts into NaN and silently
# drop those matches from the plot.
yellow_bucket_labels = ['0-2', '3-4', '5-6', '7+']
df['YellowBucket'] = pd.cut(df['TotalYellows'], bins=[-1, 2, 4, 6, np.inf],
                            labels=yellow_bucket_labels)

for bucket in yellow_bucket_labels:
    bucket_data = df[df['YellowBucket'] == bucket]['TotalCorners']
    fig.add_trace(
        go.Box(y=bucket_data, name=bucket),
        row=1, col=2
    )

fig.update_layout(
    title='Foul & Card Analysis',
    template=PLOTLY_TEMPLATE,
    showlegend=False,
    height=400
)
fig.update_xaxes(title_text='Total Fouls', row=1, col=1)
fig.update_xaxes(title_text='Yellow Cards (bucketed)', row=1, col=2)
fig.update_yaxes(title_text='Total Corners', row=1, col=1)
fig.show()

print(f"Correlation: Total Fouls vs Total Corners: {df['TotalFouls'].corr(df['TotalCorners']):.3f}")

Correlation: Total Fouls vs Total Corners: -0.112


---
## Summary

This notebook provides a comprehensive exploration of Premier League corner data. Key insights can be found in each section:

1. **Data Overview**: Distribution patterns and home/away differences
2. **By-Year Analysis**: Season trends and monthly patterns
3. **By-Team Analysis**: Team rankings and performance profiles
4. **Feature Exploration**: Correlations and relationships with shots/fouls

Use the `TEAM_FILTER` variable in Cell 1 to focus on specific teams of interest.