# 🎨 Assignment 3: Data Visualization on IPL Dataset

**Objective:**  
Create an interactive data visualization dashboard using Matplotlib and Seaborn. The dashboard will enable users to explore the IPL dataset (2008-2024) by filtering seasons, teams, and venues, and generate insightful visualizations.

**Dataset:**  
IPL Dataset (matches and deliveries data) from Kaggle.

**Key Steps:**
✅ Load and preprocess the data  
✅ Handle missing values  
✅ Implement filter functionality  
✅ Visualize key insights with bar charts, line plots, pie charts, etc.  
✅ Calculate and display dynamic summary statistics  

Let’s bring the IPL data to life! 🏏


In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output

sns.set(style='whitegrid')


In [28]:
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')

print("Matches columns:", matches.columns.tolist())
print("Deliveries columns:", deliveries.columns.tolist())


Matches columns: ['id', 'season', 'city', 'date', 'match_type', 'player_of_match', 'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin', 'target_runs', 'target_overs', 'super_over', 'method', 'umpire1', 'umpire2']
Deliveries columns: ['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball', 'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs', 'total_runs', 'extras_type', 'is_wicket', 'player_dismissed', 'dismissal_kind', 'fielder']


In [29]:
# Clean matches
matches['city'].fillna('Unknown', inplace=True)
matches['player_of_match'].fillna('Unknown', inplace=True)
matches['winner'].fillna('No result', inplace=True)
matches['result_margin'].fillna(0, inplace=True)
matches['target_runs'].fillna(0, inplace=True)
matches['target_overs'].fillna(0, inplace=True)
matches['method'].fillna('normal', inplace=True)

# Clean deliveries
deliveries['player_dismissed'].fillna('None', inplace=True)
deliveries['dismissal_kind'].fillna('None', inplace=True)
deliveries['fielder'].fillna('None', inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  matches['city'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  matches['player_of_match'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are s

In [30]:
df = deliveries.merge(matches, left_on='match_id', right_on='id', suffixes=('_del', '_mat'))
print("Merged DataFrame shape:", df.shape)
print(df[['batter', 'bowler', 'venue', 'season', 'total_runs']].head())


Merged DataFrame shape: (260920, 37)
        batter   bowler                  venue   season  total_runs
0   SC Ganguly  P Kumar  M Chinnaswamy Stadium  2007/08           1
1  BB McCullum  P Kumar  M Chinnaswamy Stadium  2007/08           0
2  BB McCullum  P Kumar  M Chinnaswamy Stadium  2007/08           1
3  BB McCullum  P Kumar  M Chinnaswamy Stadium  2007/08           0
4  BB McCullum  P Kumar  M Chinnaswamy Stadium  2007/08           0


VBox(children=(Dropdown(description='Season:', options=('All', '2007/08', '2009', '2009/10', '2011', '2012', '…

In [32]:
def filter_data(season, team, venue):
    d = df.copy()
    if season != 'All':
        d = d[d['season'].astype(str) == season]
    if team != 'All':
        d = d[(d['batting_team'] == team) | (d['bowling_team'] == team)]
    if venue != 'All':
        d = d[d['venue'] == venue]
    return d


In [33]:
def show_summary(season, team, venue):
    d = filter_data(season, team, venue)
    if d.empty:
        print("⚠️ No data for selected filters.")
        return
    print("\n📊 Summary Statistics:")
    print(f"• Deliveries: {len(d)}")
    print(f"• Matches: {d['match_id'].nunique()}")
    print(f"• Total Runs: {d['total_runs'].sum()}")
    print(f"• Mean Runs/Delivery: {d['total_runs'].mean():.2f}")
    print(f"• Median Runs/Delivery: {d['total_runs'].median()}\n")


In [34]:
def show_visualizations(season, team, venue):
    d = filter_data(season, team, venue)
    if d.empty:
        print("⚠️ No data to plot for selected filters.")
        return

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle(f"Visuals: Season={season}, Team={team}, Venue={venue}", fontsize=16)
    plt.subplots_adjust(hspace=0.4, wspace=0.3)

    # 1. Top 10 batters
    top = d.groupby('batter')['batsman_runs'].sum().nlargest(10)
    sns.barplot(x=top.values, y=top.index, ax=axes[0,0], palette='magma')
    axes[0,0].set_title('Top 10 Batters (by Runs)')
    axes[0,0].set_xlabel('Runs')

    # 2. Dismissal types
    dc = d['dismissal_kind'].value_counts()
    axes[0,1].pie(dc, labels=dc.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('pastel'))
    axes[0,1].set_title('Dismissal Types')

    # 3. Runs per over
    roc = d.groupby('over')['total_runs'].sum()
    axes[1,0].plot(roc.index, roc.values, marker='o', color='teal')
    axes[1,0].set_title('Runs per Over')
    axes[1,0].set_xlabel('Over')
    axes[1,0].set_ylabel('Runs')

    # 4. Blank/Optional
    axes[1,1].axis('off')

    plt.show()


In [36]:
season_options = ['All'] + sorted(df['season'].astype(str).unique())
team_options = ['All'] + sorted(df['batting_team'].unique())
venue_options = ['All'] + sorted(df['venue'].unique())

season_widget = widgets.Dropdown(options=season_options, description='Season:')
team_widget = widgets.Dropdown(options=team_options, description='Team:')
venue_widget = widgets.Dropdown(options=venue_options, description='Venue:')

display(widgets.VBox([season_widget, team_widget, venue_widget]))

out_s = widgets.Output()
out_v = widgets.Output()

def update_all(change):
    with out_s:
        clear_output(wait=True)
        show_summary(season_widget.value, team_widget.value, venue_widget.value)
    with out_v:
        clear_output(wait=True)
        show_visualizations(season_widget.value, team_widget.value, venue_widget.value)

season_widget.observe(update_all, 'value')
team_widget.observe(update_all, 'value')
venue_widget.observe(update_all, 'value')

display(out_s, out_v)
update_all(None)


VBox(children=(Dropdown(description='Season:', options=('All', '2007/08', '2009', '2009/10', '2011', '2012', '…

Output()

Output()