In [7]:
import numpy as np  # For numerical operations
import pandas as pd  # For data manipulation
import plotly.express as px  # For interactive visualizations
import plotly.graph_objects as go  # For advanced plotting

# Set default template for Plotly Express: figures created through `px` after
# this line use the "plotly_white" template unless one is passed explicitly.
px.defaults.template = "plotly_white"

In [2]:
# Read the auction player list from the Kaggle input directory into `df`
DATASET_PATH = "/kaggle/input/ipl-2025-mega-auction-dataset/ipl_2025_auction_players.csv"
df = pd.read_csv(DATASET_PATH)

# Preview the leading rows as the cell output
df.head()

Unnamed: 0,Players,Team,Type,Base,Sold
0,Virat Kohli,RCB,BAT,-,21.0
1,Rajat Patidar,RCB,BAT,-,11.0
2,Yash Dayal,RCB,BOWL,-,5.0
3,Jasprit Bumrah,MI,BOWL,-,18.0
4,Suryakumar Yadav,MI,BAT,-,16.35


In [3]:
# Display basic information about the dataset: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 623 entries, 0 to 622
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Players  623 non-null    object
 1   Team     623 non-null    object
 2   Type     623 non-null    object
 3   Base     623 non-null    object
 4   Sold     623 non-null    object
dtypes: object(5)
memory usage: 24.5+ KB


In [11]:
# Distinct Team values, leaving out rows whose Team is the '-' value
filtered_teams = df.loc[df['Team'] != '-', 'Team'].unique()
filtered_team_count = len(filtered_teams)

# Show the count together with the team codes
(filtered_team_count, filtered_teams)

(10,
 array(['RCB', 'MI', 'SRH', 'CSK', 'DC', 'KKR', 'RR', 'GT', 'LSG', 'PBKS'],
       dtype=object))

In [10]:
# Retained players and how many each team kept.
# NOTE(review): neither `retained_players` nor `retained_count` was assigned in
# any visible cell, so this cell raised a NameError on a fresh kernel. Retained
# players are assumed to be the rows whose Base is '-' (as in the df.head()
# preview) — confirm against the dataset.
retained_players = df[df['Base'] == '-']
retained_count = retained_players['Team'].value_counts()

# Prepare the data
teams = retained_count.index
counts = retained_count.values

# Create the bar chart (`go` comes from the imports cell at the top)
fig = go.Figure(
    data=go.Bar(
        x=teams,
        y=counts,
        marker=dict(color=px.colors.qualitative.Set3),  # Different colors for each bar
    )
)
fig.update_layout(
    title="Number of Players Retained by Each Team",
    xaxis_title="Team",
    yaxis_title="Number of Players Retained",
    xaxis=dict(tickmode="array", tickvals=teams, tickangle=45),
    showlegend=False,
)

fig.show()


In [12]:
import plotly.graph_objects as go

# Tally the retained players by their Type value
retained_types_count = retained_players['Type'].value_counts()

# Donut chart (pie with a 0.3 hole) of the retained-player type mix
type_donut = go.Pie(
    labels=retained_types_count.index,
    values=retained_types_count.values,
    hole=0.3,
)
fig = go.Figure(data=[type_donut])
fig.update_layout(title="Types of Players Retained")

fig.show()


In [22]:
# Sum the retention price per team on a copy, so `retained_players` keeps its
# original 'Sold' values. Entries that cannot be parsed as numbers count as 0.
retained_players_copy = retained_players.copy()
retained_players_copy['Sold'] = (
    pd.to_numeric(retained_players_copy['Sold'], errors='coerce')
    .fillna(0)
)
money_spent_by_team = retained_players_copy.groupby('Team')['Sold'].sum()

# Drop the '-' entry if it is present in the index
money_spent_by_team = money_spent_by_team.drop(index='-', errors='ignore')

In [28]:
# Interactive Plotly bar chart of the retention spend per team
spend_bars = go.Bar(
    x=money_spent_by_team.index,
    y=money_spent_by_team.values,
    marker={"color": px.colors.qualitative.Set3},  # palette supplies one colour per bar
)
fig = go.Figure(data=[spend_bars])

fig.update_layout(
    title="Money Spent by Each Team on Retaining Players",
    xaxis_title="Team",
    yaxis_title="Money Spent (in ₹ crore)",
    xaxis={
        "tickmode": "array",
        "tickvals": money_spent_by_team.index,
        "tickangle": 45,
    },
    showlegend=False,
)

fig.show()

In [29]:
# Squad size used for the remaining-slot calculation
max_slots = 25

# Retained players per team, with the '-' entry removed from the index
players_retained_count = retained_players.groupby('Team').size()
keep_team = players_retained_count.index != '-'
players_retained_count = players_retained_count[keep_team]

# Slots still open for each team
slots_left = max_slots - players_retained_count

# Show teams with the most open slots first
slots_left.sort_values(ascending=False)

Team
PBKS    23
RCB     22
DC      21
CSK     20
GT      20
LSG     20
SRH     20
MI      20
KKR     19
RR      19
dtype: int64

In [32]:
# Budget remaining per team, derived here from the retention spend so that this
# cell does not depend on a cell that appears further down the notebook.
total_money = 120  # Total money allowed in the auction per team (in crore)
money_left_by_team = total_money - money_spent_by_team

# Combine budget left and slots remaining into a single DataFrame
# (both Series are indexed by team, so rows align on the team code)
team_status = pd.DataFrame({
    'Slots Left': slots_left,
    'Budget Left (₹ crore)': money_left_by_team
})

# Sort by team names for clarity
team_status_sorted = team_status.sort_index()

# Display the result in a readable format
print("Team Status - Slots Left and Budget Left (₹ crore):\n")
print(team_status_sorted.to_string())


Team Status - Slots Left and Budget Left (₹ crore):

      Slots Left  Budget Left (₹ crore)
Team                                   
CSK           20                  55.00
DC            21                  76.25
GT            20                  69.00
KKR           19                  63.00
LSG           20                  69.00
MI            20                  45.00
PBKS          23                 110.50
RCB           22                  83.00
RR            19                  41.00
SRH           20                  45.00


In [17]:
# Calculate money left for each team
total_money = 120  # Total money allowed in the auction per team (in crore)
money_left_by_team = total_money - money_spent_by_team

# Order the teams for plotting.
# NOTE(review): `money_left_by_team_sorted` was not assigned in any visible
# cell, so the chart below raised a NameError. Sorting by remaining budget,
# largest first, is an assumption — confirm the intended order.
money_left_by_team_sorted = money_left_by_team.sort_values(ascending=False)

# Create an interactive bar chart using Plotly for money left by each team
fig = go.Figure(
    data=[
        go.Bar(
            x=money_left_by_team_sorted.index,
            y=money_left_by_team_sorted.values,
            marker=dict(color=px.colors.qualitative.Set3),  # Different colors for each bar
        )
    ]
)

fig.update_layout(
    title="Money Left for Each Team Going to Auction",
    xaxis_title="Team",
    yaxis_title="Money Left (in ₹ crore)",
    xaxis=dict(tickmode="array", tickvals=money_left_by_team_sorted.index, tickangle=45),
    showlegend=False,
)

fig.show()

In [None]:
# List the distinct values in the Team column and report how many there are
teams = pd.unique(df['Team'])

print(f"Number of teams: {len(teams)}")
print("Updated Team Names:")
# One team value per output line
for team in teams:
    print(team)

In [None]:
# Two-column table (Type, Count) with the number of rows per player type
player_type_distribution = (
    df['Type']
    .value_counts()
    .rename_axis('Type')
    .reset_index(name='Count')
)

# Donut-style pie chart of the type shares
fig1 = px.pie(
    player_type_distribution,
    names='Type',
    values='Count',
    hole=0.4,
    title="Player Type Distribution",
)
fig1.show()

In [None]:
# Two-column table (Team, Player Count) with the number of rows per Team value
team_composition = (
    df['Team']
    .value_counts()
    .rename_axis('Team')
    .reset_index(name='Player Count')
)

# Bar per team, coloured by team from a qualitative palette, with the count as bar text
fig = px.bar(
    team_composition,
    x='Team',
    y='Player Count',
    text='Player Count',
    color='Team',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    labels={'Player Count': 'Number of Players'},
    title="Number of Players in Each Team",
)

# Put the count labels above the bars
fig.update_traces(textposition='outside')

# Axis titles, title font and background colours
fig.update_layout(
    xaxis_title="Team",
    yaxis_title="Number of Players",
    title_font={'size': 20, 'family': 'Arial', 'color': 'darkblue'},
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plotting area
    paper_bgcolor='rgba(255,255,255,1)',  # white surrounding paper
)

fig.show()

In [None]:
# Convert the Base column to numeric: a '-' entry becomes 0 and any '$' or ','
# characters are stripped before the float cast.
# The pattern is a raw string with no backslash: the former '[\$,]' literal
# relied on an invalid escape sequence, which newer Python versions warn about.
df['Base'] = (
    df['Base']
    .replace('-', '0')
    .replace(r'[$,]', '', regex=True)
    .astype(float)
)

# Exclude players with Base Price of zero
filtered_df = df[df['Base'] > 0]

# Count the number of players for each unique base price
base_price_counts = filtered_df['Base'].value_counts().reset_index()
base_price_counts.columns = ['Base Price', 'Player Count']
base_price_counts = base_price_counts.sort_values('Base Price')  # Sort by base price

# Plot the bar chart for base price counts (`px` comes from the imports cell)
fig = px.bar(
    base_price_counts,
    x='Base Price',
    y='Player Count',
    title="Count of Players for Each Base Price",
    labels={'Base Price': 'Base Price (in Crore)', 'Player Count': 'Number of Players'},
    text='Player Count',
    color_discrete_sequence=['#ADD8E6']  # Light color for simplicity
)

# Clean and simple layout
fig.update_layout(
    xaxis_title="Base Price (in Crore)",
    yaxis_title="Number of Players",
    title_font=dict(size=18, family='Arial', color='black'),
    font=dict(size=14, family='Arial'),
    plot_bgcolor='rgba(255,255,255,1)',  # White plotting area
    paper_bgcolor='rgba(255,255,255,1)',  # White surrounding paper
    margin=dict(l=40, r=40, t=60, b=40),  # Minimal margins
    bargap=0.2  # Space between bars for readability
)

# Show each count above its bar
fig.update_traces(textposition='outside')
fig.show()


In [None]:
# Display unique teams
print("Unique Teams:", df['Team'].unique())

# Display summary statistics. This is kept as the cell's last expression:
# a bare expression in the middle of a cell is evaluated but never rendered.
df.describe()

In [None]:
# Add a numeric version of 'Sold'; entries that cannot be parsed become NaN
df['Sold_numeric'] = df['Sold'].pipe(pd.to_numeric, errors='coerce')

# Rows per player type, drawn as a donut-style pie
player_type_dist = df['Type'].value_counts()
pie_options = {
    'names': player_type_dist.index,
    'values': player_type_dist.values,
    'title': "Player Type Distribution",
    'hole': 0.4,
}
fig1 = px.pie(**pie_options)
fig1.show(renderer='iframe_connected')

In [None]:
# Rows per Team value
team_dist = df['Team'].value_counts()
team_names = team_dist.index
team_sizes = team_dist.values

# Plotly bar chart: one coloured bar per team with its count as text
fig2 = px.bar(
    x=team_names,
    y=team_sizes,
    text=team_sizes,
    color=team_names,
    color_discrete_sequence=px.colors.qualitative.Bold,
    labels={'x': 'Teams', 'y': 'Number of Players'},
    title="Team Composition",
)
# Counts go above the bars; text below 8pt is hidden rather than shrunk
fig2.update_traces(textposition='outside')
fig2.update_layout(uniformtext_mode='hide', uniformtext_minsize=8)
fig2.show()


In [None]:
# Base Price Distribution: number of rows per distinct Base value, ordered by value
base_price_dist = df['Base'].value_counts().sort_index()

# Create a bar chart using Plotly.
# The axis label uses crore, matching the other base-price chart in this
# notebook ("Base Price (in Crore)"); it previously said lakhs.
fig3 = px.bar(
    x=base_price_dist.index,
    y=base_price_dist.values,
    title="Base Price Distribution",
    labels={'x': 'Base Price (₹ crore)', 'y': 'Number of Players'},
    text=base_price_dist.values,
    color=base_price_dist.index,
    color_continuous_scale='Blues'
)
fig3.update_traces(textposition='outside')
# NOTE(review): categoryorder applies to category axes; if Base is numeric at
# this point the x axis is linear and this setting may have no effect — confirm.
fig3.update_layout(xaxis={'categoryorder':'total ascending'}, uniformtext_minsize=8, uniformtext_mode='hide')
fig3.show()

In [None]:
# Keep only rows that have a parsed sold price
sold_known = df[df['Sold_numeric'].notna()]

# Scatter of base price against sold price, coloured by player type
fig4 = px.scatter(
    sold_known,
    x='Base',
    y='Sold_numeric',
    color='Type',
    labels={'Base': 'Base Price', 'Sold_numeric': 'Sold Price'},
    title="Base Price vs Sold Price",
)
fig4.show(renderer='iframe_connected')

In [None]:
# Function to get three new players for each team
def get_three_new_players(team, data=None):
    """Return up to three player names for ``team`` that have a positive sold price.

    The 'Sold' column is stored as text, so it is converted with
    ``pd.to_numeric(..., errors='coerce')`` before the comparison; comparing the
    raw strings with ``> 0`` raises a TypeError. Entries that cannot be parsed
    become NaN and are excluded.

    Parameters
    ----------
    team : str
        Value to match against the 'Team' column.
    data : pandas.DataFrame, optional
        Frame with 'Players', 'Team' and 'Sold' columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    list
        The first three matching 'Players' values in frame order (fewer if the
        team has fewer matches). No check is made on whether a player was
        retained or bought.
    """
    frame = df if data is None else data
    sold_price = pd.to_numeric(frame['Sold'], errors='coerce')
    team_players = frame[(frame['Team'] == team) & (sold_price > 0)]
    return team_players['Players'].head(3).tolist()

# List of unique teams, skipping the '-' value: the team list earlier in this
# notebook excludes it, so it is not reported as a team here either
teams = df.loc[df['Team'] != '-', 'Team'].unique()

# Get three new players for each team
for team in teams:
    new_players = get_three_new_players(team)
    print(f"Three New Players Picked by {team}: {new_players}")

In [None]:
# 'Sold' is in ₹ crore — the same unit as the 120-crore purse it is subtracted
# from in the budget cells — and is stored as text, so it is parsed first.
# Multiplying the raw strings would repeat them instead of scaling a number.
RUPEES_PER_CRORE = 10_000_000
conversion_rate = 75  # INR per 1 USD
sold_crore = pd.to_numeric(df['Sold'], errors='coerce')  # unparsable entries become NaN
df['Sold_USD'] = (sold_crore * RUPEES_PER_CRORE) / conversion_rate

# Sum the total money spent in USD (NaN rows are skipped by sum)
total_spent_dollars = df['Sold_USD'].sum()

print(f"Total money spent by all teams in dollars: ${total_spent_dollars:,.2f}")

In [None]:
# NOTE(review): `plt` and `sns` are not imported in any cell visible here —
# confirm the imports cell provides matplotlib.pyplot as plt and seaborn as sns.

# Filter Mumbai Indians players. The Team column holds short codes such as
# 'MI', so filtering on the full name alone selects nothing; both spellings
# are accepted in case the names are expanded upstream.
mi_players = df[df['Team'].isin(['MI', 'Mumbai Indians'])]

# Group by 'Type' and sum 'Sold'. 'Sold' is text, so it is parsed first;
# summing the raw strings would concatenate them.
mi_sold = pd.to_numeric(mi_players['Sold'], errors='coerce')
mi_spending_by_type = mi_sold.groupby(mi_players['Type']).sum()

# Plotting (horizontal bars: spend on x, player type on y)
plt.figure(figsize=(8, 6))
sns.barplot(x=mi_spending_by_type.values, y=mi_spending_by_type.index, palette='Blues_d')
plt.xlabel('Money Spent (₹ crore)')  # same unit as the budget cells
plt.title('Money Spent by Mumbai Indians on Each Type of Player')
plt.tight_layout()
plt.show()

In [None]:
# Histogram of sold prices. 'Sold' is stored as text, so it is parsed to
# numbers first and entries that cannot be parsed are dropped.
sold_prices = pd.to_numeric(df['Sold'], errors='coerce').dropna()

plt.figure(figsize=(10, 6))
sns.histplot(sold_prices, bins=30, kde=True, color='green')
plt.xlabel('Sold Price (₹ crore)')  # same unit as the budget cells
plt.title('Distribution of Player Sold Prices')
plt.tight_layout()
plt.show()

In [None]:
# Bar per player type showing how many rows carry that type
plt.figure(figsize=(8, 6))
type_ax = sns.countplot(data=df, x='Type', palette='pastel')
type_ax.set_xlabel('Player Type')
type_ax.set_ylabel('Count')
type_ax.set_title('Count of Each Type of Player')
plt.tight_layout()
plt.show()

In [None]:
# Rows per Team value, drawn as a pie with percentage labels
team_counts = df['Team'].value_counts()
pastel_colors = sns.color_palette('pastel')

plt.figure(figsize=(8, 8))
pie_ax = team_counts.plot.pie(
    colors=pastel_colors,
    startangle=140,
    autopct='%1.1f%%',
)
pie_ax.set_ylabel('')  # suppress the default y label
pie_ax.set_title('Percentage of Players Allocated to Each Team')
plt.tight_layout()
plt.show()

In [None]:
# Select numerical columns for correlation. 'Sold' is stored as text, so every
# selected column is parsed to numbers first (unparsable entries become NaN);
# corr() cannot work with the raw string column.
numerical_cols = ['Base', 'Sold', 'Sold_USD']
numeric_view = df[numerical_cols].apply(pd.to_numeric, errors='coerce')
corr_matrix = numeric_view.corr()

# Annotated heatmap of the pairwise correlations
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Scatter of base price against sold price, coloured by player type.
# 'Sold' is stored as text; plot a copy with it parsed to numbers so the y axis
# is numeric (unparsable entries become NaN).
scatter_data = df.assign(Sold=pd.to_numeric(df['Sold'], errors='coerce'))

plt.figure(figsize=(10, 6))
sns.scatterplot(data=scatter_data, x='Base', y='Sold', hue='Type', palette='deep')
# Labels use crore, matching the budget cells and the other base-price chart
plt.xlabel('Base Price (₹ crore)')
plt.ylabel('Sold Price (₹ crore)')
plt.title('Base Price vs. Sold Price by Player Type')
plt.legend(title='Player Type')
plt.tight_layout()
plt.show()

In [None]:
# Pair plot of the price columns, coloured by player type.
# 'Sold' is stored as text; plot a copy with it parsed to numbers
# (unparsable entries become NaN) so it can be used as a numeric variable.
pairplot_data = df.assign(Sold=pd.to_numeric(df['Sold'], errors='coerce'))

sns.pairplot(pairplot_data, vars=['Base', 'Sold', 'Sold_USD'], hue='Type', palette='Set2')
plt.suptitle('Pair Plot of Numerical Features', y=1.02)  # y > 1 places the title above the grid
plt.tight_layout()
plt.show()