In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import plotly.io as pio
# Set 'kaggle' as plotly's default renderer; it is used by any fig.show() call
# that does not select a renderer itself.
pio.renderers.default = 'kaggle'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Silence every warning for the rest of the session (no category or module filter
# is given). Remove this line when debugging, since real problems are hidden too.
warnings.filterwarnings("ignore")

In [None]:
# Read the auction players CSV from the Kaggle input directory into `df`.
AUCTION_CSV = "/kaggle/input/ipl-2025-mega-auction-dataset/ipl_2025_auction_players.csv"
df = pd.read_csv(AUCTION_CSV)

In [None]:
# Preview the first rows to see the columns and the raw value formats.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column labels available for the analysis below.
df.columns

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Summary statistics of the frame as loaded.
df.describe()

In [None]:
# Per-column dtype and non-null count, plus memory usage.
df.info()

In [None]:
# Distinct raw values in 'Sold'; used to spot non-numeric markers before casting.
df["Sold"].unique()

In [None]:
# Turn the non-numeric markers 'Unsold' and 'TBA' into '0', then cast the column
# to numbers. errors='coerce' converts anything still unparseable to NaN.
sold_raw = df['Sold'].replace({'Unsold': '0', 'TBA': '0'})
df['Sold'] = pd.to_numeric(sold_raw, errors='coerce')

In [None]:
# Check the column dtypes after the 'Sold' conversion.
df.dtypes

In [None]:
# Distinct raw values in 'Base'; used to spot non-numeric markers before casting.
df["Base"].unique()

In [None]:
# Turn the '-' placeholder into '0', then cast the column to numbers.
# errors='coerce' converts anything still unparseable to NaN.
base_raw = df['Base'].replace({'-': '0'})
df['Base'] = pd.to_numeric(base_raw, errors='coerce')

In [None]:
# Check the column dtypes after the 'Base' conversion.
df.dtypes

In [None]:
# Distinct values in 'Team'.
df["Team"].unique()

In [None]:
# Number of rows for each 'Team' value.
df["Team"].value_counts()

In [None]:
# Summary statistics for the two price columns.
df[['Base', 'Sold']].describe()

### ***1. Distribution of Sold Prices***

In [None]:
# Histogram of realised sale prices.
# 'Unsold'/'TBA' rows are stored as 0 once 'Sold' is numeric, and unparseable values
# as NaN. The `> 0` filter drops both, so non-sales do not pile up as a spike at zero.
sold_prices = df.loc[df['Sold'] > 0, 'Sold']
fig1 = px.histogram(sold_prices.to_frame(),
                    x='Sold',
                    nbins=30,
                    histnorm='probability density',
                    title='Distribution of Sold Prices',
                    labels={'Sold': 'Sold Price (in Crores)'},
                    color_discrete_sequence=['royalblue'])
fig1.update_traces(marker=dict(line=dict(width=1, color='black')))
# The bars are normalised to a probability density, so label the y-axis accordingly.
fig1.update_layout(xaxis_title='Sold Price (in Crores)',
                   yaxis_title='Probability Density',
                   bargap=0.2)

# Select the iframe renderer through show()'s `renderer` keyword.
fig1.show(renderer='iframe')

### ***2. Top 10 Most Expensive Players:***

In [None]:
# Ten highest 'Sold' values; NaN prices sort last and therefore never reach the top 10.
top_expensive_players = df.sort_values(by='Sold', ascending=False).head(10)
fig2 = px.bar(top_expensive_players, x='Sold', y='Players',
              title='Top 10 Most Expensive Players',
              labels={'Sold': 'Sold Price (in Crores)', 'Players': 'Players'},
              color='Sold', color_continuous_scale='rainbow',
              text='Sold')

# Show values on bars. The label is bound to each bar, so it stays aligned
# regardless of category order or repeated player names.
fig2.update_traces(texttemplate='%{text:.2f} Cr', textposition='outside', cliponaxis=False)

# A categorical y-axis starts at the bottom; reverse it so the most expensive
# player is listed first.
fig2.update_layout(xaxis_title='Sold Price (in Crores)', yaxis_title='Players',
                   yaxis=dict(autorange='reversed'))
# Select the iframe renderer through show()'s `renderer` keyword.
fig2.show(renderer='iframe')

### ***3. Base Price vs Sold Price:***

In [None]:
# Box plot of 'Sold' grouped (and coloured) by each distinct 'Base' value.
# NOTE: rows whose 'Sold' was converted to 0 ('Unsold'/'TBA') are part of every box.
fig3 = px.box(df, x='Base', y='Sold', title='Base Price vs Sold Price',
              labels={'Base': 'Base Price (in Crores)', 'Sold': 'Sold Price (in Crores)'},
              color='Base')

fig3.update_traces(boxmean='sd')  # Optional: also draw the mean and standard deviation
fig3.update_layout(xaxis_title='Base Price (in Crores)', yaxis_title='Sold Price (in Crores)')
# Select the iframe renderer through show()'s `renderer` keyword.
fig3.show(renderer='iframe')

### ***4. Unsold Players:***

In [None]:
# Rows whose 'Sold' value is exactly 0, i.e. the entries that were 'Unsold' or 'TBA'
# before the column was converted to numbers.
unsold_players = df[df['Sold'] == 0]
print("Number of unsold players:", len(unsold_players))
# Unsold players per player type (the grouping column is 'Type', not 'Team').
unsold_counts = unsold_players['Type'].value_counts()
fig4 = px.bar(x=unsold_counts.index, y=unsold_counts.values,
              title="Unsold Players",
              labels={'x': 'Type', 'y': 'Number of Unsold Players'},
              color=unsold_counts.index)


fig4.update_layout(xaxis_title="Type", yaxis_title="Number of Unsold Players")
# Select the iframe renderer through show()'s `renderer` keyword.
fig4.show(renderer='iframe')

### ***5. Sold Prices by Team***

In [None]:
# One box per 'Team' value showing the spread of 'Sold' prices.
fig5 = px.box(df, x='Team', y='Sold', title='Sold Prices by Team')

fig5.update_layout(xaxis_title='Team', yaxis_title='Sold Price', xaxis_tickangle=-45)
# Select the iframe renderer through show()'s `renderer` keyword.
fig5.show(renderer='iframe')

### ***6. Average Sold Price by Player Type:***

In [None]:
# Mean 'Sold' value for each player type, ordered from highest to lowest.
type_means = df.groupby('Type')['Sold'].mean()
avg_sold_by_type = type_means.sort_values(ascending=False)
print("Average Sold Price by Player Type:\n", avg_sold_by_type)

In [None]:
# Mean 'Sold' value per player type, highest first. It is recomputed here so the
# cell does not depend on the printing cell having been run.
# NOTE: rows whose 'Sold' is 0 ('Unsold'/'TBA') are included in each mean.
avg_sold_by_type = df.groupby('Type')['Sold'].mean().sort_values(ascending=False)
fig6 = px.bar(x=avg_sold_by_type.index, y=avg_sold_by_type.values,
              title="Average Sold Price by Player Type",
              labels={'x': 'Player Type', 'y': 'Average Sold Price (in Crores)'},
              color=avg_sold_by_type.values, color_continuous_scale='Blues')


fig6.update_layout(xaxis_title="Player Type", yaxis_title="Average Sold Price (in Crores)")
# Select the iframe renderer through show()'s `renderer` keyword.
fig6.show(renderer='iframe')

### ***7. Total Spending by Team:***

In [None]:
# Sum of 'Sold' for each 'Team' value, largest total first.
team_totals = df.groupby('Team')['Sold'].sum()
total_spending_by_team = team_totals.sort_values(ascending=False)
print("Total Spending by Team:\n", total_spending_by_team)

### ***8. Player Type Analysis***

In [None]:
# Number of rows per player type, with bars ordered from most to least frequent.
# category_orders is documented to take plain lists, so the value_counts index
# is converted explicitly.
type_order = df['Type'].value_counts().index.tolist()
fig8 = px.histogram(df, x='Type', title='Count of Players by Type',
                    labels={'Type': 'Player Type'},
                    category_orders={'Type': type_order})

fig8.update_layout(xaxis_title='Player Type', yaxis_title='Count', xaxis_tickangle=-45)
# Select the iframe renderer through show()'s `renderer` keyword.
fig8.show(renderer='iframe')

### ***9. Sold vs Unsold Players by Type***

In [None]:
# A new column to indicate sold status.
# A row counts as 'Sold' only when its price is present and non-zero. NaN has to be
# tested with notna(): a list-membership check such as `x in [0, np.nan]` compares by
# identity and then equality, and a NaN taken from the Series satisfies neither, so
# missing prices would be labelled 'Sold'.
is_sold = df['Sold'].notna() & df['Sold'].ne(0)
df['Sold_Status'] = np.where(is_sold, 'Sold', 'Unsold')

fig9 = px.histogram(df, x='Type', color='Sold_Status',
                    title='Count of Sold vs Unsold Players by Type',
                    labels={'Type': 'Player Type', 'Sold_Status': 'Sold Status'},
                    barmode='group')

fig9.update_layout(xaxis_title='Player Type', yaxis_title='Count', xaxis_tickangle=-45)
# Select the iframe renderer through show()'s `renderer` keyword.
fig9.show(renderer='iframe')

In [None]:
# Rows with a strictly positive 'Sold' value; rows with 0 or NaN are left out.
has_positive_price = df['Sold'].gt(0)
new_df = df.loc[has_positive_price]
new_df

### ***10. Player Type Composition by Team***

In [None]:
# Stacked count of player types within each team, built from `new_df`
# (the rows with a positive 'Sold' value).
fig10 = px.histogram(new_df, x='Team', color='Type',
                     title='Player Type Composition by Team',
                     labels={'Team': 'Team', 'Type': 'Player Type'},
                     barmode='stack')

fig10.update_layout(xaxis_title='Team', yaxis_title='Count of Players', xaxis_tickangle=-45)
# Select the iframe renderer through show()'s `renderer` keyword.
fig10.show(renderer='iframe')