# Import Required Libraries
Import the necessary libraries, including pandas, seaborn, and matplotlib.

# Learner Comments

This notebook was created by GitHub Copilot Chat using the following prompt:

```
@workspace /newNotebook create a new notebook for data analysis that loads data from the 1-data/kaggle/IPL-2022.csv file and visualizes it using seaborn
```


In [None]:
# Importing the necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# Load the Dataset
Load the '1-data/kaggle/IPL-2022.csv' file using pandas. The code reads it through the relative path '../1-data/kaggle/IPL-2022.csv'.

In [None]:
# Load the Dataset
# The CSV location is a relative path, resolved against the kernel's working directory
ipl_csv_path = '../1-data/kaggle/IPL-2022.csv'
data = pd.read_csv(ipl_csv_path)

# Preview the leading rows to confirm the file parsed as expected
data.head()

# Inspect the Dataset
Use pandas functions like info(), describe(), and isnull().sum() to inspect the dataset.

In [None]:
# Inspect the Dataset
# info() prints its own report (columns, dtypes, non-null counts) to stdout
data.info()

# A notebook cell only renders its final expression automatically, so the
# statistical summary has to be passed to display() to appear in the output
display(data.describe())

# Number of missing values in each column (rendered as the cell's result)
data.isnull().sum()

# Data Cleaning
Clean the dataset by handling missing values and outliers if any.

In [None]:
# Data Cleaning
# Report how many values are missing in each column before anything is changed
missing_values = data.isnull().sum()
print(f"Missing values in each column:\n{missing_values}")

# Fill missing values with the forward fill method as an example.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'), and
# assigning the result avoids mutating the frame in place.
data = data.ffill()

# Check for outliers using the Z-score of the numeric columns as an example.
# Rows where any numeric column lies this many standard deviations (or more)
# from the column mean are dropped.
Z_SCORE_LIMIT = 3

numeric_columns = data.select_dtypes(include=[np.number])

# nan_policy='omit' stops a leftover NaN (forward fill cannot fill a gap at the
# top of a column) from turning that whole column's scores into NaN.
z_scores = np.asarray(
    stats.zscore(numeric_columns.to_numpy(dtype=float), nan_policy='omit')
)
abs_z_scores = np.abs(z_scores)

# A NaN score (missing value or zero-variance column) is not evidence of an
# outlier, so it must not disqualify the row; otherwise every row could be lost.
within_limit = (abs_z_scores < Z_SCORE_LIMIT) | np.isnan(abs_z_scores)
filtered_entries = within_limit.all(axis=1)
data = data[filtered_entries]

# Display the cleaned data
data.head()

# Data Visualization with Seaborn
Visualize the dataset using the seaborn library. Create plots such as a count plot, a scatter plot, a pair plot, and a correlation heatmap.

In [None]:
# Data Visualization with Seaborn
# (seaborn and matplotlib are already imported in the first code cell)

# Count Plot
# countplot tallies rows per category, so each bar is the number of matches
# held at a venue -- not the runs scored there.
fig, ax = plt.subplots()
sns.countplot(x='venue', data=data, ax=ax)
ax.tick_params(axis='x', rotation=90)
ax.set(xlabel='Venue', ylabel='Number of Matches', title='Matches by Venue')
plt.show()

# Scatter Plot
# Compare the two innings totals of each match using the dataset's
# first_ings_score and second_ings_score columns.
fig, ax = plt.subplots()
ax.scatter(data['first_ings_score'], data['second_ings_score'])
ax.set(
    xlabel='First Innings Score',
    ylabel='Second Innings Score',
    title='Scatter Plot: First Innings Score vs Second Innings Score',
)
plt.show()

# Pair Plot
# Let's create a pair plot for the entire dataset to visualize all pairs of numerical columns
sns.pairplot(data)
plt.show()

# Correlation Heatmap
# Restrict the correlation matrix to numeric columns: calling corr() on a frame
# that still contains text columns raises an error in recent pandas versions.
numeric_data = data.select_dtypes(include='number')
fig, ax = plt.subplots()
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# EDA Inspiration from Kaggle

Learn from [IPL 2022 Analysis](https://www.kaggle.com/code/hashimali179/ipl-2022-analysis/notebook), then explore customizations and extensions to build intuition.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Reload the CSV: from here on `data` is the file as stored on disk again,
# replacing the cleaned/filtered frame produced by the earlier cells.
data = pd.read_csv('../1-data/kaggle/IPL-2022.csv')
print(data.tail())

# Plot number of matches won
# Columns are referenced by name throughout this cell for consistency.
figure = px.bar(data, x="match_winner",
                title="Number of Matches Won in IPL 2022")
figure.show()

# Plot top scorers in color
# Bar height and bar color both encode the `highscore` column.
figure = px.bar(data, x="top_scorer",
                y="highscore",
                color="highscore",
                title="Top Scorers in IPL 2022")
figure.show()

# Plot Team Performance
# Wins per team, split by the venue where each match was played.
figure = px.bar(data, x="match_winner", color="venue",
                title="Teams Performance at different Venues in IPL 2022")
figure.show()

# Plot Pie Chart
# value_counts() gives the number of rows (matches) recorded for each venue.
venue = data["venue"].value_counts()
label = venue.index
counts = venue.values
# NOTE(review): only two colors are listed; if there are more venues than
# colors, plotly presumably picks the remaining slice colors itself -- confirm.
colors = ['skyblue','yellow']

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Number of Matches at each Stadium')
# Show the raw count on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent',
                  textinfo='value', textfont_size=30,
                  marker=dict(colors=colors,
                              line=dict(color='black', width=2)))
fig.show()