In [12]:
# Step 1: Imports and Data Loading
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the iris CSV (path is relative to the notebook's working directory)
# and preview the first five rows as the cell output.
iris_csv = "iris.csv"
data = pd.read_csv(iris_csv)
data.head(5)

In [13]:
# Step 2: Data Overview
# Display basic information about the dataset
print("Step 2: Data Overview")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must be called directly: wrapping it in print() appends a stray
# "None" line after the report.
data.info()

In [14]:
# Step 3: Summary Statistics
# Descriptive statistics (count, mean, std, min, quartiles, max) as
# computed by DataFrame.describe()
print("\nStep 3: Summary Statistics")
summary_stats = data.describe()
print(summary_stats)

In [15]:
# Step 4: Missing Values
# Report how many missing entries each column contains
print("\nStep 4: Missing Values")
missing_per_column = data.isna().sum()
print(missing_per_column)

In [16]:
# Step 5: Data Distribution
# Draw one histogram (with a KDE overlay) per numerical column of `data`,
# laid out on a grid with at most three panels per row.
print("\nStep 5: Data Distribution")

# Get the list of numerical columns
numerical_columns = data.select_dtypes(include=['int', 'float']).columns
num_numerical_columns = len(numerical_columns)

# Determine the subplot layout: ceiling division gives the number of rows
# needed when each row holds up to three panels.
num_rows = (num_numerical_columns - 1) // 3 + 1
num_cols = min(num_numerical_columns, 3)

if num_numerical_columns == 0:
    # With no numerical columns the layout is 0 x 0 and the requested
    # figure height (5 * num_rows) is 0, so skip plotting entirely.
    print("No numerical columns to plot.")
else:
    # Create subplots dynamically
    plt.figure(figsize=(15, 5 * num_rows))
    for i, column in enumerate(numerical_columns):
        # Keep the Axes handle and pass it explicitly instead of relying
        # on pyplot's implicit "current axes" state.
        ax = plt.subplot(num_rows, num_cols, i + 1)
        sns.histplot(data[column], kde=True, ax=ax)
        ax.set_title(f'Distribution of {column}')
    plt.tight_layout()
    plt.show()

In [17]:
# Step 6: Correlation Analysis
# Calculate and visualize correlations between numerical features
print("\nStep 6: Correlation Analysis")
# Restrict the frame to numeric columns before correlating. Since
# pandas 2.0, DataFrame.corr() no longer drops non-numeric columns by
# default and raises when it meets one (this notebook's `data` also holds
# the 'variety' label column, used as the pairplot hue in Step 8).
# select_dtypes works on every pandas version, unlike the `numeric_only`
# keyword which only exists from pandas 1.5 onwards.
correlation_matrix = data.select_dtypes(include='number').corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()

In [18]:
# Step 7: Outliers
# Draw one horizontal box plot per numerical column of `data` so that
# points beyond the whiskers (potential outliers) are visible.
print("\nStep 7: Outliers")

# Get the list of numerical columns (recomputed here so the cell does not
# depend on the variables of an earlier plotting cell)
numerical_columns = data.select_dtypes(include=['int', 'float']).columns
num_numerical_columns = len(numerical_columns)

# Determine the subplot layout: ceiling division gives the number of rows
# needed when each row holds up to three panels.
num_rows = (num_numerical_columns - 1) // 3 + 1
num_cols = min(num_numerical_columns, 3)

if num_numerical_columns == 0:
    # With no numerical columns the layout is 0 x 0 and the requested
    # figure height (5 * num_rows) is 0, so skip plotting entirely.
    print("No numerical columns to plot.")
else:
    # Create subplots dynamically
    plt.figure(figsize=(15, 5 * num_rows))
    for i, column in enumerate(numerical_columns):
        # Keep the Axes handle and pass it explicitly instead of relying
        # on pyplot's implicit "current axes" state.
        ax = plt.subplot(num_rows, num_cols, i + 1)
        sns.boxplot(data=data, x=column, ax=ax)
        ax.set_title(f'Boxplot of {column}')
    plt.tight_layout()
    plt.show()

In [19]:
# Step 8: Pairwise Relationships
# Visualize pairwise relationships between numerical features, coloured by
# the 'variety' column, with KDE curves on the diagonal
print("\nStep 8: Pairwise Relationships")
sns.pairplot(data=data, hue='variety', diag_kind='kde')
# The pair grid fills the whole figure, so a title at the default height
# is drawn over the top row of panels; y > 1 lifts it just above the grid.
plt.suptitle("Pairwise Relationships", y=1.02)
plt.show()

In [20]:
# Step 9: Target Counts
# Count how many rows belong to each class of the 'variety' column and
# show the counts as a bar chart.
print("\nStep 9: Target Counts")
# The name `outcome_counts` is kept unchanged in case other cells of the
# notebook refer to it.
outcome_counts = data['variety'].value_counts()

# One colour per class, taken from matplotlib's default colour cycle
# ("C0", "C1", ...). A fixed two-colour list would be reused as soon as
# the column holds more than two classes.
bar_colors = [f"C{i}" for i in range(len(outcome_counts))]

# Create a bar plot
plt.figure(figsize=(8, 6))
outcome_counts.plot(kind='bar', color=bar_colors)
plt.title('Count of Samples per Variety')
plt.xlabel('Variety')
plt.ylabel('Count')
plt.xticks(rotation=0)  # Keep the class names horizontal
plt.show()