# 📊 Analyzing the Iris Dataset with Pandas & Matplotlib
Assignment: Analyzing Data with Pandas and Visualizing Results

Dataset: Iris

Author: Davidson Maiso

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Apply seaborn's "whitegrid" theme to every figure created below.
# `sns.set` is only an alias of `sns.set_theme` that seaborn says may be
# removed; `set_theme` (seaborn >= 0.11) takes the same arguments.
sns.set_theme(style="whitegrid")

In [None]:
# ----------------------------
# Task 1: Load and Explore Dataset
# ----------------------------

# Load Iris as pandas objects. Per the scikit-learn docs, `iris.frame` holds
# the feature columns plus an integer `target` column with each row's class.
iris = load_iris(as_frame=True)
df = iris.frame

# Translate every integer class code into its species name by indexing the
# array of names with the codes (made explicit via `.to_numpy()`).
df["species"] = iris.target_names[iris.target.to_numpy()]

print("✅ Dataset loaded successfully!")
print("\nFirst 5 rows of the dataset:")
print(df.head())

print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

print("\nMissing values per column:")
print(df.isnull().sum())

In [None]:
# ----------------------------
# Task 2: Basic Data Analysis
# ----------------------------

# The integer `target` column only encodes the species, so its mean/std carry
# no meaning; leave it out of the statistics. `errors="ignore"` keeps this
# cell working even if the column is not present.
features = df.drop(columns="target", errors="ignore")

print("\nStatistical Summary:")
print(features.describe())

# Per-species mean of every measurement; `numeric_only=True` guards against
# any non-numeric column other than the grouping key.
grouped = features.groupby("species").mean(numeric_only=True)
print("\nMean values per species:")
print(grouped)

In [None]:
# ----------------------------
# Task 3: Data Visualization
# ----------------------------

# 1. Line Chart
# `grouped` is indexed by species name, so pandas draws the points at
# positions 0..n-1 and labels the ticks with the names. Pin exactly one tick
# per species and label the axis accordingly (not as a numeric index).
ax = grouped["petal length (cm)"].plot(kind="line", marker="o")
ax.set_xticks(range(len(grouped.index)))
ax.set_xticklabels(grouped.index)
plt.title("Average Petal Length per Species")
plt.xlabel("Species")
plt.ylabel("Average Petal Length (cm)")
plt.show()

# 2. Bar Chart
grouped["sepal width (cm)"].plot(kind="bar", color="skyblue")
plt.title("Average Sepal Width per Species")
plt.xlabel("Species")
plt.ylabel("Average Sepal Width (cm)")
plt.show()

# 3. Histogram
df["sepal length (cm)"].plot(kind="hist", bins=20, edgecolor="black")
plt.title("Distribution of Sepal Lengths")
plt.xlabel("Sepal Length (cm)")
# Label the y-axis explicitly, as the other charts do.
plt.ylabel("Frequency")
plt.show()

# 4. Scatter Plot
plt.figure(figsize=(6, 4))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="sepal width (cm)",
    hue="species",
    palette="deep",
)
plt.title("Sepal Length vs Sepal Width by Species")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(title="Species")
plt.show()

In [None]:
# ----------------------------
# Findings & Observations
# ----------------------------

# Emit the heading and every bullet in a single call; `sep="\n"` places each
# argument on its own line, exactly as separate print() calls would.
print(
    "\n🔎 Observations:",
    "- Setosa species tends to have the smallest petal length, while Virginica has the largest.",
    "- Versicolor is generally in the middle range for most features.",
    "- Sepal length is normally distributed around ~5.8 cm.",
    "- Scatter plot shows Setosa clearly separates from other species in sepal size.",
    sep="\n",
)