# In [None]:
# Assignment: Analyzing Data with Pandas and Visualizing Results with Matplotlib
#  Task 1: Load and Explore the Dataset

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load dataset
# load_iris(as_frame=True) returns a bunch whose `.frame` attribute is a pandas
# DataFrame; the rest of the script works on that frame as `df`.
try:
    iris = load_iris(as_frame=True)
except FileNotFoundError:
    print(" File not found. Please check the file path.")
    # Re-raise instead of continuing: without `df` every later step would fail
    # with an unrelated NameError that hides the real cause.
    raise
else:
    # Success path kept out of the `try` so only the load itself is guarded.
    df = iris.frame
    print(" Dataset loaded successfully.")

# Display first rows
print("\n First 5 rows of dataset:")
print(df.head())

# Check structure
print("\n Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# Count of null entries in each column
print("\n Missing values per column:")
print(df.isnull().sum())

# Clean missing values if any (here none): drop every row containing a null
df = df.dropna()

#  Task 2: Basic Data Analysis

# Descriptive statistics for the columns of the frame
print("\n Statistical Summary:", df.describe(), sep="\n")

# Per-group column means, grouped on the "target" column
grouped = df.groupby("target").mean()
print("\n Mean of numerical features per species:", grouped, sep="\n")

# Hand-written observations, emitted one per line after the heading
print(
    "\n Observations:",
    "- Species 0 (Setosa) has smallest sepal & petal sizes.",
    "- Species 2 (Virginica) has the largest overall measurements.",
    sep="\n",
)

#  Task 3: Data Visualization

# 1. Line Chart: sepal and petal length plotted against the sample index
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in (
    ("sepal length (cm)", "Sepal Length"),
    ("petal length (cm)", "Petal Length"),
):
    # One line per measurement, sharing the frame's index as the x axis
    ax.plot(df.index, df[column], label=label)
ax.set_title("Line Chart - Sepal vs Petal Length Trend")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Length (cm)")
ax.legend()
plt.show()

# 2. Bar Chart: Average petal length per species
plt.figure(figsize=(7,5))
# seaborn 0.13 deprecates passing `palette` without `hue`, so the x variable is
# also assigned to `hue`. dodge=False keeps a single full-width bar per
# species instead of offsetting the bars by hue level.
bar_ax = sns.barplot(
    x="target",
    y="petal length (cm)",
    hue="target",
    data=df,
    estimator="mean",
    palette="viridis",
    dodge=False,
)
# A hue legend would only repeat the x tick labels; remove it if one was drawn
# (whether seaborn adds it automatically depends on the installed version).
bar_legend = bar_ax.get_legend()
if bar_legend is not None:
    bar_legend.remove()
plt.title("Bar Chart - Average Petal Length per Species")
plt.xlabel("Species")
plt.ylabel("Avg Petal Length (cm)")
plt.show()

# 3. Histogram: how sepal width values are distributed over 15 bins
fig, ax = plt.subplots(figsize=(7, 5))
sepal_width = df["sepal width (cm)"]
ax.hist(sepal_width, bins=15, color="skyblue", edgecolor="black")
ax.set_title("Histogram - Distribution of Sepal Width")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter Plot: petal length against sepal length, coloured by "target"
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="target",
    palette="Set2",
    ax=ax,
)
ax.set_title("Scatter Plot - Sepal vs Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
# Rebuild the legend so it carries an explicit title
ax.legend(title="Species")
plt.show()

#  Findings & Observations
# Heading followed by the four numbered findings, one per output line
print(
    "\n Findings:",
    "1. Line chart shows that petal length increases more dramatically across samples compared to sepal length.",
    "2. Bar chart confirms Virginica has the largest petal length on average.",
    "3. Histogram indicates sepal width is mostly between 2.5 - 3.5 cm.",
    "4. Scatter plot shows clear clustering, making Iris dataset suitable for classification.",
    sep="\n",
)
