
# Task 1 — Exploring & Visualizing the Iris Dataset

**Objective:** Read, summarize, and visualize a simple dataset (Iris).


In [None]:

import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
# Show every column when a DataFrame is rendered (None removes the column cap).
pd.options.display.max_columns = None


In [None]:

# Load Iris as a DataFrame and give the five columns short snake_case names.
iris_bunch = load_iris(as_frame=True)
df = iris_bunch.frame.copy()
df.columns = ["sepal_length","sepal_width","petal_length","petal_width","species"]

# The last column arrives as integer class codes (0, 1, 2). Map each code to its
# name from `target_names` so plot legends show the species, and so the label is
# not summarized or correlated as if it were a numeric measurement.
species_names = dict(enumerate(map(str, iris_bunch.target_names)))
df["species"] = df["species"].map(species_names)
assert df["species"].notna().all(), "found a class code with no matching species name"

# Quick structural summary: dimensions, first rows, per-column statistics.
print("Shape:", df.shape)
display(df.head())
display(df.describe())


In [None]:

# Petal length against petal width, drawn as one scatter series per species value
# so each class gets its own colour and legend entry.
fig, ax = plt.subplots()
for species_value in df['species'].unique():
    subset = df.loc[df['species'] == species_value]
    ax.scatter(
        subset['petal_length'],
        subset['petal_width'],
        label=species_value,
        alpha=0.8,
    )
ax.set_xlabel("Petal length (cm)")
ax.set_ylabel("Petal width (cm)")
ax.set_title("Petal length vs width by species")
ax.legend()
plt.show()


In [None]:

# Distribution of sepal length over all rows, split into 20 bins.
fig, ax = plt.subplots()
ax.hist(df['sepal_length'], bins=20, alpha=0.9)
ax.set_xlabel("Sepal length (cm)")
ax.set_ylabel("Count")
ax.set_title("Distribution of Sepal Length")
plt.show()


In [None]:

# Box plots: numeric features
# DataFrame.plot opens a figure of its own unless it is handed an Axes, so the
# axes are created here and passed in. Calling plt.figure() first would leave an
# empty extra figure in the output and bind `fig` to that blank figure.
fig, ax = plt.subplots()
df[["sepal_length","sepal_width","petal_length","petal_width"]].plot(kind='box', grid=True, ax=ax)
ax.set_ylabel("Measurement (cm)")
ax.set_title("Box plots of numeric features")
plt.show()


In [None]:

# Pairwise Pearson correlation between the numeric columns; non-numeric columns
# are left out. The matrix is the cell's last expression so it renders as a table.
corr_matrix = df.corr(method="pearson", numeric_only=True)
corr_matrix



# Conclusion

- **Goal met:** Loaded Iris, summarized structure, and visualized relationships.  
- **Key patterns:** Petal length/width clearly separate classes; sepal features overlap more.  
- **Distribution:** Features are well-behaved with tight ranges; no extreme outliers.  
- **Next steps:** Try pairplots, class-wise boxplots, and a quick Logistic Regression baseline to quantify separability.
