# Analyzing Data with Pandas and Visualizing Results with Matplotlib
## Assignment Solution

In [None]:
## Analyzing Data with Pandas and Visualizing Results with Matplotlib

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset into a DataFrame (with error handling)
try:
    iris = load_iris()
    # One column per feature, plus a categorical 'species' column built from
    # the integer target codes and their corresponding names.
    data = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    data['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
except Exception as e:
    print(f"An error occurred while loading the dataset: {e}")
    # Every later step needs `data`; stop here with the real error instead of
    # continuing and failing with an unrelated NameError.
    raise
else:
    print("Dataset loaded successfully!\n")

# Display the first few rows
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a script), so print it explicitly.
print(data.head())

# Inspect the dataset: dtype of each column and number of nulls per column
column_types = data.dtypes
missing_per_column = data.isnull().sum()
print("\nData Types:\n", column_types)
print("\nMissing Values:\n", missing_per_column)

# Data cleaning: discard every row that contains at least one missing value.
# (The Iris dataset is clean, so this only demonstrates the step; .fillna()
# is the alternative when rows should be kept.)
data = data.dropna(axis=0, how='any')

# Basic analysis: summary statistics from DataFrame.describe()
summary_stats = data.describe()
print("\nBasic Statistical Summary:\n", summary_stats)

# Mean of every measurement column, grouped by species.
# 'species' is a categorical column, so `observed` is passed explicitly:
# relying on the default triggers a pandas FutureWarning because that default
# is changing. observed=False keeps the current behaviour (one row per
# category).
grouped = data.groupby('species', observed=False).mean()
print("\nMean values grouped by species:\n", grouped)

# Findings: print a heading followed by one line per observation
print("\nObservations:")
for finding in (
    "- Setosa species tends to have smaller petal measurements compared to others.",
    "- Virginica species generally has the largest measurements.",
):
    print(finding)

# Visualization setup: apply the seaborn "whitegrid" theme to all later plots.
# set_theme() is the preferred interface; sns.set() is only a legacy alias.
sns.set_theme(style="whitegrid")

# 1. Line chart: sepal length plotted against the row index, one line per
#    species (for demonstration only)
fig, ax = plt.subplots(figsize=(10, 6))
for species_name in data['species'].unique():
    rows = data.loc[data['species'] == species_name]
    ax.plot(rows.index, rows['sepal length (cm)'], label=species_name)
ax.set_title('Sepal Length Trend per Species')
ax.set_xlabel('Index')
ax.set_ylabel('Sepal Length (cm)')
ax.legend()
plt.show()

# 2. Bar chart: average petal length per species
plt.figure(figsize=(8,6))
# errorbar=None draws the bars without error bars; it replaces the deprecated
# ci=None spelling, which emits a FutureWarning on current seaborn releases.
ax = sns.barplot(x='species', y='petal length (cm)', data=data, errorbar=None)
ax.set_title('Average Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
plt.show()

# 3. Histogram: distribution of sepal width (20 bins) with a KDE curve
plt.figure(figsize=(8, 6))
hist_ax = sns.histplot(data['sepal width (cm)'], bins=20, kde=True)
hist_ax.set_title('Distribution of Sepal Width')
hist_ax.set_xlabel('Sepal Width (cm)')
hist_ax.set_ylabel('Count')
plt.show()

# 4. Scatter plot: sepal length vs petal length, coloured by species
plt.figure(figsize=(8,6))
sns.scatterplot(x='sepal length (cm)', y='petal length (cm)', hue='species', data=data)
plt.title('Sepal Length vs Petal Length')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Petal Length (cm)')
# No plt.legend() here: seaborn already draws a legend for `hue`, and calling
# plt.legend() again would replace it with a new one that has no title.
plt.show()


## Task 2: Basic Data Analysis

## Task 3: Data Visualization

## Final Observations