# Analyzing Data with Pandas and Visualizing Results with Matplotlib
## Assignment Solution

In [None]:

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Load the Iris dataset from scikit-learn into a pandas DataFrame.
# `data` ends up with one column per entry in iris.feature_names plus a
# categorical 'species' column built from the integer targets.
try:
    from sklearn.datasets import load_iris
    iris = load_iris()
    # Create a DataFrame
    data = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Map the integer class codes in iris.target to their names in iris.target_names
    data['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
    print("Dataset loaded successfully!")
except Exception as e:
    print(f"An error occurred while loading the dataset: {e}")
    # Re-raise: every later cell needs `data`, so continuing would only
    # surface as a confusing NameError further down the notebook.
    raise


## Task 1: Load and Explore the Dataset

In [None]:

# Quick look at the dataset: a sample of rows, the column dtypes, and a
# per-column count of missing values.

# Display the first 5 rows
print("\nFirst five rows of the dataset:")
print(data.head())

# Check data types and missing values
print("\nDataset Information:")
# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
data.info()

print("\nMissing Values in Dataset:")
print(data.isnull().sum())

# The count above shows whether anything is missing. If there were missing
# values, you could forward-fill or drop them without mutating in place:
# data = data.ffill()
# data = data.dropna()


## Task 2: Basic Data Analysis

In [None]:

# Summary statistics for the numeric columns, followed by per-species means.

# Basic statistics
print("\nBasic Statistical Description:")
print(data.describe())

# Grouping: mean petal length by species
# 'species' is categorical; observed=True is passed explicitly so the result
# does not depend on the pandas version's default (the default is changing and
# omitting it raises a FutureWarning on recent releases).
grouped = data.groupby('species', observed=True).mean()
print("\nMean Values Grouped by Species:")
print(grouped[['petal length (cm)']])

# Observations
print("\nObservations:")
print("- Virginica species tends to have larger petal and sepal measurements.")
print("- Setosa species has the smallest petal lengths.")


## Task 3: Data Visualization

In [None]:

# Apply the seaborn "whitegrid" style to the figures drawn from here on
sns.set(style="whitegrid")

# Line chart: sepal length plotted against the row index. The index stands in
# for a time axis purely for illustration.
plt.figure(figsize=(8, 5))
line_ax = plt.gca()
line_ax.plot(
    data.index,
    data['sepal length (cm)'],
    color='blue',
    label='Sepal Length',
)
line_ax.set_title('Sepal Length Trend Across Dataset')
line_ax.set_xlabel('Index')
line_ax.set_ylabel('Sepal Length (cm)')
line_ax.legend()
plt.show()

# Bar Chart: Average petal length by species
# (sns.barplot aggregates each category with its default estimator, the mean.)
plt.figure(figsize=(8,5))
# Passing `palette` without `hue` is deprecated in recent seaborn releases, so
# the x variable is also assigned to `hue`. dodge=False keeps one full-width
# bar per species on versions that would otherwise offset hue levels.
bar_ax = sns.barplot(
    x='species',
    y='petal length (cm)',
    hue='species',
    data=data,
    palette='pastel',
    dodge=False,
)
# The hue legend would only repeat the x tick labels; drop it if one was drawn.
bar_legend = bar_ax.get_legend()
if bar_legend is not None:
    bar_legend.remove()
plt.title('Average Petal Length per Species')
plt.xlabel('Species')
plt.ylabel('Petal Length (cm)')
plt.show()

# Histogram: how sepal length values are distributed over 20 bins
plt.figure(figsize=(8, 5))
hist_ax = plt.gca()
sepal_lengths = data['sepal length (cm)']
hist_ax.hist(sepal_lengths, bins=20, color='green', edgecolor='black')
hist_ax.set_title('Distribution of Sepal Length')
hist_ax.set_xlabel('Sepal Length (cm)')
hist_ax.set_ylabel('Frequency')
plt.show()

# Scatter Plot: Sepal Length vs Petal Length, coloured by species
plt.figure(figsize=(8,5))
sns.scatterplot(x='sepal length (cm)', y='petal length (cm)', hue='species', data=data, palette='deep')
plt.title('Sepal Length vs Petal Length')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Petal Length (cm)')
# No plt.legend() here: seaborn already draws a titled legend for `hue`, and a
# bare plt.legend() call would rebuild it and discard the "species" title.
plt.show()


## Final Observations

In [None]:

# Closing summary: a heading followed by one bullet per finding.
final_notes = (
    "- The Setosa species has distinctly smaller petal lengths compared to Versicolor and Virginica.",
    "- There is a positive correlation between Sepal Length and Petal Length, especially for Virginica.",
    "- The distribution of Sepal Length is slightly skewed to the right.",
)

print("\nFinal Observations:")
for note in final_notes:
    print(note)
