In [None]:
# Quick structural overview: column dtypes and non-null counts.
# NOTE(review): `df` is first assigned in the dataset-loading cell further
# down, so on a fresh kernel this cell raises NameError unless that cell has
# already been run. The same checks are repeated there.
df.info()

# Number of missing entries in each column.
df.isna().sum()


In [None]:
# 2. Second cell: Import Libraries

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

In [None]:
# Load the Iris dataset into a DataFrame: one column per feature, plus a
# 'species' column holding the values of `iris.target`.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target

# Display first few rows of the dataset.
# A bare expression is only rendered when it is the LAST statement of a cell,
# so mid-cell results must go through display() (a builtin in Jupyter kernels).
display(df.head())

# Explore the structure of the dataset (info() prints its report directly)
df.info()

# Check for missing values (last expression, rendered automatically)
df.isnull().sum()



In [None]:
# If there are missing values, we can drop them (or fill them if required).
# For now, rows with any missing value are dropped.
rows_before = len(df)
df = df.dropna()

# Report how many rows were actually removed; the shape alone cannot show it.
# (Re-running this cell reports 0, because `df` has already been cleaned.)
rows_dropped = rows_before - len(df)
print(f"Rows dropped due to missing values: {rows_dropped}")

# Dataset shape after cleaning
df.shape


In [None]:
# Basic statistics for numerical columns.
# display() is required here: only the last expression of a cell is rendered
# automatically, so a bare df.describe() in the middle would show nothing.
display(df.describe())

# Group by species and compute the mean of numerical columns for each group
df.groupby('species').mean()




In [None]:
# Line chart showing average Sepal Length for each species
species_means = df.groupby('species')['sepal length (cm)'].mean()

# Markers make the individual per-species means visible on the line.
ax = species_means.plot(kind='line', marker='o')

# Species is a discrete code, so place ticks only on the codes that exist
# instead of letting matplotlib pick fractional positions between them.
ax.set_xticks(list(species_means.index))

plt.title('Average Sepal Length for Each Species')
plt.xlabel('Species')
plt.ylabel('Average Sepal Length (cm)')
plt.show()


In [None]:

# Average petal length per species, drawn as a bar chart.
petal_means = df.groupby('species')['petal length (cm)'].mean()
ax = petal_means.plot.bar()

# Label the axes object returned by pandas directly.
ax.set_title('Average Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Average Petal Length (cm)')
plt.show()



In [None]:
# Distribution of sepal width as a 10-bin histogram with outlined bars.
sepal_width = df['sepal width (cm)']
ax = sepal_width.plot.hist(bins=10, edgecolor='black')

# Title and axis labels are applied to the axes pandas drew on.
ax.set_title('Distribution of Sepal Width')
ax.set_xlabel('Sepal Width (cm)')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Scatter plot showing the relationship between Sepal Length and Petal Length,
# with each point colored by its species value.
points = plt.scatter(
    df['sepal length (cm)'],
    df['petal length (cm)'],
    c=df['species'],
    cmap='viridis',
)
plt.title('Sepal Length vs Petal Length')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Petal Length (cm)')

# Species is categorical: tick the colorbar only at the values that occur,
# rather than at fractional positions that correspond to no species.
# The scatter is passed explicitly so the colorbar is tied to these points.
plt.colorbar(points, ticks=sorted(df['species'].unique()), label='Species')
plt.show()

In [None]:
### Observations and Findings:
- The average petal length of **Setosa** is significantly smaller than that of the other species.
- The scatter plot shows a positive correlation between sepal length and petal length, especially for the **Virginica** species.
- The distribution of **sepal width** is single-peaked rather than uniform, with most values clustered around 3 cm.

In [None]:
# Demonstrates defensive loading: a missing file produces a friendly message
# instead of a traceback, and any other failure is echoed back to the reader.
# NOTE(review): a successful read would rebind `df`, replacing whatever frame
# that name held before this cell ran.
csv_path = 'non_existent_file.csv'
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print("The file was not found. Please check the file path.")
except Exception as load_error:
    print(f"An error occurred: {load_error}")
