# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Task 1: Load and Explore the Dataset
def load_and_explore(
    source='https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv',
):
    """Load a CSV dataset, print an overview of it, and return it cleaned.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, file path or
            file-like object). Defaults to the Iris CSV from the
            seaborn-data repository, which is what the script analyses.

    Returns:
        The loaded DataFrame with every row that contains a missing value
        dropped, or ``None`` if the data could not be read.
    """
    # Only the read itself is guarded. Any failure (network, missing file,
    # parse error) is reported and signalled with None instead of raised,
    # because the caller checks for None before continuing.
    try:
        df = pd.read_csv(source)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None
    else:
        print("Dataset loaded successfully.\n")

    # Display first few rows
    print("First 5 rows of the dataset:")
    print(df.head())
    print("\n")

    # Explore structure.
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() -- that would emit a stray "None" line.
    print("Dataset info:")
    df.info()
    print("\n")

    # Check for missing values
    print("Missing values summary:")
    print(df.isnull().sum())
    print("\n")

    # Drop incomplete rows so downstream statistics and plots see only
    # complete observations.
    df_clean = df.dropna()

    return df_clean

# Task 2: Basic Data Analysis
def basic_analysis(df):
    """Print summary statistics and per-species means for *df*.

    Args:
        df: DataFrame with a ``species`` column to group by, plus the
            measurement columns to summarise.

    Returns:
        DataFrame indexed by species holding the mean of every numeric
        column.
    """
    # Basic statistics
    print("Basic statistics of numerical columns:")
    print(df.describe())
    print("\n")

    # Group by species and compute mean.
    # numeric_only=True keeps this working when the frame carries other
    # non-numeric columns; without it pandas >= 2.0 raises TypeError.
    print("Mean measurements by species:")
    species_means = df.groupby('species').mean(numeric_only=True)
    print(species_means)
    print("\n")

    # NOTE: these findings are fixed text written for the Iris dataset; they
    # are not derived from the values in df.
    print("Interesting findings:")
    print("- Setosa has significantly smaller petal measurements than other species")
    print("- Virginica has the largest mean measurements for all features except sepal width")
    print("- Versicolor is intermediate in most measurements")
    print("\n")

    return species_means

# Task 3: Data Visualization
def create_visualizations(df, species_means):
    """Draw a 2x2 grid of plots, save it as a PNG and display it.

    The four panels are: sepal length by row index (line), mean petal length
    per species (bar), sepal width distribution (histogram) and sepal length
    against petal length coloured by species (scatter).

    Args:
        df: DataFrame with ``sepal_length``, ``sepal_width``,
            ``petal_length`` and ``species`` columns.
        species_means: Per-species means indexed by species, with a
            ``petal_length`` column.

    Side effects:
        Writes ``iris_visualizations.png`` to the current working directory
        and calls ``plt.show()``.
    """
    # One palette drives both the bar chart and the scatter plot, so a
    # species always gets the same colour regardless of row/index order.
    colors = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}
    fallback_color = 'gray'  # used for any species missing from the palette

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    line_ax, bar_ax, hist_ax, scatter_ax = axes.flat

    # 1. Line chart (using index as pseudo-time for demonstration)
    df['sepal_length'].plot(
        kind='line', title='Sepal Length Trend (by index)', ax=line_ax
    )
    line_ax.set_xlabel('Index')
    line_ax.set_ylabel('Sepal Length (cm)')

    # 2. Bar chart -- colours looked up by species name, not by position
    bar_colors = [colors.get(name, fallback_color) for name in species_means.index]
    species_means['petal_length'].plot(
        kind='bar', color=bar_colors, rot=0, ax=bar_ax
    )
    bar_ax.set_title('Average Petal Length by Species')
    bar_ax.set_ylabel('Length (cm)')

    # 3. Histogram
    df['sepal_width'].hist(bins=15, color='purple', ax=hist_ax)
    hist_ax.set_title('Distribution of Sepal Width')
    hist_ax.set_xlabel('Width (cm)')
    hist_ax.set_ylabel('Frequency')

    # 4. Scatter plot
    point_colors = [colors.get(name, fallback_color) for name in df['species']]
    scatter_ax.scatter(
        df['sepal_length'], df['petal_length'], c=point_colors, alpha=0.7
    )
    scatter_ax.set_title('Sepal Length vs Petal Length')
    scatter_ax.set_xlabel('Sepal Length (cm)')
    scatter_ax.set_ylabel('Petal Length (cm)')
    # scatter() with per-point colours produces no legend entries, so build
    # proxy markers (white line, coloured marker face) for each species.
    handles = [
        plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=shade, label=name)
        for name, shade in colors.items()
    ]
    scatter_ax.legend(handles=handles, title='Species')

    fig.tight_layout()
    # Save before show(): some backends clear the figure once it is shown.
    fig.savefig('iris_visualizations.png')
    plt.show()

# Main execution
if __name__ == "__main__":
    # Run the three tasks in order; each stage feeds the next.
    print("=== Task 1: Load and Explore the Dataset ===")
    iris_df = load_and_explore()

    # load_and_explore() returns None when the dataset could not be read;
    # in that case the analysis and plotting stages are skipped.
    if iris_df is not None:
        print("\n=== Task 2: Basic Data Analysis ===")
        species_means = basic_analysis(iris_df)

        print("\n=== Task 3: Data Visualization ===")
        # Saves the figure to 'iris_visualizations.png' and displays it.
        create_visualizations(iris_df, species_means)
        print("Visualizations saved as 'iris_visualizations.png'")