---
---

# WELCOME TO PYTHON COURSE (27.09)



---
---

# Results STARTUP (-> 10 Uhr)

---
---

In [11]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

# Seed NumPy's global RNG so every run of this cell generates the same data
np.random.seed(42)

# **Employee Data** with anomalies and duplicates
# The dict values are evaluated top to bottom, so the random draws always
# happen in the order Department, Salary, Performance, YearsOfExperience,
# SkillLevel. Keep that order to reproduce the seeded data.
employee_data = pd.DataFrame({
    'EmployeeID': range(1, 101),
    'Name': [f'Employee{i}' for i in range(1, 101)],
    'Department': np.random.choice(['R&D', 'Engineering', 'Sales', 'HR', 'Finance'], 100),
    'Salary': np.random.randint(50000, 150000, 100),
    'Performance': np.random.uniform(2, 5, 100),
    'YearsOfExperience': np.random.randint(0, 20, 100),
    'SkillLevel': np.random.randint(1, 10, 100)
})

# Introduce anomalies
employee_data.loc[5, 'Salary'] = 9999999  # Extremely high salary
employee_data.loc[10, 'Performance'] = -5  # Negative performance value

# Add duplicates: rows 2 and 3 are appended again as rows 100 and 101
employee_data = pd.concat([employee_data, employee_data.iloc[2:4]], ignore_index=True)

# Add missing values
employee_data.loc[8, 'Department'] = np.nan
# SkillLevel is an integer column. Writing NaN into it relies on pandas
# silently upcasting the column to float, which recent pandas releases
# deprecate (FutureWarning). Cast explicitly first; the resulting column
# (float64 with one NaN) is the same as before.
employee_data['SkillLevel'] = employee_data['SkillLevel'].astype(float)
employee_data.loc[20, 'SkillLevel'] = np.nan

# **Project Data** with anomalies and duplicates
# The random columns are drawn in the order Duration, Budget, ActualCost,
# Revenue, ProjectManager; that order determines the seeded values.
project_data = pd.DataFrame(
    {
        'ProjectID': range(1, 51),
        'ProjectName': [f'Project{i}' for i in range(1, 51)],
        'StartDate': pd.date_range('2018-01-01', periods=50),
        'Duration': np.random.randint(30, 365, size=50),
        'Budget': np.random.randint(100000, 1000000, size=50),
        'ActualCost': np.random.randint(80000, 1200000, size=50),
        'Revenue': np.random.randint(150000, 2000000, size=50),
        # Managers are sampled (with replacement) from the employee names
        'ProjectManager': np.random.choice(employee_data['Name'], size=50),
    }
)

# Introduce anomalies
project_data.at[3, 'Duration'] = -50  # Negative duration
project_data.at[7, 'Budget'] = 99999999  # Unreasonably high budget

# Add duplicates: rows 5 and 6 are appended again at the end
project_data = pd.concat((project_data, project_data.iloc[5:7]), ignore_index=True)

# Add missing values
project_data.at[15, 'ProjectManager'] = np.nan

# **Financial Data** with anomalies and duplicates
# 60 records spaced 30 days apart, starting on 2018-01-01
start_date = datetime(2018, 1, 1)
dates = [start_date + timedelta(days=30 * i) for i in range(60)]
# Random draws happen in the order Revenue, Expenses, Profit
financial_data = pd.DataFrame({
    'Date': dates,
    'Revenue': np.random.randint(1000000, 5000000, 60),
    'Expenses': np.random.randint(800000, 4000000, 60),
    'Profit': np.random.randint(100000, 1000000, 60)
})

# Introduce anomalies
financial_data.loc[10, 'Revenue'] = -1000000  # Negative revenue
financial_data.loc[20, 'Expenses'] = 9999999  # Extremely high expense

# Add duplicates: rows 0 and 1 are appended again as rows 60 and 61
financial_data = pd.concat([financial_data, financial_data.iloc[0:2]], ignore_index=True)

# Add missing values
# Profit is an integer column. Cast it to float explicitly before writing NaN
# instead of relying on pandas' silent int -> float upcast, which recent
# pandas releases deprecate.
financial_data['Profit'] = financial_data['Profit'].astype(float)
financial_data.loc[5, 'Profit'] = np.nan

# **Customer Data** with anomalies and duplicates
# Random draws happen in the order Satisfaction, ContractValue, ContractDuration
customer_data = pd.DataFrame({
    'CustomerID': range(1, 201),
    'CustomerName': [f'Customer{i}' for i in range(1, 201)],
    'Satisfaction': np.random.uniform(2, 5, 200),
    'ContractValue': np.random.randint(10000, 1000000, 200),
    'ContractDuration': np.random.randint(6, 60, 200)
})

# Introduce anomalies
customer_data.loc[3, 'Satisfaction'] = 10  # Satisfaction beyond the valid range
customer_data.loc[50, 'ContractValue'] = -5000  # Negative contract value

# Add duplicates: rows 5, 6 and 7 are appended again at the end
customer_data = pd.concat([customer_data, customer_data.iloc[5:8]], ignore_index=True)

# Add missing values
# ContractDuration is an integer column. Cast it to float explicitly before
# writing NaN instead of relying on pandas' silent upcast, which recent
# pandas releases deprecate.
customer_data['ContractDuration'] = customer_data['ContractDuration'].astype(float)
customer_data.loc[12, 'ContractDuration'] = np.nan

# **Market Data** with anomalies and duplicates
market_data = pd.DataFrame({
    'Quarter': pd.date_range(start='2018-01-01', periods=20, freq='QE'),
    'MarketSize': np.random.randint(10000000, 50000000, 20),
    'MarketShare': np.random.uniform(0.05, 0.3, 20),
    'CompetitorShare': np.random.uniform(0.4, 0.7, 20)
})

# Introduce anomalies
market_data.loc[6, 'MarketShare'] = 1.5  # Market share exceeding 100%
market_data.loc[12, 'CompetitorShare'] = -0.2  # Negative competitor share

# Add duplicates
market_data = pd.concat([market_data, market_data.iloc[2:4]], ignore_index=True)

# Add missing values
market_data.loc[7, 'MarketSize'] = np.nan


# Dummy supply chain data: one row per supplier with randomly drawn
# delivery statistics (delivery time is drawn first, then the variance)
suppliers = [
    'Supplier A',
    'Supplier B',
    'Supplier C',
    'Supplier D',
    'Supplier E',
]
delivery = pd.DataFrame(
    {
        'Supplier': suppliers,
        'AverageDeliveryTime': np.random.uniform(1, 100, size=5),
        'DeliveryVariance': np.random.uniform(0, 2, size=5),
    }
)

# Ensure folder structure for each analysis task
def create_folder(directory):
    """Create ``directory`` (and any missing parent folders) if needed.

    Calling it for a folder that already exists is a no-op, so it is safe to
    call before every write.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process creating the folder in between.
    os.makedirs(directory, exist_ok=True)

create_folder('data')

# Save to CSV (one file per dataset, without the DataFrame index)
for csv_path, frame in (
    ('data/employee_data_with_anomalies.csv', employee_data),
    ('data/project_data_with_anomalies.csv', project_data),
    ('data/financial_data_with_anomalies.csv', financial_data),
    ('data/customer_data_with_anomalies.csv', customer_data),
    ('data/market_data_with_anomalies.csv', market_data),
    ('data/delivery_data_with_anomalies.csv', delivery),
):
    frame.to_csv(csv_path, index=False)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import matplotlib.image as mpimg

# Ensure folder structure for each analysis task
def create_folder(directory):
    """Create ``directory`` (and any missing parent folders) if needed.

    Calling it for a folder that already exists is a no-op, so it is safe to
    call before every write.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process creating the folder in between.
    os.makedirs(directory, exist_ok=True)

# Load the datasets; each frame gets its column names stripped of
# leading/trailing whitespace right after reading
employee_data = pd.read_csv('data/employee_data_with_anomalies.csv').rename(columns=str.strip)
project_data = pd.read_csv('data/project_data_with_anomalies.csv').rename(columns=str.strip)
financial_data = pd.read_csv('data/financial_data_with_anomalies.csv').rename(columns=str.strip)
customer_data = pd.read_csv('data/customer_data_with_anomalies.csv').rename(columns=str.strip)
market_data = pd.read_csv('data/market_data_with_anomalies.csv').rename(columns=str.strip)
delivery_data = pd.read_csv('data/delivery_data_with_anomalies.csv').rename(columns=str.strip)

# Data Cleaning and Preparation
def clean_data(df):
    """Return a cleaned copy of ``df``; the input frame is not modified.

    Steps, in order:
      1. Drop fully duplicated rows.
      2. For every numeric column, fill missing values with the column mean
         and raise negative values to 0.
      3. For every numeric column in turn, keep only the rows whose value is
         within 1.5 * IQR of the quartiles. The quartiles for each column are
         computed on the rows that survived the previous columns' filters.

    Args:
        df: DataFrame to clean.

    Returns:
        A new DataFrame; surviving rows keep their original index labels.
    """
    cleaned = df.drop_duplicates().copy()

    # Pass 1: impute missing values, then floor negatives at zero
    for name in cleaned.select_dtypes(include=[np.number]).columns:
        column_mean = cleaned[name].mean()
        cleaned.loc[:, name] = cleaned[name].fillna(column_mean)
        cleaned.loc[:, name] = cleaned[name].clip(lower=0)

    # Pass 2: IQR-based outlier removal, applied one column after another
    for name in cleaned.select_dtypes(include=[np.number]).columns:
        first_quartile = cleaned[name].quantile(0.25)
        third_quartile = cleaned[name].quantile(0.75)
        spread = third_quartile - first_quartile
        within_fences = cleaned[name].between(
            first_quartile - 1.5 * spread,
            third_quartile + 1.5 * spread,
        )
        cleaned = cleaned[within_fences].copy()

    return cleaned

# Run the same cleaning routine over every loaded dataset
(
    employee_data,
    project_data,
    financial_data,
    customer_data,
    market_data,
    delivery_data,
) = [
    clean_data(frame)
    for frame in (
        employee_data,
        project_data,
        financial_data,
        customer_data,
        market_data,
        delivery_data,
    )
]

# Task 1: Employee Performance and Retention Analysis
def employee_analysis():
    """Save two employee figures into the ``plots`` folder.

    Reads the module-level ``employee_data`` frame and writes a Salary vs
    Performance scatter plot and a correlation heatmap of Salary,
    Performance, YearsOfExperience and SkillLevel. The ``plots`` folder is
    created first if it is missing.
    """
    create_folder('plots')

    # Figure 1: salary against performance
    scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
    scatter_ax.scatter(employee_data['Salary'], employee_data['Performance'])
    scatter_ax.set_xlabel('Salary')
    scatter_ax.set_ylabel('Performance')
    scatter_ax.set_title('Employee Performance vs Salary')
    scatter_fig.savefig('plots/employee_performance_vs_salary.png')
    plt.close(scatter_fig)

    # Figure 2: pairwise correlations between the numeric employee metrics
    metric_columns = ['Salary', 'Performance', 'YearsOfExperience', 'SkillLevel']
    correlation_matrix = employee_data[metric_columns].corr()
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=heatmap_ax)
    heatmap_ax.set_title('Correlation Heatmap of Employee Metrics')
    heatmap_fig.savefig('plots/employee_correlation_heatmap.png')
    plt.close(heatmap_fig)
    

# Task 2: Project Profitability and Timeline Analysis
def project_analysis():
    """Derive project end dates and profitability, then save two figures.

    Side effects on the module-level ``project_data`` frame: adds the columns
    ``EndDate`` (StartDate + Duration days) and ``Profitability``
    (Revenue - ActualCost).

    Writes ``plots/project_durations.png`` (bar chart of durations) and
    ``plots/project_cost_vs_revenue.png`` (cost vs revenue scatter, coloured
    by profitability and sized by duration).
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    project_data['EndDate'] = pd.to_datetime(project_data['StartDate']) + pd.to_timedelta(project_data['Duration'], unit='D')
    project_data['Profitability'] = project_data['Revenue'] - project_data['ActualCost']

    plt.figure(figsize=(12, 6))
    plt.bar(project_data['ProjectName'], project_data['Duration'])
    plt.xticks(rotation=90)  # project names are long; rotate so they do not overlap
    plt.xlabel('Project Name')
    plt.ylabel('Duration (days)')
    plt.title('Project Durations')
    plt.tight_layout()
    plt.savefig('plots/project_durations.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    sns.scatterplot(data=project_data, x='ActualCost', y='Revenue', hue='Profitability', size='Duration', sizes=(20, 200))
    plt.title('Project Cost vs Revenue')
    plt.savefig('plots/project_cost_vs_revenue.png')
    plt.close()

# Task 3: Financial Trend Analysis
def financial_analysis():
    """Plot revenue, expenses and profit over time and save the figure.

    Side effect on the module-level ``financial_data`` frame: on the first
    call the ``Date`` column is parsed to datetimes and becomes the index.
    Later calls detect that ``Date`` is no longer a column and skip this step.

    Writes ``plots/financial_trends.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    # Guarded because set_index() removes the 'Date' column; without the
    # check, re-running this function raised KeyError: 'Date'.
    if 'Date' in financial_data.columns:
        financial_data['Date'] = pd.to_datetime(financial_data['Date'])
        financial_data.set_index('Date', inplace=True)

    plt.figure(figsize=(12, 6))
    for series_name in ('Revenue', 'Expenses', 'Profit'):
        plt.plot(financial_data.index, financial_data[series_name], label=series_name)
    plt.xlabel('Date')
    plt.ylabel('Amount')
    plt.title('Financial Trends Over Time')
    plt.legend()
    plt.tight_layout()
    plt.savefig('plots/financial_trends.png')
    plt.close()

# Task 4: Customer Satisfaction and Revenue Impact
def customer_analysis():
    """Save a satisfaction histogram and a pair plot of customer metrics.

    Reads the module-level ``customer_data`` frame and writes
    ``plots/customer_satisfaction_distribution.png`` and
    ``plots/customer_metrics_pairplot.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    plt.figure(figsize=(10, 6))
    plt.hist(customer_data['Satisfaction'], bins=20)
    plt.xlabel('Satisfaction Score')
    plt.ylabel('Count')
    plt.title('Distribution of Customer Satisfaction Scores')
    plt.savefig('plots/customer_satisfaction_distribution.png')
    plt.close()

    # pairplot opens its own figure, which becomes the current one
    sns.pairplot(customer_data[['Satisfaction', 'ContractValue', 'ContractDuration']])
    plt.suptitle('Pair Plot of Customer Metrics', y=1.02)
    plt.tight_layout()
    # The title is anchored at y=1.02, i.e. above the figure's top edge.
    # bbox_inches='tight' grows the saved area so the title is not cut off.
    plt.savefig('plots/customer_metrics_pairplot.png', bbox_inches='tight')
    plt.close()

# Task 5: Market Share and Competitive Analysis
def market_analysis():
    """Save a pie chart of the market split for the latest quarter.

    Reads the module-level ``market_data`` frame, takes the first row whose
    ``Quarter`` equals the maximum ``Quarter`` value, and plots the company
    share, the competitor share and the remainder ("Others").

    Writes ``plots/market_share_pie_chart.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    latest_quarter = market_data['Quarter'].max()
    latest_row = market_data[market_data['Quarter'] == latest_quarter].iloc[0]

    company_share = latest_row['MarketShare']
    competitor_share = latest_row['CompetitorShare']
    # plt.pie rejects negative wedge sizes. Floor the remainder at 0 so that
    # shares summing to (or rounding past) 100% do not raise.
    others_share = max(0.0, 1 - company_share - competitor_share)

    plt.figure(figsize=(10, 6))
    plt.pie([company_share, competitor_share, others_share],
            labels=['Company', 'Competitors', 'Others'], autopct='%1.1f%%')
    plt.title(f'Market Share Distribution (as of {latest_quarter})')
    plt.savefig('plots/market_share_pie_chart.png')
    plt.close()

# Task 6: Resource Utilization and Optimization
def resource_analysis():
    """Save a stacked bar chart of (randomly generated) resource allocation.

    Builds a dummy allocation table with one row per department and one
    column for each of the first five unique project names in the
    module-level ``project_data`` frame, filled with random integers from
    1 to 9.

    Writes ``plots/resource_allocation.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    # For this task, we'll need to create some dummy resource allocation data
    resources = ['R&D', 'Engineering', 'Sales', 'HR', 'Finance']
    projects = project_data['ProjectName'].unique()[:5]  # Take first 5 projects for simplicity
    resource_allocation = pd.DataFrame(np.random.randint(1, 10, size=(len(resources), len(projects))),
                                       index=resources, columns=projects)

    # DataFrame.plot() opens a new figure unless it is given an Axes. Passing
    # ax= draws on the 12x6 figure created here; previously that figure was
    # left empty and never closed, and the chart was saved at default size.
    fig, ax = plt.subplots(figsize=(12, 6))
    resource_allocation.plot(kind='bar', stacked=True, ax=ax)
    ax.set_title('Resource Allocation Across Projects')
    ax.set_xlabel('Resources')
    ax.set_ylabel('Allocation')
    ax.legend(title='Projects', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    fig.savefig('plots/resource_allocation.png')
    plt.close(fig)

# Task 7: Technology Innovation Impact Analysis
def innovation_analysis():
    """Save a bubble chart of (randomly generated) technology impact data.

    Five dummy technologies get random Impact and Adoption values in [0, 1)
    and a random Cost between 100000 and 1000000. Adoption is plotted against
    Impact, the marker area scales with Cost, and each point is labelled with
    the technology name.

    Writes ``plots/technology_impact.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    # For this task, we'll need to create some dummy technology impact data
    tech_impact = pd.DataFrame({
        'Technology': ['Tech A', 'Tech B', 'Tech C', 'Tech D', 'Tech E'],
        'Impact': np.random.uniform(0, 1, 5),
        'Adoption': np.random.uniform(0, 1, 5),
        'Cost': np.random.uniform(100000, 1000000, 5)
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(tech_impact['Adoption'], tech_impact['Impact'], s=tech_impact['Cost']/10000, alpha=0.5)
    # Walk the three columns together rather than looking values up by
    # label i, which only works while the index is exactly 0..n-1.
    for label, adoption, impact in zip(tech_impact['Technology'],
                                       tech_impact['Adoption'],
                                       tech_impact['Impact']):
        plt.annotate(label, (adoption, impact))
    plt.xlabel('Adoption Rate')
    plt.ylabel('Impact')
    plt.title('Technology Impact Analysis')
    plt.savefig('plots/technology_impact.png')
    plt.close()

# Task 8: Supply Chain and Logistics Analysis
def supply_chain_analysis():
    """Save a box plot of average delivery time per supplier.

    Reads the module-level ``delivery_data`` frame and writes
    ``plots/supplier_delivery_performance.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Supplier', y='AverageDeliveryTime', data=delivery_data)
    plt.title('Supplier Delivery Time Performance')
    plt.ylabel('Average Delivery Time (days)')
    plt.savefig('plots/supplier_delivery_performance.png')
    plt.close()

# Task 9: Employee Skill Gap Analysis
def skill_gap_analysis():
    """Save a radar chart comparing current and required skill levels.

    Both skill profiles are random: current levels are drawn from [0, 10),
    required levels from [5, 10), one value per skill.

    Writes ``plots/skill_gap_analysis.png``.
    """
    # Create the output folder here as well, so this function does not depend
    # on employee_analysis() having been called first.
    create_folder('plots')

    skills = ['Technical', 'Management', 'Communication', 'Problem Solving', 'Teamwork']
    current_skills = np.random.uniform(0, 10, 5)
    required_skills = np.random.uniform(5, 10, 5)

    plt.figure(figsize=(10, 6))
    # One evenly spaced angle per skill; the first point is repeated at the
    # end of each array so the outline closes into a polygon.
    angles = np.linspace(0, 2*np.pi, len(skills), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))
    current_skills = np.concatenate((current_skills, [current_skills[0]]))
    required_skills = np.concatenate((required_skills, [required_skills[0]]))

    plt.polar(angles, current_skills, 'o-', linewidth=2, label='Current Skills')
    plt.polar(angles, required_skills, 'o-', linewidth=2, label='Required Skills')
    plt.fill(angles, current_skills, alpha=0.25)
    plt.fill(angles, required_skills, alpha=0.25)

    # Label each spoke with its skill name (skip the repeated closing angle)
    plt.xticks(angles[:-1], skills)
    plt.title('Skill Gap Analysis')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plt.savefig('plots/skill_gap_analysis.png')
    plt.close()


def combine_plots():
    """Tile the twelve saved plot images into one 4x3 overview figure.

    Each PNG listed below is read from the ``plots`` folder and shown in its
    own subplot with the axes hidden. The result is saved as
    ``plots/combined_plots.png``; the figure is left open afterwards.
    """
    # Images in display order (row by row)
    image_paths = [
        'plots/employee_performance_vs_salary.png',
        'plots/employee_correlation_heatmap.png',
        'plots/project_durations.png',
        'plots/project_cost_vs_revenue.png',
        'plots/financial_trends.png',
        'plots/customer_satisfaction_distribution.png',
        'plots/customer_metrics_pairplot.png',
        'plots/market_share_pie_chart.png',
        'plots/resource_allocation.png',
        'plots/technology_impact.png',
        'plots/supplier_delivery_performance.png',
        'plots/skill_gap_analysis.png'
    ]

    # 4 rows x 3 columns gives exactly one cell per image
    grid_rows, grid_cols = 4, 3
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(15, 15))

    # Pair every image with the next subplot of the flattened grid
    for panel, img_path in zip(axes.ravel(), image_paths):
        picture = mpimg.imread(img_path)
        panel.imshow(picture)
        panel.axis('off')  # no ticks or frame around the image

    plt.tight_layout()
    plt.savefig('plots/combined_plots.png')
    

# Run all analyses, in task order. employee_analysis goes first because it
# creates the 'plots' output folder.
for run_analysis in (
    employee_analysis,
    project_analysis,
    financial_analysis,
    customer_analysis,
    market_analysis,
    resource_analysis,
    innovation_analysis,
    supply_chain_analysis,
    skill_gap_analysis,
):
    run_analysis()

# Call the function to combine and display the plots
combine_plots()

# Generate Comprehensive Company Health Report
def generate_report():
    """Write a Markdown summary of the datasets to ``company_health_report.md``.

    Reads the module-level ``employee_data``, ``project_data``,
    ``financial_data``, ``customer_data`` and ``market_data`` frames.
    Sections 7-9 (technology, suppliers, skill gap) contain hard-coded
    placeholder values, not computed results.
    """
    import textwrap

    # The template is indented to match this function. It must be dedented
    # before writing, because Markdown renders lines that start with four
    # spaces as a code block rather than as headings and bullet points.
    template = textwrap.dedent("""
    # Comprehensive Company Health Report for Galactic Innovations Inc.

    ## 1. Employee Performance and Retention
    - Average Salary: ${:,.2f}
    - Average Performance Score: {:.2f}
    - Correlation between Salary and Performance: {:.2f}

    ## 2. Project Profitability and Timeline
    - Average Project Duration: {:.2f} days
    - Average Project Profitability: ${:,.2f}
    - Most Profitable Project: {}

    ## 3. Financial Trends
    - Average Monthly Revenue: ${:,.2f}
    - Average Monthly Expenses: ${:,.2f}
    - Average Monthly Profit: ${:,.2f}

    ## 4. Customer Satisfaction and Revenue Impact
    - Average Customer Satisfaction: {:.2f}
    - Correlation between Satisfaction and Contract Value: {:.2f}

    ## 5. Market Share and Competitive Analysis
    - Current Market Share: {:.2f}%
    - Competitor Market Share: {:.2f}%

    ## 6. Resource Utilization and Optimization
    - Most Utilized Department: {}
    - Least Utilized Department: {}

    ## 7. Technology Innovation Impact
    - Most Impactful Technology: {}
    - Technology with Highest Adoption Rate: {}

    ## 8. Supply Chain and Logistics
    - Best Performing Supplier: {}
    - Worst Performing Supplier: {}

    ## 9. Employee Skill Gap
    - Largest Skill Gap: {}
    - Smallest Skill Gap: {}

    Please refer to the generated visualizations for more detailed insights.
    """)

    # Compute profitability here (same formula as project_analysis) rather
    # than reading the 'Profitability' column, which only exists after
    # project_analysis() has run.
    profitability = project_data['Revenue'] - project_data['ActualCost']

    # value_counts() sorts by frequency, most common department first
    department_counts = employee_data['Department'].value_counts()

    # The report's "current" figures come from the last row of market_data
    latest_market = market_data.iloc[-1]

    report = template.format(
        employee_data['Salary'].mean(),
        employee_data['Performance'].mean(),
        employee_data['Salary'].corr(employee_data['Performance']),
        project_data['Duration'].mean(),
        profitability.mean(),
        project_data.loc[profitability.idxmax(), 'ProjectName'],
        financial_data['Revenue'].mean(),
        financial_data['Expenses'].mean(),
        financial_data['Profit'].mean(),
        customer_data['Satisfaction'].mean(),
        customer_data['Satisfaction'].corr(customer_data['ContractValue']),
        latest_market['MarketShare'] * 100,
        latest_market['CompetitorShare'] * 100,
        department_counts.index[0],
        department_counts.index[-1],
        'Tech A',  # Placeholder
        'Tech B',  # Placeholder
        'Supplier A',  # Placeholder
        'Supplier E',  # Placeholder
        'Technical',  # Placeholder
        'Teamwork'  # Placeholder
    )

    # Explicit encoding so the file does not depend on the platform default
    with open('company_health_report.md', 'w', encoding='utf-8') as f:
        f.write(report)

generate_report()

print("Analysis complete. All visualizations and the comprehensive report have been generated.")

---
---

# Advanced Test on Pandas, Matplotlib, Seaborn, NumPy, Plotly, and Python Functions (10.15 -> 11 Uhr) 

---
### Test: Pandas, Matplotlib, Seaborn, Plotly, NumPy (10:15 -> 11:00 Uhr)


### Instructions:
- Answer all questions to the best of your ability.
- Write code in the designated sections where applicable.
- Use comments in your code to explain your thought process.
- Send your finished code and answers in a .py file.

---

### Section 1: Pandas

1. Explain how to handle missing data in a Pandas DataFrame. Write a code snippet demonstrating two different methods to deal with missing values.

2. Given a DataFrame, how would you find and return the top 3 rows with the highest values in a specific column while ignoring NaN values? Provide a code example.

3. Describe the purpose of the `groupby()` function in Pandas. Write a code snippet that demonstrates how to group data by two columns and calculate the sum of another column.

4. Write a function that takes a DataFrame and a column name as inputs, normalizes the values in that column, and returns the modified DataFrame.

5. Explain how to merge two DataFrames in Pandas. Write a code snippet to demonstrate merging on a common key with an inner join.

---

### Section 2: Matplotlib

6. How can you customize the aesthetics of a Matplotlib plot? Write a code example that includes at least three different customizations.

7. Write a Matplotlib code snippet to create a subplot with three different types of plots (line, bar, and scatter) in a single figure, each with its own title.

8. Explain how to add a secondary y-axis to a plot. Write a code snippet that demonstrates this with a sample dataset.

---

### Section 3: Seaborn

9. Describe the differences between `sns.barplot()` and `sns.countplot()`. Provide an example of when to use each function.

10. Write a Seaborn code snippet that generates a violin plot for a dataset with two categories and a continuous variable. Explain what insights can be drawn from this visualization.

11. How do you improve the readability of visualizations in Seaborn? Provide a code example that demonstrates techniques for enhancing visual clarity.

---

### Section 4: NumPy

12. Write a NumPy function that accepts a 2D array and returns the column index of the maximum value in each row.

13. Explain how to perform element-wise operations between two NumPy arrays. Write a code snippet that demonstrates this with addition and multiplication.

14. Write a NumPy code snippet to create a 3D array and then compute the sum along each axis. Provide comments explaining each step.

---

### Section 5: Plotly

15. Explain how to create interactive plots using Plotly. Write a code snippet that generates a scatter plot with hover text displaying additional information about each point.

16. Write a Plotly code snippet to create a 3D surface plot from a mathematical function. Describe the function used in your example.

---

### Section 6: Advanced Python Functions, Loops, and Map

17. Write a Python function that accepts a list of dictionaries and returns a new list containing only those dictionaries that have a specific key with a value greater than a given threshold.

18. Explain how to use list comprehensions to create a new list from an existing list of numbers by applying a complex mathematical function. Write an example.

19. Write a Python function that uses recursion to calculate the factorial of a number. Provide an explanation of how recursion works in this context.

20. Describe how to use the `filter()` function in Python. Write a code snippet that filters a list of integers to return only even numbers.

---
---



# TEST RESULT DISCUSSION (-> 11.15)



---
---

# PREVIEW ANSWERS TO QUESTIONS (-> 12Uhr)

---



In [None]:
# Lists, Maps , Loops, Lambdas




---
---

# TODAYS TASKS: NO

---


# All PLOTS

# Matplotlib Visualizations

## 1. Line Plot
- **Description**: Basic line graphs used to represent continuous data points.
- **When to Use**: Ideal for showing trends over time or continuous data.
- **Example**:
    ```python
    plt.plot(data['x'], data['y'], label='Line 1', color='blue')
    ```

## 2. Scatter Plot
- **Description**: Displays values for typically two variables for a set of data.
- **When to Use**: Useful for observing relationships between two variables.
- **Example**:
    ```python
    plt.scatter(data['x'], data['y'], s=20, c='red')
    ```

## 3. Bar Plot
- **Description**: Represents categorical data with rectangular bars.
- **When to Use**: Great for comparing quantities across different categories.
- **Example**:
    ```python
    plt.bar(data['category'], data['values'], color='green')
    ```

## 4. Histogram
- **Description**: Represents the frequency distribution of a dataset.
- **When to Use**: Useful for understanding the distribution of data points.
- **Example**:
    ```python
    plt.hist(data['values'], bins=30, color='purple')
    ```

## 5. Box Plot
- **Description**: Summarizes data using their quartiles.
- **When to Use**: Useful for detecting outliers and understanding the distribution.
- **Example**:
    ```python
    plt.boxplot(data['values'])
    ```

## 6. Area Plot
- **Description**: Similar to line plots but with the area below the line filled in.
- **When to Use**: Good for showing cumulative totals over time.
- **Example**:
    ```python
    plt.fill_between(data['x'], data['y'], color='skyblue', alpha=0.4)
    ```

## 7. Pie Chart
- **Description**: A circular chart divided into sectors to illustrate numerical proportions.
- **When to Use**: Useful for showing parts of a whole, but not recommended for too many categories.
- **Example**:
    ```python
    plt.pie(data['sizes'], labels=data['labels'], autopct='%1.1f%%')
    ```

## 8. Heatmap
- **Description**: A two-dimensional representation of data where values are represented by colors.
- **When to Use**: Ideal for visualizing matrix-like data.
- **Example**:
    ```python
    cax = plt.imshow(data, aspect='auto', cmap='hot')
    fig.colorbar(cax)
    ```

## 9. Contour Plot
- **Description**: A way to represent three-dimensional data in two dimensions using contour lines.
- **When to Use**: Useful for visualizing the relationship between three variables.
- **Example**:
    ```python
    plt.contour(X, Y, Z)
    ```

## 10. 3D Plot
- **Description**: Visualizes data in three dimensions.
- **When to Use**: Useful for visualizing three-dimensional data points.
- **Example**:
    ```python
    ax = fig.add_subplot(111, projection='3d')
    plt.plot(data['x'], data['y'], data['z'])
    ```

## 11. Quiver Plot
- **Description**: A plot that displays velocity vectors as arrows.
- **When to Use**: Ideal for representing vector fields.
- **Example**:
    ```python
    plt.quiver(X, Y, U, V)
    ```

## 12. Stream Plot
- **Description**: Visualizes the flow of a vector field.
- **When to Use**: Useful for visualizing the direction and strength of flow fields.
- **Example**:
    ```python
    plt.streamplot(X, Y, U, V)
    ```

## 13. Error Bars
- **Description**: Represents the uncertainty in a measurement.
- **When to Use**: Useful for indicating the variability of data.
- **Example**:
    ```python
    plt.errorbar(data['x'], data['y'], yerr=data['error'], fmt='o')
    ```

## 14. Polar Plot
- **Description**: Used for plotting data in polar coordinates.
- **When to Use**: Good for circular data or periodic functions.
- **Example**:
    ```python
    ax = plt.subplot(111, polar=True)
    plt.plot(theta, r)
    ```

## 15. Twin Axes
- **Description**: Create a second y-axis on the same plot.
- **When to Use**: Useful for comparing two different data sets with different scales.
- **Example**:
    ```python
    ax2 = plt.twinx()
    ax2.plot(data['x'], data['y2'], color='red')
    ```

## 16. Subplots
- **Description**: Multiple plots within a single figure.
- **When to Use**: Useful for comparing different datasets side by side.
- **Example**:
    ```python
    fig, axs = plt.subplots(2, 2)
    ```

## 17. Animation
- **Description**: Creates animated visualizations.
- **When to Use**: Useful for showing changes over time.
- **Example**:
    ```python
    ani = FuncAnimation(fig, update, frames=100, interval=20)
    ```

## 18. Custom Legends
- **Description**: Adding customized legends to your plots.
- **When to Use**: Useful when default legends don't suffice.
- **Example**:
    ```python
    plt.legend(['Data 1', 'Data 2'], loc='upper right')
    ```

## 19. Text Annotations
- **Description**: Adding text labels to your plots.
- **When to Use**: Useful for highlighting specific data points or features.
- **Example**:
    ```python
    plt.text(x, y, 'Label', fontsize=12)
    ```

## 20. Image Display
- **Description**: Displaying images as part of your plot.
- **When to Use**: Useful for showing raster data alongside other visualizations.
- **Example**:
    ```python
    plt.imshow(image_data)
    ```

## 21. Violin Plot
- **Description**: Combines box plot and kernel density plot to show distribution.
- **When to Use**: Useful for visualizing distributions across different categories.
- **Example**:
    ```python
    plt.violinplot(data['values'])
    ```

## 22. Hexbin Plot
- **Description**: Binning data points in 2D space to visualize density.
- **When to Use**: Useful for large datasets where scatter plots may be less informative.
- **Example**:
    ```python
    plt.hexbin(data['x'], data['y'], gridsize=50)
    ```

## 23. Radar Chart (Spider Chart)
- **Description**: Visualizing multivariate data in a circular layout.
- **When to Use**: Useful for comparing multiple attributes of different groups.
- **Example**:
    ```python
    # Requires custom implementation
    ```

## 24. Step Plot
- **Description**: Plotting data points with steps instead of continuous lines.
- **When to Use**: Useful for representing discrete changes.
- **Example**:
    ```python
    plt.step(data['x'], data['y'])
    ```

## 25. Stem Plot
- **Description**: Displaying discrete data points with stems and markers.
- **When to Use**: Useful for emphasizing individual data points.
- **Example**:
    ```python
    plt.stem(data['x'], data['y'])
    ```

## 26. Customizing Ticks and Labels
- **Description**: Fine-tuning x and y-axis ticks and labels for better readability.
- **When to Use**: Useful for making plots clearer.
- **Example**:
    ```python
    plt.xticks(np.arange(0, 10, 1))
    ```

## 27. Multiple Axes (Insets)
- **Description**: Creating inset plots to highlight specific areas of interest.
- **When to Use**: Useful for focusing on details without losing context.
- **Example**:
    ```python
    ax_inset = fig.add_axes([0.6, 0.6, 0.2, 0.2])
    ```

## 28. Cumulative Distribution Function (CDF) Plot
- **Description**: Visualizing the cumulative distribution of a dataset.
- **When to Use**: Useful for understanding the distribution and probability.
- **Example**:
    ```python
    plt.hist(data['values'], cumulative=True)
    ```

## 29. Ridge Plot
- **Description**: A multi-density plot often used to visualize distributions of different groups.
- **When to Use**: Useful for comparing distributions across categories.
- **Example**:
    ```python
    # Requires custom implementation
    ```

## 30. Bubble Plot
- **Description**: A variation of scatter plots where the size of the marker represents another variable.
- **When to Use**: Useful for visualizing relationships among three variables.
- **Example**:
    ```python
    plt.scatter(data['x'], data['y'], s=data['size'])
    ```

## 31. Dendrogram
- **Description**: A tree-like diagram used to visualize hierarchical relationships.
- **When to Use**: Useful for clustering and hierarchical data.
- **Example**:
    ```python
    dendrogram(linkage_matrix)
    ```

## 32. Pair Plot
- **Description**: Visualizing pairwise relationships in a dataset.
- **When to Use**: Useful for exploring correlations in multivariate data.
- **Example**:
    ```python
    sns.pairplot(data)
    ```


# Unique Plotly Visualizations

## 1. Sankey Diagram
- **Description**: A flow diagram that depicts the flow of resources or information between stages.
- **When to Use**: Ideal for visualizing the movement of data or resources across various categories.
- **Example**:
    ```python
    import plotly.graph_objects as go

    fig = go.Figure(go.Sankey(
        node=dict(pad=15, thickness=20, line=dict(color="black", width=0.5), label=["A", "B", "C"]),
        link=dict(source=[0, 1, 0], target=[1, 2, 2], value=[8, 4, 2])
    ))
    ```

## 2. Funnel Chart
- **Description**: A visual representation of a process, showing the reduction of data through stages.
- **When to Use**: Useful for visualizing stages in a process like sales funnels.
- **Example**:
    ```python
    fig = go.Figure(go.Funnel(
        y=["Stage 1", "Stage 2", "Stage 3"],
        x=[500, 300, 100]
    ))
    ```

## 3. Sunburst Chart
- **Description**: A hierarchical visualization that shows proportions within categories.
- **When to Use**: Ideal for visualizing part-to-whole relationships in hierarchical data.
- **Example**:
    ```python
    fig = go.Figure(go.Sunburst(
        labels=["A", "B", "C", "D", "E", "F"],
        parents=["", "A", "A", "B", "B", "C"],
        values=[10, 20, 30, 10, 10, 5]
    ))
    ```

## 4. Tree Map
- **Description**: Displays hierarchical data as a set of nested rectangles.
- **When to Use**: Useful for showing proportions within hierarchical categories.
- **Example**:
    ```python
    fig = go.Figure(go.Treemap(
        labels=["A", "B", "C", "D", "E"],
        parents=["", "A", "A", "B", "B"],
        values=[10, 20, 30, 10, 10]
    ))
    ```

## 5. Density Plot
- **Description**: A two-dimensional histogram that bins paired observations and colors each bin by its count, approximating the joint density of two variables.
- **When to Use**: Useful for visualizing the distribution of data points over a continuous interval.
- **Example**:
    ```python
    fig = go.Figure(data=go.Histogram2d(
        x=data['x'],
        y=data['y'],
        colorscale='Viridis',
        zmax=20
    ))
    ```

## 6. Polar Scatter Plot
- **Description**: A scatter plot where the data is displayed in polar coordinates.
- **When to Use**: Useful for visualizing data that has a circular nature.
- **Example**:
    ```python
    fig = go.Figure(go.Scatterpolar(
        r=[1, 2, 3, 4, 5],
        theta=[0, 90, 180, 270, 360],
        mode='markers'
    ))
    ```

## 7. Surface Plot
- **Description**: A three-dimensional plot representing a surface.
- **When to Use**: Useful for visualizing three-dimensional data.
- **Example**:
    ```python
    import numpy as np
    x = np.linspace(-5, 5, 50)
    y = np.linspace(-5, 5, 50)
    X, Y = np.meshgrid(x, y)
    Z = np.sin(np.sqrt(X**2 + Y**2))

    fig = go.Figure(data=[go.Surface(z=Z, x=X[0], y=Y[:,0])])
    ```

## 8. Ternary Plot
- **Description**: A triangular (barycentric) plot that displays the proportions of three variables that sum to a constant.
- **When to Use**: Useful for visualizing compositions of three parts.
- **Example**:
    ```python
    fig = go.Figure(data=go.Scatterternary(
        sum=1,
        a=[0.2, 0.4, 0.3],
        b=[0.3, 0.3, 0.4],
        c=[0.5, 0.3, 0.3]
    ))
    ```

## 9. Box Whisker Plot
- **Description**: A box-and-whisker plot drawn together with the individual data points, so the raw observations are visible next to the summary statistics.
- **When to Use**: Useful for showing distribution with more detail regarding outliers.
- **Example**:
    ```python
    fig = go.Figure(data=go.Box(
        y=data['values'],
        boxpoints='all',  # show all points
        jitter=0.3,       # spread points out
        pointpos=-1.8     # move points to the left of box
    ))
    ```

## 10. Waterfall Chart
- **Description**: Displays the cumulative effect of sequentially introduced positive or negative values.
- **When to Use**: Useful for visualizing financial data.
- **Example**:
    ```python
    fig = go.Figure(go.Waterfall(
        x=["Initial", "Revenue", "Expenses", "Profit"],
        y=[100, 50, -30, 20],
        measure=["relative", "relative", "relative", "total"]
    ))
    ```

## 11. Indicator Chart
- **Description**: Displays a single value in a visual format.
- **When to Use**: Useful for key performance indicators (KPIs).
- **Example**:
    ```python
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=450,
        title={'text': "Speed"},
        gauge=dict(
            axis=dict(range=[0, 500]),
            steps=[{'range': [0, 250], 'color': "lightgray"},
                   {'range': [250, 400], 'color': "gray"}],
            threshold=dict(line=dict(color="red", width=4), value=400)
        )
    ))
    ```

## 12. Donut Chart
- **Description**: A circular chart similar to a pie chart but with a hole in the center.
- **When to Use**: Useful for visualizing part-to-whole relationships with a different visual appeal.
- **Example**:
    ```python
    fig = go.Figure(go.Pie(labels=data['labels'], values=data['sizes'], hole=.3))
    ```

## 13. Boxen Plot
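- **Description**: A box plot augmented with the mean and standard deviation to give additional detail about the distribution. Plotly has no native boxen (letter-value) trace, so this approximates one with `go.Box`.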
- **When to Use**: Useful for large datasets where standard box plots may oversimplify the distribution.
- **Example**:
    ```python
    fig = go.Figure(data=go.Box(
        y=data['values'],
        boxpoints='all',  # show all points
        jitter=0.3,
        pointpos=-1.8,
        boxmean='sd'  # show mean and standard deviation
    ))
    ```

## 14. Parcoords (Parallel Coordinates)
- **Description**: A way to visualize multivariate data in a parallel coordinate system.
- **When to Use**: Useful for identifying relationships between multiple variables.
- **Example**:
    ```python
    fig = go.Figure(data=go.Parcoords(
        line=dict(color=data['values'], colorscale='Viridis'),
        dimensions=[dict(range=[0, 10], label='X1', values=data['x']),
                    dict(range=[0, 10], label='X2', values=data['y']),
                    dict(range=[0, 10], label='X3', values=data['z'])]
    ))
    ```

## 15. Contourf Plot
- **Description**: Filled contour plot that visualizes three-dimensional data in two dimensions with filled regions.
- **When to Use**: Useful for representing levels in a surface.
- **Example**:
    ```python
    fig = go.Figure(data=go.Contour(
        z=Z, x=X[0], y=Y[:,0],
        contours=dict(start=-2, end=2, size=0.1)
    ))
    ```

## 16. Histogram2d Contour
- **Description**: A contour plot that combines a 2D histogram and contour lines.
- **When to Use**: Useful for visualizing the density of data points in two dimensions.
- **Example**:
    ```python
    fig = go.Figure(data=go.Histogram2dContour(
        x=data['x'],
        y=data['y'],
        colorscale='Blues'
    ))
    ```

## 17. Streamgraph
- **Description**: A type of stacked area chart where the layers are displaced to follow a flowing shape.
- **When to Use**: Useful for visualizing changes over time among different groups.
- **Example**:
    ```python
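    # Plotly has no dedicated streamgraph trace (go.Streamtube draws 3D flow tubes);
    # stacked area traces that share a stackgroup are the closest equivalent.
    fig = go.Figure()
    for group in ['A', 'B', 'C']:
        fig.add_trace(go.Scatter(
            x=data['time'], y=data[group], mode='lines', stackgroup='one', name=group
        ))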
    ```

## 18. Matrix Plot
- **Description**: Visualizes data in a grid format where values are represented by colors.
- **When to Use**: Useful for displaying correlation matrices or any other two-dimensional data.
- **Example**:
    ```python
    fig = go.Figure(data=go.Heatmap(
        z=data.corr()
    ))
    ```

## 19. 3D Scatter Plot
- **Description**: A three-dimensional scatter plot that displays points in a 3D space.
- **When to Use**: Useful for visualizing relationships in three dimensions.
- **Example**:
    ```python
    fig = go.Figure(data=go.Scatter3d(
        x=data['x'],
        y=data['y'],
        z=data['z'],
        mode='markers'
    ))
    ```

## 20. Violin Plot
- **Description**: Combines a box plot and a density plot, showing the distribution of data across different categories.
- **When to Use**: Useful for comparing distributions between multiple groups.
- **Example**:
    ```python
    fig = go.Figure(data=go.Violin(
        y=data['values'],
        box_visible=True
    ))
    ```



# Unique Seaborn Visualizations

## 1. Pair Plot
- **Description**: A grid of scatter plots showing relationships between multiple variables.
- **When to Use**: Ideal for visualizing pairwise relationships in a dataset.
- **Example**:
    ```python
    import seaborn as sns
    import matplotlib.pyplot as plt

    iris = sns.load_dataset("iris")
    sns.pairplot(iris, hue="species")
    plt.show()
    ```

## 2. Facet Grid
- **Description**: A grid of subplots based on a categorical variable.
- **When to Use**: Useful for visualizing the distribution of data across multiple subsets.
- **Example**:
    ```python
    g = sns.FacetGrid(iris, col="species")
    g.map(sns.histplot, "sepal_length")
    plt.show()
    ```

## 3. Boxen Plot
- **Description**: An enhanced version of the box plot that provides more detail about the distribution.
- **When to Use**: Useful for visualizing data with a large number of observations.
- **Example**:
    ```python
    sns.boxenplot(x="species", y="sepal_length", data=iris)
    plt.show()
    ```

## 4. Heatmap
- **Description**: A graphical representation of data where individual values are represented as colors.
- **When to Use**: Useful for displaying correlation matrices or other data that can be represented in a grid.
- **Example**:
    ```python
    corr = iris.corr(numeric_only=True)  # skip the non-numeric 'species' column
    sns.heatmap(corr, annot=True, cmap="coolwarm")
    plt.show()
    ```

## 5. Violin Plot
- **Description**: A combination of a box plot and a density plot that shows the distribution of the data.
- **When to Use**: Useful for comparing the distribution between multiple categories.
- **Example**:
    ```python
    sns.violinplot(x="species", y="sepal_length", data=iris)
    plt.show()
    ```

## 6. Swarm Plot
- **Description**: A scatter plot where points are adjusted to avoid overlap.
- **When to Use**: Useful for visualizing categorical data with many points.
- **Example**:
    ```python
    sns.swarmplot(x="species", y="sepal_length", data=iris)
    plt.show()
    ```

## 7. Strip Plot
- **Description**: Similar to the swarm plot, but with points positioned randomly along the categorical axis.
- **When to Use**: Useful for visualizing the distribution of categorical data.
- **Example**:
    ```python
    sns.stripplot(x="species", y="sepal_length", data=iris)
    plt.show()
    ```

## 8. Joint Plot
- **Description**: A plot that shows both the bivariate relationship between two variables and their univariate distributions.
- **When to Use**: Useful for examining relationships between two continuous variables.
- **Example**:
    ```python
    sns.jointplot(x="sepal_length", y="sepal_width", data=iris, kind="scatter")
    plt.show()
    ```

## 9. Rug Plot
- **Description**: A plot that displays individual data points along an axis.
- **When to Use**: Useful for visualizing the distribution of a univariate dataset.
- **Example**:
    ```python
    sns.rugplot(data=iris['sepal_length'])
    plt.show()
    ```

## 10. Bar Plot
- **Description**: A plot that shows the relationship between a categorical variable and a continuous variable using bars.
- **When to Use**: Useful for comparing quantities of different categories.
- **Example**:
    ```python
    sns.barplot(x="species", y="sepal_length", data=iris, estimator=sum)
    plt.show()
    ```

## 11. Count Plot
- **Description**: A bar plot that shows the counts of observations in each categorical bin.
- **When to Use**: Useful for visualizing the frequency of categories in a dataset.
- **Example**:
    ```python
    sns.countplot(x="species", data=iris)
    plt.show()
    ```

## 12. LM Plot
- **Description**: A regression plot with confidence intervals.
- **When to Use**: Useful for visualizing linear relationships between two variables.
- **Example**:
    ```python
    sns.lmplot(x="sepal_length", y="sepal_width", data=iris)
    plt.show()
    ```

## 13. PairGrid
- **Description**: A more customizable version of the pair plot that allows for different types of plots on different axes.
- **When to Use**: Useful for examining relationships in a more flexible manner.
- **Example**:
    ```python
    g = sns.PairGrid(iris)
    g.map_diag(sns.histplot)
    g.map_offdiag(sns.scatterplot)
    plt.show()
    ```

## 14. Point Plot
- **Description**: A plot that shows point estimates and confidence intervals.
- **When to Use**: Useful for visualizing estimates of central tendency for categorical data.
- **Example**:
    ```python
    sns.pointplot(x="species", y="sepal_length", data=iris)
    plt.show()
    ```

## 15. Displot
- **Description**: A figure-level function for visualizing univariate distributions.
- **When to Use**: Useful for visualizing the distribution of a single variable with options for histograms and KDE.
- **Example**:
    ```python
    sns.displot(iris['sepal_length'], kde=True)
    plt.show()
    ```

## 16. Catplot
- **Description**: A figure-level function that creates categorical plots, useful for creating multiple types of categorical plots in one call.
- **When to Use**: Useful for visualizing categorical data with flexibility in the type of plot.
- **Example**:
    ```python
    sns.catplot(x="species", y="sepal_length", data=iris, kind="box")
    plt.show()
    ```

## 17. Heatmap with Clustering
- **Description**: A heatmap that includes hierarchical clustering.
- **When to Use**: Useful for visualizing relationships in high-dimensional data with clustering.
- **Example**:
    ```python
    sns.clustermap(corr, annot=True, cmap="coolwarm")
    plt.show()
    ```

## 18. Kdeplot
- **Description**: A kernel density estimate plot that visualizes the distribution of a variable.
- **When to Use**: Useful for visualizing the probability density function of a continuous variable.
- **Example**:
    ```python
    sns.kdeplot(iris['sepal_length'], fill=True)
    plt.show()
    ```

## 19. Histogram
- **Description**: A plot that displays the distribution of a continuous variable.
- **When to Use**: Useful for visualizing the frequency distribution of a dataset.
- **Example**:
    ```python
    sns.histplot(iris['sepal_length'], bins=10)
    plt.show()
    ```

## 20. Time Series Plot
- **Description**: A line plot that shows data points over time.
- **When to Use**: Useful for visualizing trends in time series data.
- **Example**:
    ```python
    # Assuming 'time' is a datetime variable
    sns.lineplot(x="time", y="value", data=time_series_data)
    plt.show()
    ```

