Given a Pandas DataFrame, create a line plot to visualize the trend of a specific column over time.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Given a Pandas DataFrame, create a line plot to visualize the trend of a specific column over time.
def plot_time_trend(df, time_col, value_col, x_label="Time", y_label="Value", title="Time Trend"):
    """
    Draw one DataFrame column as a line against a time column and display it.

    Args:
        df (pd.DataFrame): Frame holding both columns.
        time_col (str): Column plotted along the x-axis (e.g., 'Date').
        value_col (str): Column whose trend is drawn on the y-axis (e.g., 'Sales').
        x_label (str, optional): Text for the x-axis label. Defaults to "Time".
        y_label (str, optional): Text for the y-axis label. Defaults to "Value".
        title (str, optional): Text for the plot title. Defaults to "Time Trend".
    """

    # A 10x6 inch canvas leaves room for the time axis.
    plt.figure(figsize=(10, 6))

    time_values = df[time_col]
    trend_values = df[value_col]
    plt.plot(time_values, trend_values)

    # Decorate the axes that plt.plot just drew on.
    axes = plt.gca()
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    axes.grid(True)  # grid lines on both axes

    plt.show()

# Sample usage: five month-start dates, each paired with one sales figure.
data = {
    'Date': pd.to_datetime(['2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01', '2023-05-01']),
    'Sales': [100, 120, 90, 130, 110],
}
df = pd.DataFrame(data)
plot_time_trend(
    df,
    'Date',
    'Sales',
    x_label='Month',
    y_label='Total Sales',
    title='Monthly Sales Trend',
)


Implement a program that generates a histogram using Matplotlib to visualize the distribution of data.


In [None]:


# 2. Implement a program that generates a histogram using Matplotlib to visualize the distribution of data.
def plot_histogram(df, data_col, x_label="Value", y_label="Frequency", title="Histogram", bins=10):
    """
    Generates a histogram using Matplotlib to visualize the distribution of data.

    Args:
        df (pd.DataFrame): Input Pandas DataFrame.
        data_col (str): Name of the column to visualize the distribution of.
        x_label (str, optional): Label for the x-axis. Defaults to "Value".
        y_label (str, optional): Label for the y-axis. Defaults to "Frequency".
        title (str, optional): Title of the plot. Defaults to "Histogram".
        bins (optional): Bin specification forwarded unchanged to ``plt.hist``.
            Defaults to 10, the value that was previously hard-coded, so
            existing callers see the same plot.
    """

    plt.figure(figsize=(8, 6))
    # The bin specification is caller-controlled; colours stay fixed.
    plt.hist(df[data_col], bins=bins, color='skyblue', edgecolor='black')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.grid(axis='y', alpha=0.75)  # Horizontal guides for reading the counts
    plt.show()

# Sample usage: fourteen ages, a few of them repeated.
data = {
    'Ages': [22, 25, 25, 30, 32, 35, 40, 42, 42, 45, 48, 50, 55, 60],
}
df = pd.DataFrame(data)
plot_histogram(
    df,
    'Ages',
    x_label='Age',
    y_label='Number of People',
    title='Age Distribution',
)


Write a Python program that uses Seaborn to create a scatter plot matrix for multiple variables in a DataFrame.


In [None]:


# 3. Write a Python program that uses Seaborn to create a scatter plot matrix for multiple variables in a DataFrame.
def plot_scatter_matrix(df):
    """
    Show a Seaborn scatter plot matrix (pair plot) of a DataFrame.

    Args:
        df (pd.DataFrame): Frame handed to ``sns.pairplot``.
    """

    sns.pairplot(df)

    # Title the current figure as a whole; y=1.02 places it just above
    # the top edge of the figure.
    current_figure = plt.gcf()
    current_figure.suptitle("Scatter Plot Matrix", y=1.02)

    plt.show()

# Sample usage: three numeric measurements for five people.
data = {
    'Height': [160, 165, 170, 175, 180],
    'Weight': [55, 60, 68, 70, 75],
    'Age': [25, 30, 35, 40, 45],
}
df = pd.DataFrame(data)
plot_scatter_matrix(df)


Create a function that takes a Pandas DataFrame and generates a box plot to visualize the distribution of data.


In [None]:


# 4. Create a function that takes a Pandas DataFrame and generates a box plot to visualize the distribution of data.
def plot_box_plot(df, data_col, x_label=" ", y_label="Value", title="Box Plot"):
    """
    Generates a box plot to visualize the distribution of data.

    The column is drawn as a single vertical box, so the values lie along the
    y-axis.

    Args:
        df (pd.DataFrame): Input Pandas DataFrame.
        data_col (str): Name of the column to visualize the distribution of.
        x_label (str, optional): Label for the x-axis. Defaults to " ".
        y_label (str, optional): Label for the y-axis. Defaults to "Value".
        title (str, optional): Title of the plot. Defaults to "Box Plot".
    """

    plt.figure(figsize=(6, 8))
    sns.boxplot(y=df[data_col])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    # The values are on the y-axis, so the useful guides are horizontal
    # lines at the y ticks (axis='y'). axis='x' only drew vertical lines
    # at the category ticks.
    plt.grid(axis='y', alpha=0.75)
    plt.show()

# Sample usage: exam scores; the trailing 110 is an intentionally high value.
data = {
    'Scores': [60, 65, 70, 75, 80, 85, 90, 95, 100, 62, 88, 72, 92, 78, 110],
}
df = pd.DataFrame(data)
plot_box_plot(
    df,
    'Scores',
    y_label='Exam Scores',
    title='Distribution of Exam Scores',
)



Given a CSV file with sales data, use Matplotlib to create a bar plot to compare the sales of different products.


In [None]:

# 5. Given a CSV file with sales data, use Matplotlib to create a bar plot to compare the sales of different products.
def plot_sales_comparison(csv_file, product_col, sales_col, x_label="Product", y_label="Total Sales", title="Sales Comparison"):
    """
    Creates a bar plot to compare the sales of different products from a CSV file.

    Rows that share a product are summed, so every product gets exactly one
    bar showing its total. Products keep the order in which they first appear
    in the file. When each product occurs once, the bars match the raw rows.

    If the file does not exist, an error message is printed and nothing is
    plotted.

    Args:
        csv_file (str): Path to the CSV file.
        product_col (str): Name of the column representing the product.
        sales_col (str): Name of the column representing the sales.
        x_label (str, optional): Label for the x-axis. Defaults to "Product".
        y_label (str, optional): Label for the y-axis. Defaults to "Total Sales".
        title (str, optional): Title of the plot. Defaults to "Sales Comparison".

    Raises:
        KeyError: If ``product_col`` or ``sales_col`` is not a column of the file.
    """

    # Only the read is guarded: it is the step expected to hit a missing file.
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: File '{csv_file}' not found.")
        return

    # One total per product; sort=False preserves first-appearance order
    # instead of sorting the product names.
    totals = df.groupby(product_col, sort=False)[sales_col].sum()

    plt.figure(figsize=(10, 6))
    plt.bar(totals.index, totals.values, color='lightcoral')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for readability
    plt.tight_layout()  # Keep the rotated labels inside the figure
    plt.show()

# Demonstration data: write a small five-product CSV, then plot it.
sales_data = {
    'Product': ['A', 'B', 'C', 'D', 'E'],
    'Sales': [100, 150, 120, 180, 90],
}
sales_df = pd.DataFrame(sales_data)
sales_df.to_csv('sales_data.csv', index=False)  # index=False: no row-number column

plot_sales_comparison(
    'sales_data.csv',
    'Product',
    'Sales',
    x_label='Product Name',
    y_label='Sales Revenue',
    title='Product Sales Comparison',
)



Implement a program that reads a JSON file into a Pandas DataFrame and uses Seaborn to create a violin plot.


In [None]:

# 6. Implement a program that reads a JSON file into a Pandas DataFrame and uses Seaborn to create a violin plot.
import json

def plot_violin_from_json(json_file, data_col, x_label=" ", y_label="Value", title="Violin Plot"):
    """
    Reads a JSON file into a Pandas DataFrame and uses Seaborn to create a violin plot.

    The parsed JSON content becomes the single column ``data_col`` of a new
    DataFrame, which is then drawn as one vertical violin. If the file is
    missing or is not valid JSON, an error message is printed and nothing is
    plotted.

    Args:
        json_file (str): Path to the JSON file.
        data_col (str): Name to use for the data column in the DataFrame.
        x_label (str, optional): Label for the x-axis. Defaults to " ".
        y_label (str, optional): Label for the y-axis. Defaults to "Value".
        title (str, optional): Title of the plot. Defaults to "Violin Plot".
    """

    # Guard only reading and parsing; plotting problems should surface
    # unchanged.
    try:
        # JSON text is UTF-8; do not depend on the platform's default
        # encoding.
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{json_file}' not found.")
        return
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in '{json_file}'.")
        return

    df = pd.DataFrame({data_col: data})
    plt.figure(figsize=(6, 8))
    sns.violinplot(y=df[data_col])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.show()

# Demonstration data: dump a flat list of thirteen numbers to JSON, then plot it.
json_data = [
    20, 22, 25, 27, 30, 32, 35,
    40, 45, 21, 28, 31, 38,
]
with open('sample_data.json', 'w') as f:
    json.dump(json_data, f)

plot_violin_from_json(
    'sample_data.json',
    'Data',
    y_label='Distribution',
    title='Violin Plot of Data',
)


Write a function that takes a Pandas DataFrame and generates a pair plot to visualize the relationships between variables.


In [None]:


# 7. Write a function that takes a Pandas DataFrame and generates a pair plot to visualize the relationships between variables.
def plot_pair_plot(df):
    """
    Show a Seaborn pair plot of the relationships between a DataFrame's variables.

    Args:
        df (pd.DataFrame): Frame handed to ``sns.pairplot``.
    """

    sns.pairplot(df)

    # Put one title on the current figure as a whole; y=1.02 places it
    # just above the figure's top edge.
    figure = plt.gcf()
    figure.suptitle("Pair Plot of Variables", y=1.02)

    plt.show()

# Sample usage: the same height/weight/age values as example 3, rebuilt here.
data = {
    'Height': [160, 165, 170, 175, 180],
    'Weight': [55, 60, 68, 70, 75],
    'Age': [25, 30, 35, 40, 45],
}
df = pd.DataFrame(data)
plot_pair_plot(df)


Given a Pandas DataFrame, create a pie chart using Matplotlib to visualize the distribution of data in a specific column.


In [None]:


# 8. Given a Pandas DataFrame, create a pie chart using Matplotlib to visualize the distribution of data in a specific column.
def plot_pie_chart(df, data_col, labels=None, title="Pie Chart"):
    """
    Creates a pie chart to visualize the distribution of data in a specific column.

    Each row of ``data_col`` becomes one wedge, annotated with its share as a
    percentage to one decimal place.

    Args:
        df (pd.DataFrame): Input Pandas DataFrame.
        data_col (str): Name of the column to visualize the distribution of.
        labels (list, optional): List of labels for the pie chart segments. If None, uses DataFrame index.
        title (str, optional): Title of the plot. Defaults to "Pie Chart".
    """

    # Implement the documented fallback: without it, labels=None reached
    # plt.pie and the wedges were drawn unlabeled. `is None` is used because
    # a pandas Series passed as labels has no unambiguous truth value.
    if labels is None:
        labels = list(df.index)

    plt.figure(figsize=(8, 8))
    plt.pie(
        df[data_col],
        labels=labels,
        autopct='%1.1f%%',  # percentage text inside each wedge
        startangle=140,
        colors=sns.color_palette('pastel'),
    )
    plt.title(title)
    plt.show()

# Sample usage: four categories, labelled by the 'Category' column.
category_data = {
    'Category': ['A', 'B', 'C', 'D'],
    'Value': [20, 30, 25, 15],
}
category_df = pd.DataFrame(category_data)
plot_pie_chart(
    category_df,
    'Value',
    labels=category_df['Category'],
    title='Category Distribution',
)



Create a program that reads a CSV file into a Pandas DataFrame and uses Seaborn to create a swarm plot for data visualization.


In [None]:

# 9. Create a program that reads a CSV file into a Pandas DataFrame and uses Seaborn to create a swarm plot for data visualization.
def plot_swarm_from_csv(csv_file, x_col, y_col, x_label="Category", y_label="Value", title="Swarm Plot"):
    """
    Load a CSV file with pandas and show a Seaborn swarm plot of two of its columns.

    If the file does not exist, an error message is printed instead.

    Args:
        csv_file (str): Path to the CSV file.
        x_col (str): Column placed on the x-axis (the grouping column).
        y_col (str): Column placed on the y-axis (the measured values).
        x_label (str, optional): Text for the x-axis label. Defaults to "Category".
        y_label (str, optional): Text for the y-axis label. Defaults to "Value".
        title (str, optional): Text for the plot title. Defaults to "Swarm Plot".
    """

    try:
        frame = pd.read_csv(csv_file)
        plt.figure(figsize=(10, 6))

        groups = frame[x_col]
        measurements = frame[y_col]
        sns.swarmplot(x=groups, y=measurements)

        # Replace the axis labels with the caller's text and add the title.
        axes = plt.gca()
        axes.set_xlabel(x_label)
        axes.set_ylabel(y_label)
        axes.set_title(title)

        plt.show()
    except FileNotFoundError:
        print(f"Error: File '{csv_file}' not found.")

# Demonstration data: nine measurements spread over three groups, saved to CSV and plotted.
swarm_data = {
    'Category': ['A', 'A', 'B', 'B', 'C', 'C', 'A', 'B', 'C'],
    'Value': [10, 12, 11, 13, 12, 14, 9, 10, 13],
}
swarm_df = pd.DataFrame(swarm_data)
swarm_df.to_csv('swarm_data.csv', index=False)  # index=False: no row-number column

plot_swarm_from_csv(
    'swarm_data.csv',
    'Category',
    'Value',
    x_label='Groups',
    y_label='Measurements',
    title='Swarm Plot of Measurements by Group',
)



Implement a function that takes a Pandas DataFrame and generates a heatmap using Seaborn to visualize the correlation between variables.


In [None]:

# 10. Implement a function that takes a Pandas DataFrame and generates a heatmap using Seaborn to visualize the correlation between variables.
def plot_correlation_heatmap(df, title="Correlation Heatmap"):
    """
    Generates a heatmap to visualize the correlation between variables.

    Only numeric and boolean columns take part in the correlation; other
    columns (text, dates, ...) are left out rather than causing an error.

    Args:
        df (pd.DataFrame): Input Pandas DataFrame.
        title (str, optional): Title of the plot. Defaults to "Correlation Heatmap".
    """

    # pandas 2.x no longer silently drops non-numeric columns in corr(), so
    # select the usable columns explicitly. select_dtypes also works on
    # older pandas releases that lack corr(numeric_only=...).
    numeric_df = df.select_dtypes(include=["number", "bool"])

    # Compute the matrix before opening a figure so a failure here does not
    # leave an empty figure behind.
    corr_matrix = numeric_df.corr()

    plt.figure(figsize=(8, 6))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')  # annot=True writes each coefficient in its cell
    plt.title(title)
    plt.show()

# Sample usage: the same height/weight/age values as examples 3 and 7, rebuilt here.
data = {
    'Height': [160, 165, 170, 175, 180],
    'Weight': [55, 60, 68, 70, 75],
    'Age': [25, 30, 35, 40, 45],
}
df = pd.DataFrame(data)
plot_correlation_heatmap(
    df,
    title='Correlation between Height, Weight, and Age',
)