In [1]:
{
    "cells": [
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "import matplotlib.pyplot as plt\n",
                "import numpy as np\n",
                "import pandas as pd\n",
                "import seaborn as sns\n",
                "from IPython.display import FileLink, display\n",
                "\n",
                "# Build a synthetic, all-numeric dataset and persist it as CSV and Excel.\n",
                "N_ROWS = 1000  # single knob for the dataset size (was repeated in every column)\n",
                "\n",
                "np.random.seed(42)  # For reproducibility\n",
                "# The random draws run in dict order; reordering the keys changes the generated values.\n",
                "data = {\n",
                "    'ID': np.arange(1, N_ROWS + 1),  # 1..N_ROWS inclusive\n",
                "    'Age': np.random.randint(18, 65, size=N_ROWS),  # upper bound is exclusive\n",
                "    'Salary': np.random.randint(30000, 120000, size=N_ROWS),  # upper bound is exclusive\n",
                "    'Height': np.random.uniform(1.5, 2.0, size=N_ROWS),\n",
                "    'Weight': np.random.uniform(50, 120, size=N_ROWS)\n",
                "}\n",
                "\n",
                "df = pd.DataFrame(data)\n",
                "\n",
                "# Save the same frame in CSV and Excel formats (row index omitted from both)\n",
                "csv_file_name = 'large_numeric_data.csv'\n",
                "excel_file_name = 'large_numeric_data.xlsx'\n",
                "df.to_csv(csv_file_name, index=False)\n",
                "# NOTE: writing .xlsx needs an Excel writer engine (e.g. openpyxl) installed in the kernel's environment\n",
                "df.to_excel(excel_file_name, index=False)\n",
                "\n",
                "print(f\"Files created successfully:\\n- {csv_file_name}\\n- {excel_file_name}\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Read both exports back from disk\n",
                "df_csv = pd.read_csv(filepath_or_buffer=csv_file_name)\n",
                "df_excel = pd.read_excel(io=excel_file_name)\n",
                "\n",
                "# Collect the names of the columns whose dtype is numeric, per file\n",
                "numeric_columns_csv = list(df_csv.select_dtypes(include='number').columns)\n",
                "numeric_columns_excel = list(df_excel.select_dtypes(include='number').columns)\n",
                "\n",
                "# Report what was detected in each file\n",
                "print(\"Numeric columns in CSV file:\", numeric_columns_csv)\n",
                "print(\"Numeric columns in Excel file:\", numeric_columns_excel)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Histogram of 'Salary', drawn only when the CSV load detected it as numeric\n",
                "if 'Salary' not in numeric_columns_csv:\n",
                "    print(\"The column 'Salary' is not available for a histogram.\")\n",
                "else:\n",
                "    plt.figure(figsize=(10, 6))\n",
                "    # histplot returns the Axes it drew on; title and axis labels are set on it in one call\n",
                "    sns.histplot(df_csv['Salary'], kde=True, color='blue').set(\n",
                "        title=\"Histogram of Salary\",\n",
                "        xlabel=\"Salary\",\n",
                "        ylabel=\"Frequency\",\n",
                "    )\n",
                "    plt.show()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Age-vs-Salary scatter, drawn only when both columns were detected as numeric\n",
                "if not {'Age', 'Salary'}.issubset(numeric_columns_csv):\n",
                "    print(\"The columns 'Age' and 'Salary' are not available for a scatter plot.\")\n",
                "else:\n",
                "    plt.figure(figsize=(10, 6))\n",
                "    # scatterplot returns the Axes it drew on; title and axis labels are set on it in one call\n",
                "    sns.scatterplot(data=df_csv, x='Age', y='Salary', color='green').set(\n",
                "        title=\"Scatter Plot: Age vs Salary\",\n",
                "        xlabel=\"Age\",\n",
                "        ylabel=\"Salary\",\n",
                "    )\n",
                "    plt.show()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Provide download links for the created files.\n",
                "# Each FileLink is rendered as a clickable link in the cell output via display().\n",
                "print(\"Link to download the CSV file:\")\n",
                "display(FileLink(csv_file_name))\n",
                "\n",
                "print(\"Link to download the Excel file:\")\n",
                "display(FileLink(excel_file_name))"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.9"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}

NameError: name 'null' is not defined