In [21]:
!pip3 install plotly matplotlib -U google-generativeai pandas ipywidgets

Collecting google-generativeai
  Downloading google_generativeai-0.8.3-py3-none-any.whl.metadata (3.9 kB)
Collecting protobuf (from google-generativeai)
  Downloading protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl.metadata (592 bytes)
Downloading google_generativeai-0.8.3-py3-none-any.whl (160 kB)
Downloading protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl (414 kB)
Installing collected packages: protobuf, google-generativeai
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
  Attempting uninstall: google-generativeai
    Found existing installation: google-generativeai 0.8.2
    Uninstalling google-generativeai-0.8.2:
      Successfully uninstalled google-generativeai-0.8.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.1

In [1]:
import pandas as pd
import subprocess
import importlib
import traceback
import sys
import re
import os

import google.generativeai as genai
# Read the API key from the environment so it is never hard-coded in the notebook.
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    # Non-fatal: surface the likely misconfiguration here instead of only when
    # generate() later reports an error from the model call.
    print("Warning: GOOGLE_API_KEY is not set; Gemini requests may fail.")
genai.configure(api_key=api_key)

# Shared model handle used by generate().
model = genai.GenerativeModel("gemini-1.5-flash")

In [2]:
# Opening fence (optional language tag, optional trailing blanks, LF or CRLF),
# then the body up to the next fence -- or up to the end of the text when the
# response was cut off before the closing fence.
_CODE_FENCE_RE = re.compile(r"```[ \t]*([\w+\-]*)[ \t]*\r?\n(.*?)(?:```|\Z)", re.DOTALL)

# Fence tags treated as Python; the empty tag is an untagged fence.
_PYTHON_FENCE_TAGS = frozenset({"", "python", "python3", "py"})


def extract_code(generated_code):
    """Return the Python source contained in a model response.

    The response is scanned for Markdown code fences. Fences are paired in order,
    so the closing fence of one block is never mistaken for the opening fence of
    the next. The body of the first fence that is untagged or tagged as Python
    (``python``, ``python3``, ``py``, case-insensitive) is returned with
    surrounding whitespace stripped. A fence that is never closed (for example a
    truncated response) extends to the end of the text.

    Args:
        generated_code: Raw text returned by the model.

    Returns:
        The stripped code of the first Python/untagged fence; the whole stripped
        text when no such fence exists; ``""`` when the input is not a string
        (an error message is printed in that case).
    """
    if not isinstance(generated_code, str):
        print(f"Error extracting code: expected a string, got {type(generated_code).__name__}")
        return ""

    for fence in _CODE_FENCE_RE.finditer(generated_code):
        if fence.group(1).lower() in _PYTHON_FENCE_TAGS:
            return fence.group(2).strip()

    # No usable fence: assume the whole response is code.
    return generated_code.strip()

In [3]:
def install_required_packages(generated_code):
    """Install top-level packages imported by ``generated_code`` that cannot be imported.

    Lines beginning with ``import X`` or ``from X`` are located with a regular
    expression; ``X`` is the top-level package name because ``\\w+`` stops at the
    first dot. Each distinct name is imported, and on ``ImportError`` it is
    installed with ``pip`` under the running interpreter.

    NOTE: a later cell in this notebook defines another function with the same
    name; whichever definition was executed last is the one in effect.

    SECURITY: package names come from model-generated text and are passed to
    ``pip install`` unchecked -- review the generated code before trusting it.

    Args:
        generated_code: Text to scan for import statements.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import_statements = re.findall(r'^\s*(?:import|from)\s+(\w+)', generated_code, re.MULTILINE)

    # Sorted so that the install order (and the printed log) is deterministic.
    for library in sorted(set(import_statements)):
        try:
            # Attempt to import the library
            importlib.import_module(library)
            continue
        except ImportError:
            # ModuleNotFoundError is a subclass of ImportError, so this single
            # handler covers both; a separate handler after it could never run.
            pass

        print(f"Attempting to install '{library}' as '{library}' was not found...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", library])
        except subprocess.CalledProcessError as e:
            print(f"Failed to install '{library}': {e}")
        except Exception:
            print(f"Unexpected error while installing '{library}': {traceback.format_exc()}")
        else:
            # Let the import system notice packages added after interpreter start.
            importlib.invalidate_caches()
            print(f"'{library}' successfully installed.")

In [8]:
def execute_code_from_response(generated_code):
    """Extract the code from a model response, install its imports, and run it.

    The code is extracted *before* dependency installation so that the installer
    receives plain Python source rather than the raw response (which may carry
    Markdown fences and prose).

    SECURITY: the extracted code is model-generated and is executed with ``exec``
    in this kernel with full privileges. Only run this on responses you are
    prepared to trust.

    Args:
        generated_code: Raw text returned by the model.

    Returns:
        None. Errors are reported with ``print`` rather than raised.
    """
    code = extract_code(generated_code)
    if not code:
        print("No executable code found in the model response.")
        return

    try:
        install_required_packages(code)
    except Exception as e:
        print(f"Error installing required packages: {e}")
        return

    # Namespace handed to the generated code; pandas is pre-bound as ``pd``.
    globals_dict = {"pd": pd}

    try:
        exec(code, globals_dict)
    except SyntaxError as e:
        print(f"Syntax error in the generated code:\n{e}")
        print("Code execution aborted.")
    except Exception:
        print(f"Runtime error during code execution:\n{traceback.format_exc()}")
        print("Code execution aborted.")

In [9]:
def generate(csv_data):
    """Ask the model for Pandas/Plotly analysis code and run what it returns.

    ``csv_data`` is interpolated into the prompt as-is (the calls in this
    notebook pass a CSV file name). The response text is printed and then handed
    to ``execute_code_from_response``. Any exception raised along the way is
    caught and reported with ``print``.

    Args:
        csv_data: Value describing the CSV data, embedded in the prompt.

    Returns:
        None.
    """
    instructions = f"""
    You are provided with a CSV file containing data: {csv_data}.

    Write Python code using Pandas and Plotly to achieve the following:
    1. Summarize the data by calculating key statistics for each relevant numeric column (e.g., totals, averages, min, max).
    2. Create a set of diverse, visually engaging, and easy-to-interpret visualizations to uncover insights from the data:
       - A bar chart comparing values across categories, with color-coding, clearly labeled axes, and tooltips to display exact values on hover.
       - A line chart to display trends over time if the data includes time-series columns. Use data point markers, a gradient line, and tooltips to enhance readability.
       - Add additional relevant visualizations, such as a scatter plot for correlations or a pie chart for proportions, choosing chart types suited to the data's structure.
    3. Style each chart to ensure a clean, modern appearance that aids understanding:
       - Apply a cohesive color palette with contrasts, subtle shadows, and gradient fills where suitable.
       - Add titles, axis labels, and legends that explain each chart clearly.
       - Use rounded corners, and position elements like legends to avoid overlapping with data.
       - Include interactive elements like tooltips and hover effects, helping users to explore data points in detail.

    Assume the CSV data format is standard, but the column names and content may vary. Ensure the code dynamically identifies columns to adapt to any dataset structure.
    """

    try:
        model_text = model.generate_content(instructions).text

        if not model_text:
            print("No code generated by the model. Check the input or prompt.")
        else:
            print("Generated code received, executing...")
            print(model_text)
            execute_code_from_response(model_text)
    except Exception as err:
        print(f"Error generating code: {err}")


import re
import subprocess
import sys
import importlib
import traceback
import ast

def install_required_packages(generated_code):
    """
    Parses the generated code to find all import statements, installs any missing packages,
    and returns a dictionary mapping the names those imports bind to the imported objects.

    Binding follows Python's own import semantics:
      * ``import a.b``        binds ``a`` to the top-level package (``a.b`` is still loaded);
      * ``import a.b as x``   binds ``x`` to the submodule ``a.b``;
      * ``from m import n``   binds ``n`` to the attribute ``m.n``, importing ``m.n`` as a
        submodule when it is not already an attribute of ``m``.
    Relative imports (``from . import x``, ``from .pkg import x``) and wildcard imports are
    skipped: they cannot be resolved to an installable package or a single bound name.

    SECURITY: package names come from model-generated code and are passed to ``pip install``
    unchecked -- review the generated code before trusting it.

    Args:
        generated_code: Python source to analyse. It must be parseable by ``ast.parse``
            (for example, without Markdown fences).

    Returns:
        dict mapping bound name -> imported module or attribute. An empty dict is returned
        (after printing the error) when the source cannot be parsed.
    """
    # bound name -> (module to import, attribute to fetch or None, bind top-level package?)
    import_alias_map = {}
    unique_libraries = set()

    try:
        # Parse the generated code into an AST
        tree = ast.parse(generated_code)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    top_level = alias.name.split('.')[0]
                    unique_libraries.add(top_level)  # Base library
                    if alias.asname:
                        import_alias_map[alias.asname] = (alias.name, None, False)
                    else:
                        import_alias_map[top_level] = (alias.name, None, True)
            elif isinstance(node, ast.ImportFrom):
                if node.module is None or node.level:
                    continue  # Relative import: nothing to install or pre-bind
                unique_libraries.add(node.module.split('.')[0])  # Base library
                for alias in node.names:
                    if alias.name == '*':
                        print(f"Skipping wildcard import from '{node.module}'.")
                        continue
                    import_alias_map[alias.asname or alias.name] = (node.module, alias.name, False)
    except Exception as e:
        print(f"Error parsing import statements: {e}")
        return {}

    # Install missing libraries (sorted so the order and log are deterministic)
    for library in sorted(unique_libraries):
        try:
            importlib.import_module(library)
        except ImportError:
            try:
                print(f"Installing '{library}'...")
                subprocess.check_call([sys.executable, "-m", "pip", "install", library])
                # Let the import system notice packages added after interpreter start.
                importlib.invalidate_caches()
                print(f"Successfully installed '{library}'.")
            except subprocess.CalledProcessError as e:
                print(f"Failed to install '{library}': {e}")
            except Exception:
                print(f"Unexpected error while installing '{library}': {traceback.format_exc()}")

    # Import modules and map aliases
    imported_modules = {}
    for bound_name, (module_name, attr, bind_top_level) in import_alias_map.items():
        module_path = module_name if attr is None else f"{module_name}.{attr}"
        try:
            module = importlib.import_module(module_name)
            if attr is not None:
                # For 'from module import name [as alias]'
                if not hasattr(module, attr):
                    # 'name' may be a submodule that has not been loaded yet.
                    try:
                        importlib.import_module(module_path)
                    except ImportError:
                        pass
                if not hasattr(module, attr):
                    print(f"Attribute '{attr}' not found in module '{module_name}'.")
                    continue
                imported_modules[bound_name] = getattr(module, attr)
            elif bind_top_level:
                # For 'import a.b': the submodule is loaded above, the name refers to 'a'
                imported_modules[bound_name] = importlib.import_module(module_name.split('.')[0])
            else:
                # For 'import module as alias'
                imported_modules[bound_name] = module
        except ImportError as e:
            print(f"Failed to import '{module_path}': {e}")
        except Exception:
            print(f"Error importing '{module_path}': {traceback.format_exc()}")

    return imported_modules


In [13]:
generate("data.csv")

Generated code received, executing...
```python
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the CSV file
df = pd.read_csv("data.csv")

# 1. Summarize the data
numeric_columns = df.select_dtypes(include=['number']).columns
summary = df[numeric_columns].describe()
print(summary)

# 2. Create visualizations

# 2.1 Bar chart (comparing values across categories)
# Assuming there's a categorical column named 'category' and a numeric column named 'value'
fig_bar = px.bar(df, x="category", y="value", color="category",
                 title="Comparison of Values Across Categories",
                 labels={"category": "Category", "value": "Value"},
                 hover_data=["category", "value"])
fig_bar.update_traces(marker_line_color="rgb(8,48,107)", marker_line_width=1.5, opacity=0.6)
fig_bar.update_layout(
    title_font_size=20,
    xaxis_title_font_size=16,
    yaxis_title_font_size=16,
    legend_title_font_size=14,
    plot_bgcolor="#f2f2

In [11]:
generate("data2.csv")

Generated code received, executing...
```python
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the CSV data
df = pd.read_csv('data2.csv')

# 1. Summarize the data
numeric_cols = df.select_dtypes(include=['number']).columns
summary = df[numeric_cols].describe()
print(summary)

# 2. Create visualizations

# Categorical bar chart
categorical_cols = df.select_dtypes(include=['object']).columns
if len(categorical_cols) > 0:
    # Choose a categorical column for the bar chart
    cat_col = categorical_cols[0]
    fig = px.bar(df, x=cat_col, y=numeric_cols[0], color=cat_col,
                title=f'Distribution of {numeric_cols[0]} by {cat_col}',
                labels={'x': cat_col, 'y': numeric_cols[0]})
    fig.update_traces(hovertemplate='%{x}: %{y:.2f}')  # Add tooltips
    fig.update_layout(
        showlegend=True,
        legend_title=cat_col,
        legend_orientation='h',
        legend_xanchor='center',
        legend_x=0.5,
        legen

In [30]:
generate("data3.csv")

Generated code received, executing...
Summary Statistics:
        Sales_Units  Sales_Amount
count    12.000000     12.000000
mean    535.833333  10716.666667
std      54.682278   1093.645558
min     450.000000   9000.000000
25%     495.000000   9900.000000
50%     540.000000  10800.000000
75%     575.000000  11500.000000
max     620.000000  12400.000000


In [31]:
generate("data4.csv")

Generated code received, executing...
Data Summary:
       Days_Present  Performance_Rating
count     10.000000           10.000000
mean     213.500000            4.430000
std        7.472171            0.176698
min      200.000000            4.200000
25%      210.000000            4.300000
50%      215.000000            4.450000
75%      218.750000            4.575000
max      225.000000            4.700000


In [14]:
generate("data5.csv")

Generated code received, executing...
```python
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv('data5.csv')

# 1. Data Summary
print("Data Summary:")
print(df.describe())

# 2. Visualizations
# Identify relevant columns for visualization based on data types
numeric_cols = df.select_dtypes(include=['number']).columns
categorical_cols = df.select_dtypes(include=['object']).columns
time_cols = df.select_dtypes(include=['datetime64[ns]']).columns

# Bar chart comparing values across categories
if len(categorical_cols) > 0:
    fig = px.bar(df, x=categorical_cols[0], y=numeric_cols[0], color=categorical_cols[0],
                 title="Comparison of Values Across Categories", labels={"x": categorical_cols[0], "y": numeric_cols[0]},
                 color_discrete_sequence=px.colors.qualitative.Pastel)
    fig.update_traces(hovertemplate='Category: %{x}<br>Value: %{y}')
    fig.show()

# Line cha

Runtime error during code execution:
Traceback (most recent call last):
  File "/var/folders/qf/r9wbsqpn1rn0s_sr9p5s4f1m0000gn/T/ipykernel_44188/1012904182.py", line 14, in execute_code_from_response
    exec(code, globals_dict)
  File "<string>", line 49, in <module>
  File "/Users/iassn0rma1/Library/CloudStorage/GoogleDrive-amanbarthwal0110@gmail.com/My Drive/Programming/Projects/WG/.venv/lib/python3.12/site-packages/plotly/graph_objs/_figure.py", line 792, in update_layout
    return super(Figure, self).update_layout(dict1, overwrite, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/iassn0rma1/Library/CloudStorage/GoogleDrive-amanbarthwal0110@gmail.com/My Drive/Programming/Projects/WG/.venv/lib/python3.12/site-packages/plotly/basedatatypes.py", line 1391, in update_layout
    self.layout.update(dict1, overwrite=overwrite, **kwargs)
  File "/Users/iassn0rma1/Library/CloudStorage/GoogleDrive-amanbarthwal0110@gmail.com/My Drive/Programmi

In [33]:
generate("data6.csv")

Generated code received, executing...
Data Summary:
        Feedback_Rating  Review_Length  Purchase_Count
count        10.000000      10.000000       10.000000
mean          4.390000     192.000000        3.000000
std           0.366515      26.583203        1.490712
min           3.800000     150.000000        1.000000
25%           4.125000     172.500000        2.000000
50%           4.400000     195.000000        3.000000
75%           4.675000     210.000000        4.000000
max           4.900000     230.000000        5.000000


In [34]:
generate("data7.csv")

Generated code received, executing...
       Temperature(°C)  Humidity(%)  Rainfall(mm)
count          10.0000    10.000000     10.000000
mean            9.0000    51.200000     33.700000
std            10.1653    15.690053     37.160463
min            -5.0000    28.000000      1.000000
25%            -0.7500    42.750000      4.250000
50%            13.0000    49.000000      9.500000
75%            15.7500    64.250000     73.250000
max            22.0000    72.000000     85.000000
