Run a Gemini model on Vertex AI to generate the graph code:

Install the Vertex AI SDK and other required packages

In [None]:
# Install or upgrade the Vertex AI SDK (google-cloud-aiplatform) for the current user, with pip output suppressed.
%pip install --upgrade --user --quiet google-cloud-aiplatform

Restart the runtime so the newly installed packages are loaded

In [None]:
import IPython

# Fetch the running IPython application and shut its kernel down.
# NOTE(review): the True argument is presumably the `restart` flag — confirm against ipykernel.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

If you are using Google Colab, run this step to authenticate

In [None]:
import sys

# Authenticate only when the google.colab module is already loaded,
# i.e. when this notebook is running inside Google Colab.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

In [None]:
# Google Cloud project ID and location handed to vertexai.init below.
# Both are empty placeholders here and are meant to be filled in before running.
PROJECT_ID = ""  # @param {type:"string"}
LOCATION = ""  # @param {type:"string"}

import vertexai

# Initialise the Vertex AI SDK with the project and location set above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

from vertexai.generative_models import GenerationConfig, GenerativeModel
import time

Define helper functions for calling the model and for sending chat messages, with retries

In [None]:
import time

def call_gemini(prompt, generation_config=None, model=None, max_retries=6, initial_wait=1):
    """Call ``generate_content`` on a Gemini model with bounded exponential backoff.

    Args:
        prompt: Prompt passed to ``model.generate_content``.
        generation_config: Generation settings forwarded to the model. When
            omitted, ``GenerationConfig(temperature=1.0)`` is created per call.
        model: Object exposing ``generate_content``. When omitted, the
            module-level ``model`` variable is used.
        max_retries: Maximum number of attempts (must be >= 1).
        initial_wait: Seconds to sleep after the first failure; doubled after
            every further failure.

    Returns:
        The ``.text`` attribute of the model response.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        NameError: If no ``model`` is passed and no module-level ``model`` exists.
        Exception: The error from the final attempt, once all attempts failed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")
    if generation_config is None:
        # Built per call so no config instance is shared between calls.
        generation_config = GenerationConfig(temperature=1.0)
    if model is None:
        # Resolve the global up front: a missing `model` is a setup error and
        # must not be retried as if it were a transient API failure.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError(
                "call_gemini: no `model` argument was given and no module-level "
                "`model` is defined"
            ) from None

    wait_time = initial_wait
    for attempt in range(1, max_retries + 1):
        try:
            return model.generate_content(prompt, generation_config=generation_config).text
        except Exception as exc:  # the SDK's specific transient error types are not imported here
            if attempt == max_retries:
                # Give up and surface the real error instead of retrying forever.
                raise
            print(f"call_gemini attempt {attempt}/{max_retries} failed: {exc!r}; retrying in {wait_time}s")
            time.sleep(wait_time)
            wait_time *= 2  # Double the wait time

def send_message_gemini(model, prompt, max_retries=6, initial_wait=1):
    """Send a message through ``model.send_message`` with bounded exponential backoff.

    Args:
        model: Object exposing ``send_message`` (the caller in this file passes
            a chat session).
        prompt: Message passed to ``model.send_message``.
        max_retries: Maximum number of attempts (must be >= 1).
        initial_wait: Seconds to sleep after the first failure; doubled after
            every further failure.

    Returns:
        The ``.text`` attribute of the response.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        Exception: The error from the final attempt, once all attempts failed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    wait_time = initial_wait
    for attempt in range(1, max_retries + 1):
        try:
            return model.send_message(prompt).text
        except Exception as exc:  # the SDK's specific transient error types are not imported here
            if attempt == max_retries:
                # Give up and surface the real error instead of retrying forever.
                raise
            print(f"send_message_gemini attempt {attempt}/{max_retries} failed: {exc!r}; retrying in {wait_time}s")
            time.sleep(wait_time)
            wait_time *= 2  # Double the wait time

Define the LLM system instruction and the task prompt, then send the prompt and save the reply

In [None]:
import os 
# Gemini model configured with a GIS-expert persona; the system instruction is given as a list of strings.
model_gis = GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction=[
        "Your role: A professional Geo-information scientist and programmer good at Python.", 
        " You have worked on Geographic information science more than 20 years, and know every detail and pitfall when processing spatial data and coding.",
        "Your programs are always concise and robust, considering the various data circumstances, such as map projections, column data types, and spatial joinings.",
        "You are also very experienced on generating maps",
    ],
)

# Open a chat session on the model; it is what gets passed to send_message_gemini later in this cell.
chat = model_gis.start_chat()

# Task prompt sent to the chat session. It asks for NetworkX code that builds a
# workflow graph of data nodes and operation nodes for the hazardous-waste question.
# It is a raw string, so the backslashes in the Windows paths are kept literally.
# NOTE(review): requirement 9 names the node attributes `node_type` and `data_path`,
# but its inline example uses "data_type", and the reply example at the end uses
# `path=`. These disagree with requirement 11 ("DO NOT change the attribute names").
# NOTE(review): requirement 19 hard-codes a local Windows path for the GraphML output.
prompt = r"""Your task: Generate a graph (data structure) only, whose nodes are (1) a series of consecutive steps and (2) data to solve this question:
 1) Find out Census tracts that contain hazardous waste facilities, then comppute and print out the population living in those tracts. The study area is North Carolina (NC), US.
2) Generate a population choropleth map for all tract polygons in NC, rendering the color by tract population; and then highlight the borders of tracts that have hazardous waste facilities. Please draw all polygons, not only the highlighted ones. The map size is 15*10 inches.

Your reply needs to meet these requirements:
 1. Think step by step.
2. Steps and data (both input and output) form a graph stored in NetworkX. Disconnected components are NOT allowed.
3. Each step is a data process operation: the input can be data paths or variables, and the output can be data paths or variables.
4. There are two types of nodes: a) operation node, and b) data node (both input and output data). These nodes are also input nodes for the next operation node.
5. The input of each operation is the output of the previous operations, except the those need to load data from a path or need to collect data.
6. You need to carefully name the output data node, making they human readable but not to long.
7. The data and operation form a graph.
8. The first operations are data loading or collection, and the output of the last operation is the final answer to the task.Operation nodes need to connect via output data nodes, DO NOT connect the operation node directly.
9. The node attributes include: 1) node_type (data or operation), 2) data_path (data node only, set to "" if not given ), and description. E.g., {‘name’: “County boundary”, “data_type”: “data”, “data_path”: “D:\Test\county.shp”,  “description”: “County boundary for the study area”}.
10. The connection between a node and an operation node is an edge.
11. Add all nodes and edges, including node attributes to a NetworkX instance, DO NOT change the attribute names.
12. DO NOT generate code to implement the steps.
13. Join the attribute to the vector layer via a common attribute if necessary.
14. Put your reply into a Python code block, NO explanation or conversation outside the code block(enclosed by ```python and ```).
15. Note that GraphML writer does not support class dict or list as data values.
16. You need spatial data (e.g., vector or raster) to make a map.
17. Do not put the GraphML writing process as a step in the graph.
18. Keep the graph concise, DO NOT use too many operation nodes.
19. Save the network into GraphML format, save it at: C:\Users\chait\Projects\LLM-Geo\Resident_at_risk_counting\Resident_at_risk_counting.graphml

Your reply example:
```python
import networkx as nx
G = nx.DiGraph()
# Add nodes and edges for the graph
# 1 Load hazardous waste site shapefile
G.add_node("haz_waste_shp_url", node_type="data", path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip", description="Hazardous waste facility shapefile URL")
G.add_node("load_haz_waste_shp", node_type="operation", description="Load hazardous waste facility shapefile")
G.add_edge("haz_waste_shp_url", "load_haz_waste_shp")
G.add_node("haz_waste_gdf", node_type="data", description="Hazardous waste facility GeoDataFrame")
G.add_edge("load_haz_waste_shp", "haz_waste_gdf")
...
``` """

# Send the task prompt through the chat session and keep the raw reply text.
response = send_message_gemini(chat, prompt)

# Save the response to a file
output_file_path = "C:/Users/chait/Projects/VertinetikLLM/generated_files/graph_response_LLM.py"

# The prompt asks for the reply inside a ```python ... ``` fence. Written verbatim,
# the fence lines would make the saved .py file a syntax error, so they are
# dropped when present. A reply without a fence is saved unchanged.
code_to_save = response
reply_lines = response.strip().splitlines()
if (
    len(reply_lines) >= 2
    and reply_lines[0].lstrip().startswith("```")
    and reply_lines[-1].strip() == "```"
):
    code_to_save = "\n".join(reply_lines[1:-1]) + "\n"

# Ensure the directory exists
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# Write the extracted code to the file
with open(output_file_path, "w", encoding="utf-8") as file:
    file.write(code_to_save)

# Optional: Print the raw response to the console for verification
print("Response from LLM:")
print(response)

Execute the generated graph code:

In [None]:
    # Workflow graph for the hazardous-waste / population task.
    # Data nodes and operation nodes alternate: data -> operation -> data.
    # Every operation node has its OWN name, distinct from the data node it produces.
    # Re-adding an existing node name only overwrites its attributes, so sharing a
    # name would turn the operation into a data node and create a self-loop edge.
    import networkx as nx  # imported here so this cell does not depend on a later cell

    G = nx.DiGraph()

    # 1 Load Census tract shapefile
    G.add_node("tract_shp_url", node_type="data", path="https://www2.census.gov/geo/tiger/TIGER2022/TRACT/tl_2022_37_tract.zip", description="Census tract shapefile URL")
    G.add_node("load_tract_shp", node_type="operation", description="Load Census tract shapefile")
    G.add_edge("tract_shp_url", "load_tract_shp")
    G.add_node("tract_gdf", node_type="data", description="Census tract GeoDataFrame")
    G.add_edge("load_tract_shp", "tract_gdf")

    # 2 Load hazardous waste site shapefile
    G.add_node("haz_waste_shp_url", node_type="data", path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip", description="Hazardous waste facility shapefile URL")
    G.add_node("load_haz_waste_shp", node_type="operation", description="Load hazardous waste facility shapefile")
    G.add_edge("haz_waste_shp_url", "load_haz_waste_shp")
    G.add_node("haz_waste_gdf", node_type="data", description="Hazardous waste facility GeoDataFrame")
    G.add_edge("load_haz_waste_shp", "haz_waste_gdf")

    # 3 Reproject hazardous waste site shapefile to the same CRS as census tract shapefile
    G.add_node("reproject_haz_waste", node_type="operation", description="Reproject hazardous waste facility GeoDataFrame to the same CRS as census tract GeoDataFrame")
    G.add_edge("haz_waste_gdf", "reproject_haz_waste")
    G.add_edge("tract_gdf", "reproject_haz_waste")
    G.add_node("reproj_haz_waste_gdf", node_type="data", description="Reprojected hazardous waste facility GeoDataFrame")
    G.add_edge("reproject_haz_waste", "reproj_haz_waste_gdf")

    # 4 Spatial join tract and hazardous waste site polygons
    G.add_node("join_tract_haz_waste", node_type="operation", description="Spatial join census tract and hazardous waste facility GeoDataFrames")
    G.add_edge("tract_gdf", "join_tract_haz_waste")
    G.add_edge("reproj_haz_waste_gdf", "join_tract_haz_waste")
    G.add_node("tract_haz_join_gdf", node_type="data", description="Joined GeoDataFrame")
    G.add_edge("join_tract_haz_waste", "tract_haz_join_gdf")

    # 5 Filter census tracts that contain hazardous waste facilities
    G.add_node("filter_haz_tracts", node_type="operation", description="Filter the joined GeoDataFrame to find tracts with hazardous waste facilities")
    G.add_edge("tract_haz_join_gdf", "filter_haz_tracts")
    G.add_node("filter_tract_haz_gdf", node_type="data", description="Filtered census tract GeoDataFrame")
    G.add_edge("filter_haz_tracts", "filter_tract_haz_gdf")

    # 6 Calculate population for tracts containing hazardous waste facilities
    G.add_node("calc_tract_pop", node_type="operation", description="Calculate population for tracts containing hazardous waste facilities")
    G.add_edge("filter_tract_haz_gdf", "calc_tract_pop")
    G.add_node("tract_pop_list", node_type="data", description="List of population for tracts containing hazardous waste facilities")
    G.add_edge("calc_tract_pop", "tract_pop_list")

    # 7 Print population data for tracts containing hazardous waste facilities
    G.add_node("print_tract_pop", node_type="operation", description="Print population data for tracts containing hazardous waste facilities")
    G.add_edge("tract_pop_list", "print_tract_pop")

    # 8 Generate choropleth map for all tracts in NC, with tract population as color
    G.add_node("plot_tract_pop_map", node_type="operation", description="Generate choropleth map for all tracts in NC, with tract population as color")
    G.add_edge("tract_gdf", "plot_tract_pop_map")
    G.add_node("tract_pop_map", node_type="data", description="Population choropleth map")
    G.add_edge("plot_tract_pop_map", "tract_pop_map")

    # 9 Highlight boundaries of tracts containing hazardous waste facilities
    G.add_node("highlight_haz_tracts", node_type="operation", description="Highlight boundaries of tracts containing hazardous waste facilities")
    G.add_edge("filter_tract_haz_gdf", "highlight_haz_tracts")
    G.add_edge("tract_pop_map", "highlight_haz_tracts")
    G.add_node("highlight_tract_boundary", node_type="data", description="Highlighted boundary choropleth map")
    G.add_edge("highlight_haz_tracts", "highlight_tract_boundary")

In [None]:
import networkx as nx
from google.cloud import aiplatform
from vertexai.preview.generative_models import GenerativeModel
from typing import Dict

class GraphToFunctions:
    """Ask a Gemini model to write one Python function per operation node of a graph.

    The graph is expected to hold nodes whose ``node_type`` attribute is
    ``"operation"`` or something else (data), each with an optional
    ``description`` attribute. For every operation node a prompt is built from
    the node's description and the descriptions of its predecessors (inputs)
    and successors (outputs), and the model's reply is printed.
    """

    def __init__(self, project_id: str, location: str):
        """
        Initialize the GraphToFunctions generator.

        Args:
            project_id: Project passed to ``aiplatform.init``.
            location: Location passed to ``aiplatform.init``.
        """
        aiplatform.init(project=project_id, location=location)
        self.model = GenerativeModel("gemini-1.5-pro")

    def _create_function_prompt(self, node: str, graph: "nx.DiGraph") -> str:
        """
        Create a prompt for generating a Python function based on node information.

        Args:
            node: Name of the operation node; also used as the function name.
            graph: Graph providing ``nodes``, ``predecessors`` and ``successors``.

        Returns:
            The prompt text, with inputs rendered as one bullet per predecessor
            and outputs as a comma-separated list of successor descriptions.
        """
        node_data = graph.nodes[node]

        # Render descriptions as plain text. Interpolating the lists directly
        # would put Python list reprs (brackets and quotes) into the prompt.
        input_lines = "\n".join(
            f"  - {graph.nodes[pred].get('description', 'Unknown')}"
            for pred in graph.predecessors(node)
        ) or "  - (none)"
        output_text = ", ".join(
            graph.nodes[succ].get('description', 'Unknown')
            for succ in graph.successors(node)
        ) or "(none)"

        prompt = f"""Write a complete Python function for the following GIS operation:

Task: {node_data.get('description', 'No description')}

Function Specifications:
- Name: {node}
- Input Parameters:
{input_lines}
- Output: {output_text}

Requirements:
1. Use geopandas for GIS operations
2. Include proper error handling
3. Add type hints for parameters and return values
4. Include a detailed docstring
5. Return appropriate data structures (GeoDataFrame, etc.)

Generate only the complete Python function code.
"""
        return prompt

    def _get_operation_nodes(self, graph: "nx.DiGraph") -> list:
        """Get all operation nodes from the graph (those with node_type == 'operation')."""
        operation_nodes = [node for node, attr in graph.nodes(data=True)
                           if attr.get('node_type') == 'operation']
        print(f"Found {len(operation_nodes)} operation nodes: {operation_nodes}")
        return operation_nodes

    def generate_and_print_functions(self, graph: "nx.DiGraph") -> None:
        """
        Generate and print Python functions for all operation nodes in the graph.

        Prints graph statistics, a fixed import header, and then, for each
        operation node, the prompt used and the model's reply. An error for
        one node is printed and does not stop processing of the other nodes.
        """
        print("Starting function generation...")
        print("\nAnalyzing graph structure:")
        print(f"Total nodes: {graph.number_of_nodes()}")
        print(f"Total edges: {graph.number_of_edges()}")

        operation_nodes = self._get_operation_nodes(graph)
        if not operation_nodes:
            print("No operation nodes found in the graph!")
            return

        print("\n# Required imports")
        print("import geopandas as gpd")
        print("import pandas as pd")
        print("import matplotlib.pyplot as plt")
        print("from typing import List, Optional, Union\n")

        for node in operation_nodes:
            try:
                print(f"\n# Generating function for: {node}")
                print(f"# Description: {graph.nodes[node].get('description', '')}")

                prompt = self._create_function_prompt(node, graph)
                print("\nUsing prompt:")
                print(prompt)

                response = self.model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.2,
                        "top_p": 0.8,
                        "top_k": 40,
                        "max_output_tokens": 2048,
                    }
                )

                if response and response.text:
                    print("\nGenerated function:")
                    print(response.text)
                else:
                    print(f"Warning: No response generated for {node}")

                print("\n" + "="*80 + "\n")

            except Exception as e:
                # Best effort per node: report the failure and continue with the next one.
                print(f"Error generating function for {node}: {str(e)}")

# Example usage
if __name__ == "__main__":
    # `G` is not created in this block; it must already exist in the session
    # as a graph whose nodes carry `node_type` and `description` attributes.

    # NOTE(review): project_id and location are hard-coded here rather than
    # reusing the PROJECT_ID / LOCATION values set earlier — confirm this is intended.
    generator = GraphToFunctions(
        project_id="qwiklabs-gcp-00-d4c3eb807729",
        location="us-west1"
    )
    
    generator.generate_and_print_functions(G)