In [28]:
import psycopg2
import os
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd

# Load the DB_* settings from a local .env file into the environment.
load_dotenv()

# Target database name, printed as a quick sanity check that .env was picked up.
# No trailing comma here: that would make this a 1-tuple instead of a string.
dbname = os.getenv("DB_NAME")
print(dbname)

try:
    conn = psycopg2.connect(
        dbname=dbname,
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
        host=os.getenv("DB_HOST"),
        port=os.getenv("DB_PORT"),
    )
except Exception as e:
    print("Connection failed:", e)
    # Re-raise: without a live connection the query below cannot run, and
    # carrying on would only fail later on `conn` (or silently reuse a stale one).
    raise
print("Connection established successfully!")

# Read the whole potholes table. The cursor is closed by the `with` block and
# the connection by `finally`, even if the query itself raises.
try:
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM potholes")
        rows = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        column_names = [desc[0] for desc in cursor.description]
finally:
    conn.close()

df = pd.DataFrame(rows, columns=column_names)

('HotPot',)
Connection established successfully!


In [29]:
# Reduce first_reported_date to a plain calendar date (drops the time of day).
# The conversion is column-wide, so one vectorised assignment is enough; wrapping
# it in a per-row loop repeats the same work for every row and skips it entirely
# when the frame is empty.
df['first_reported_date'] = pd.to_datetime(df['first_reported_date']).dt.date

In [30]:
# Narrow the full table down to the three fields used for prioritisation:
# the pothole id, its size and the date it was first reported.
prioritisation_columns = ['pothole_id', 'pothole_size', 'first_reported_date']
df_one = df.loc[:, prioritisation_columns]
df_one


Unnamed: 0,pothole_id,pothole_size,first_reported_date
0,40,Small,2024-06-19
1,41,Medium,2024-06-19
2,43,Large,2024-06-19
3,44,Small,2024-06-19
4,45,Small,2024-07-02
5,48,Large,2024-07-02
6,42,Medium,2024-06-19
7,53,Small,2024-07-03
8,54,Small,2024-07-03
9,55,Small,2024-07-03


In [31]:
# First response from Gemini: pothole ids in the suggested repair order,
# each wrapped in a {"pothole_id": ...} record.
json_data = [
    {"pothole_id": pothole_id}
    for pothole_id in (43, 48, 41, 42, 56, 40, 44, 45, 53, 54, 55)
]

### Part 2: Getting the distances and travel times from the openrouteservice API

##### Request body format expected by the API (each pair is [longitude, latitude]): {"coordinates":[[8.681495,49.41461],[8.686507,49.41943],[8.687872,49.420318]]}

In [32]:
import json

# json_data is already a list of {"pothole_id": ...} dicts, so it is used
# directly; serialising it to JSON and parsing it straight back adds nothing.
data = json_data

# Suggested repair order as a flat list of pothole ids.
order = [item['pothole_id'] for item in data]
print(order)

# Reorder the id/coordinates rows to follow `order`.
# .loc raises KeyError if `order` contains an id that is not present in df.
df_twoPrep = df[['pothole_id', 'coordinates']]
df_twoPrep = df_twoPrep.set_index('pothole_id').loc[order].reset_index()

# Each 'coordinates' value is a comma-separated string (presumably "lat,long",
# going by the swap below - confirm against the table). Parse every part as a
# float and swap the first two so each pair is emitted as [second, first].
coordinates = []
for raw_pair in df_twoPrep['coordinates']:
    parts = [float(value) for value in raw_pair.split(',')]
    coordinates.append([parts[1], parts[0]])
coordinates



[43, 48, 41, 42, 56, 40, 44, 45, 53, 54, 55]


[[-80.52067374289663, 43.478928936613585],
 [-80.51149, 43.41541],
 [-80.51565095313065, 43.47555811807502],
 [-80.51872032255609, 43.483713466146924],
 [-80.50617618622455, 43.492997054664215],
 [-80.51461819739562, 43.482921333260826],
 [-80.521015, 43.48088],
 [-80.51078069502678, 43.482236381246224],
 [-80.5098641632737, 43.49572546051195],
 [-80.5213569262076, 43.489874437090805],
 [-80.52306039824327, 43.47493797604383]]

In [33]:
ORS_API_KEY = os.getenv("ORS_KEY")

import requests

# Request headers; the API key is passed verbatim in the Authorization header.
headers = {
    'Content-Type': 'application/json; charset=utf-8',
    'Accept': 'application/json, application/geo+json, application/gpx+xml, img/png; charset=utf-8',
    'Authorization': ORS_API_KEY
}

# Ordered list of coordinate pairs describing the route to drive.
payload = {
    'coordinates': coordinates
}

# Ask for driving directions through all waypoints. The timeout stops the cell
# from hanging forever on a stalled connection, and raise_for_status() turns an
# HTTP error (bad key, quota, invalid coordinates) into an immediate exception
# instead of a confusing KeyError on 'routes' further down.
response = requests.post(
    'https://api.openrouteservice.org/v2/directions/driving-car/json',
    headers=headers,
    json=payload,
    timeout=30,
)
response.raise_for_status()

# Parse the body once; the first route's segments each carry a 'distance' and
# a 'duration' value.
segments = response.json()['routes'][0]['segments']
distance = [segment['distance'] for segment in segments]
duration = [segment['duration'] for segment in segments]

# Pad both lists with a trailing 0 so they gain one extra entry
# (presumably for the final stop, which has no onward leg - confirm).
duration.append(0)
distance.append(0)




In [34]:
# Put the prioritisation table into the suggested repair order.
df_three = df_one.set_index('pothole_id').loc[order].reset_index()

# Attach the per-row distance and duration values from the routing step.
df_three = df_three.assign(**{
    'distance (meters)': distance,
    'duration (seconds)': duration,
})

# Derived columns: the same figures divided by 1000 and by 60 respectively.
df_three = df_three.assign(**{
    'distance (KM)': df_three['distance (meters)'] / 1000,
    'duration (MINS)': df_three['duration (seconds)'] / 60,
})
df_three

Unnamed: 0,pothole_id,pothole_size,first_reported_date,distance (meters),duration (seconds),distance (KM),duration (MINS)
0,43,Large,2024-06-19,15621.1,870.6,15.6211,14.51
1,48,Large,2024-07-02,14741.0,898.6,14.741,14.976667
2,41,Medium,2024-06-19,1416.5,135.3,1.4165,2.255
3,42,Medium,2024-06-19,1934.7,230.2,1.9347,3.836667
4,56,Large,2024-07-04,3251.9,391.5,3.2519,6.525
5,40,Small,2024-06-19,1532.4,203.1,1.5324,3.385
6,44,Small,2024-06-19,1287.7,131.9,1.2877,2.198333
7,45,Small,2024-07-02,2814.4,294.8,2.8144,4.913333
8,53,Small,2024-07-03,1385.6,256.1,1.3856,4.268333
9,54,Small,2024-07-03,2118.2,267.3,2.1182,4.455


In [35]:
# Serialise the route table as a JSON array with one object per row.
# NOTE(review): in the recorded output first_reported_date comes out as an
# epoch-milliseconds integer - confirm that is the format consumers expect.
df_json = df_three.to_json(orient="records")
df_json

'[{"pothole_id":43,"pothole_size":"Large","first_reported_date":1718755200000,"distance (meters)":15621.1,"duration (seconds)":870.6,"distance (KM)":15.6211,"duration (MINS)":14.51},{"pothole_id":48,"pothole_size":"Large","first_reported_date":1719878400000,"distance (meters)":14741.0,"duration (seconds)":898.6,"distance (KM)":14.741,"duration (MINS)":14.9766666667},{"pothole_id":41,"pothole_size":"Medium","first_reported_date":1718755200000,"distance (meters)":1416.5,"duration (seconds)":135.3,"distance (KM)":1.4165,"duration (MINS)":2.255},{"pothole_id":42,"pothole_size":"Medium","first_reported_date":1718755200000,"distance (meters)":1934.7,"duration (seconds)":230.2,"distance (KM)":1.9347,"duration (MINS)":3.8366666667},{"pothole_id":56,"pothole_size":"Large","first_reported_date":1720051200000,"distance (meters)":3251.9,"duration (seconds)":391.5,"distance (KM)":3.2519,"duration (MINS)":6.525},{"pothole_id":40,"pothole_size":"Small","first_reported_date":1718755200000,"distance (m

In [36]:
"""
Install the Google AI Python SDK

$ pip install google-generativeai

See the getting started guide for more information:
https://ai.google.dev/gemini-api/docs/get-started/python
"""

import os

import google.generativeai as genai

# Authenticate the SDK with the key taken from the environment
# (raises KeyError if GEMINI_API_KEY is not set).
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Sampling / output settings handed to the model.
# See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Safety settings are left at their defaults.
# See https://ai.google.dev/gemini-api/docs/safety-settings
model = genai.GenerativeModel(model_name="gemini-1.5-flash", generation_config=generation_config)

# Fresh conversation with no prior turns.
chat_session = model.start_chat(history=[])


def df_to_markdown(df):
    """Render a pandas DataFrame as a Markdown table and return it as a string."""
    markdown_table = df.to_markdown()
    return markdown_table


# Prompt: column descriptions, the prioritisation rules, the expected JSON
# shape, then the dataset itself rendered as a Markdown table.
# NOTE(review): the prompt asks for "estimated fix dates", but the requested
# JSON format only has a pothole_id field - confirm which one is intended.
prompt = f"""
I have a dataset of potholes with the following columns:

* pothole_id: Unique identifier for the pothole
* pothole_size: Size of the pothole (Small, Medium, Large)
* first_reported_date: Date the pothole was first reported

Please prioritize the potholes for repair based on the following criteria:
* Large potholes should be fixed within 7 days
* Medium potholes should be fixed within 14 days
* Small potholes should be fixed within 30 days

Output the prioritized list of pothole IDs and their estimated fix dates in the following JSON format:

[
  {{"pothole_id": <pothole_id1>}},
  {{"pothole_id": <pothole_id2>}},
  ...
]

Here's the dataset:
{df_to_markdown(df_one)}
"""

# Send the prompt as the first message of the chat and show the raw reply.
response = chat_session.send_message(prompt)

print(response.text)

```json
[
  {"pothole_id": 43},
  {"pothole_id": 48},
  {"pothole_id": 56},
  {"pothole_id": 41},
  {"pothole_id": 42},
  {"pothole_id": 40},
  {"pothole_id": 44},
  {"pothole_id": 45},
  {"pothole_id": 53},
  {"pothole_id": 54},
  {"pothole_id": 55},
  {"pothole_id": 67}
]
```

**Explanation:**

1. **Prioritization:** The potholes are prioritized based on their size: Large > Medium > Small.
2. **Estimated Fix Dates:**
    * **Large Potholes:** Fixed within 7 days.
    * **Medium Potholes:** Fixed within 14 days.
    * **Small Potholes:** Fixed within 30 days.
3. **Ignoring Invalid Data:** Potholes with invalid pothole sizes (e.g., "Works??", "Asddjsk") are ignored.
4. **JSON Output:** The output is a JSON array containing objects for each prioritized pothole, with the `pothole_id` as the key.
5. **No Dates in JSON:** The JSON output only includes the `pothole_id` because the exact dates are not calculated. The estimated fix dates are mentioned in the explanation for understanding the 

In [47]:
def _extract_fenced_json(text):
    """Return the payload of the first ``` fenced block in `text`.

    Falls back to the whole (stripped) text when no fence is present, tolerates
    a missing closing fence, and drops an optional ``json`` language tag that
    follows the opening fence.
    """
    _, opening_fence, after_fence = text.partition('```')
    if not opening_fence:
        # No fenced block at all: assume the reply itself is the JSON document.
        return text.strip()
    # Everything up to the closing fence (or to the end if it is missing).
    payload = after_fence.partition('```')[0].strip()
    if payload[:4].lower() == 'json':
        payload = payload[4:]
    return payload.strip()


# Pull the JSON array out of the model's reply and parse it into Python objects.
# Searching for the fence itself (rather than the first occurrence of the word
# "json") keeps this correct when "json" also appears in prose before the block
# or when the fence carries no language tag.
response_text = response.text
json_str = _extract_fenced_json(response_text)
pothole_data = json.loads(json_str)  # raises json.JSONDecodeError on malformed output

pothole_data

[{'pothole_id': 43},
 {'pothole_id': 48},
 {'pothole_id': 56},
 {'pothole_id': 41},
 {'pothole_id': 42},
 {'pothole_id': 40},
 {'pothole_id': 44},
 {'pothole_id': 45},
 {'pothole_id': 53},
 {'pothole_id': 54},
 {'pothole_id': 55},
 {'pothole_id': 67}]