# Miami Traffic Incidents Exploratory Notebook

In [None]:
# Load dependencies
import pandas as pd
import requests
import json
from google.cloud import bigquery
from google.oauth2 import service_account
import getpass
import pytz

In [2]:
# Make the request to grab the data
url = "https://traffic.mdpd.com/api/traffic"
# Use a timeout so an unresponsive endpoint cannot hang the kernel indefinitely
resp = requests.get(url, timeout=30)
# Stop here on an HTTP error status instead of handing an error page to the JSON parser
resp.raise_for_status()

In [3]:
# Display raw results (bounded preview, so the whole payload is not dumped into the cell output)
print(type(resp.text))
resp.text[:500]

<class 'str'>


'[{"CreateTime":"2025-07-18T15:58:25","Signal":"TRAFFIC ACCIDENT","Address":"SW 144TH CT / SW 56TH ST","Location":"","Grid":"1623","MapX":null,"MapY":null,"Longitude":-80.42789976,"Latitude":25.71399463},{"CreateTime":"2025-07-18T15:50:45","Signal":"TRAFFIC ACCIDENT","Address":"14700 SW 26TH ST","Location":"WALMART : LAT: <25.742604>  LONG: <-80.432621>","Grid":"1472","MapX":null,"MapY":null,"Longitude":-80.43262095,"Latitude":25.74260378},{"CreateTime":"2025-07-18T15:50:36","Signal":"TRAFFIC ACCIDENT","Address":"SW 72ND AVE / SW 47TH ST","Location":"","Grid":"1595","MapX":null,"MapY":null,"Longitude":-80.31073663,"Latitude":25.72637295},{"CreateTime":"2025-07-18T15:49:20","Signal":"TRAFFIC ACCIDENT WITH INJURIES","Address":"7401 NW 73RD ST","Location":"WENDYS P/LOT","Grid":"0849","MapX":null,"MapY":null,"Longitude":-80.3186518,"Latitude":25.84005843},{"CreateTime":"2025-07-18T15:46:22","Signal":"TRAFFIC ACCIDENT","Address":"7875 NW 12TH ST","Location":"S FL CENTER FOR BEHAVIORAL HEALT

In [4]:
# Parse the string inside the JSON response (converts from string to a list of dictionaries)
incidents = json.loads(resp.text)

# Explore the data
print(type(incidents))
# Guard against an empty list, where incidents[0] would raise IndexError
print(incidents[0] if incidents else "No incidents returned")

<class 'list'>
{'CreateTime': '2025-07-18T15:58:25', 'Signal': 'TRAFFIC ACCIDENT', 'Address': 'SW 144TH CT / SW 56TH ST', 'Location': '', 'Grid': '1623', 'MapX': None, 'MapY': None, 'Longitude': -80.42789976, 'Latitude': 25.71399463}


In [5]:
# Check how many dictionaries are in the list (each dictionary is one incident record)
len(incidents)

29

In [6]:
# Peek at the first two incident records
incidents[:2]

[{'CreateTime': '2025-07-18T15:58:25',
  'Signal': 'TRAFFIC ACCIDENT',
  'Address': 'SW 144TH CT / SW 56TH ST',
  'Location': '',
  'Grid': '1623',
  'MapX': None,
  'MapY': None,
  'Longitude': -80.42789976,
  'Latitude': 25.71399463},
 {'CreateTime': '2025-07-18T15:50:45',
  'Signal': 'TRAFFIC ACCIDENT',
  'Address': '14700 SW 26TH ST',
  'Location': 'WALMART : LAT: <25.742604>  LONG: <-80.432621>',
  'Grid': '1472',
  'MapX': None,
  'MapY': None,
  'Longitude': -80.43262095,
  'Latitude': 25.74260378}]

In [7]:
# Build a dataframe with one row per incident dictionary, then preview the first five rows
df = pd.DataFrame(data=incidents)
df.head(5)

Unnamed: 0,CreateTime,Signal,Address,Location,Grid,MapX,MapY,Longitude,Latitude
0,2025-07-18T15:58:25,TRAFFIC ACCIDENT,SW 144TH CT / SW 56TH ST,,1623,,,-80.4279,25.713995
1,2025-07-18T15:50:45,TRAFFIC ACCIDENT,14700 SW 26TH ST,WALMART : LAT: <25.742604> LONG: <-80.432621>,1472,,,-80.432621,25.742604
2,2025-07-18T15:50:36,TRAFFIC ACCIDENT,SW 72ND AVE / SW 47TH ST,,1595,,,-80.310737,25.726373
3,2025-07-18T15:49:20,TRAFFIC ACCIDENT WITH INJURIES,7401 NW 73RD ST,WENDYS P/LOT,849,,,-80.318652,25.840058
4,2025-07-18T15:46:22,TRAFFIC ACCIDENT,7875 NW 12TH ST,S FL CENTER FOR BEHAVIORAL HEALTH,1210,,,-80.324675,25.782967


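While observing the traffic website, I have noticed duplicate rows on several occasions. It is not clear whether there is a reason for the duplicates (for example, separate reports of the same incident), so for now I remove exact duplicate rows. **TODO:** revisit this once the cause is understood.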

In [8]:
# Remove duplicates (exact duplicate rows only) and report how many were dropped
rows_before = len(df)
df = df.drop_duplicates()
print(f"Dropped {rows_before - len(df)} duplicate row(s)")

# info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   CreateTime  29 non-null     object 
 1   Signal      29 non-null     object 
 2   Address     29 non-null     object 
 3   Location    29 non-null     object 
 4   Grid        29 non-null     object 
 5   MapX        0 non-null      object 
 6   MapY        0 non-null      object 
 7   Longitude   29 non-null     float64
 8   Latitude    29 non-null     float64
dtypes: float64(2), object(7)
memory usage: 2.2+ KB
None


Unnamed: 0,CreateTime,Signal,Address,Location,Grid,MapX,MapY,Longitude,Latitude
0,2025-07-18T15:58:25,TRAFFIC ACCIDENT,SW 144TH CT / SW 56TH ST,,1623,,,-80.4279,25.713995
1,2025-07-18T15:50:45,TRAFFIC ACCIDENT,14700 SW 26TH ST,WALMART : LAT: <25.742604> LONG: <-80.432621>,1472,,,-80.432621,25.742604
2,2025-07-18T15:50:36,TRAFFIC ACCIDENT,SW 72ND AVE / SW 47TH ST,,1595,,,-80.310737,25.726373
3,2025-07-18T15:49:20,TRAFFIC ACCIDENT WITH INJURIES,7401 NW 73RD ST,WENDYS P/LOT,849,,,-80.318652,25.840058
4,2025-07-18T15:46:22,TRAFFIC ACCIDENT,7875 NW 12TH ST,S FL CENTER FOR BEHAVIORAL HEALTH,1210,,,-80.324675,25.782967


In [15]:

# Convert to Eastern Time, then to UTC
create_time = pd.to_datetime(df['CreateTime'])
# Only localize naive timestamps: calling tz_localize on tz-aware data raises TypeError,
# which previously made this cell fail when it was run a second time.
if create_time.dt.tz is None:
    # 'America/New_York' is the canonical tz database name for the legacy alias 'US/Eastern'.
    # NOTE(review): wall-clock times inside a DST transition still raise here (pandas default);
    # choose an explicit `ambiguous=` / `nonexistent=` policy if that ever occurs.
    create_time = create_time.dt.tz_localize('America/New_York')
df['CreateTime'] = create_time.dt.tz_convert('UTC')

# Convert Signal to categorical datatype
df['Signal'] = df['Signal'].astype('category')

In [16]:
# Re-inspect the column dtypes after the CreateTime and Signal conversions
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   CreateTime  29 non-null     datetime64[ns, UTC]
 1   Signal      29 non-null     category           
 2   Address     29 non-null     object             
 3   Location    29 non-null     object             
 4   Grid        29 non-null     object             
 5   MapX        0 non-null      object             
 6   MapY        0 non-null      object             
 7   Longitude   29 non-null     float64            
 8   Latitude    29 non-null     float64            
dtypes: category(1), datetime64[ns, UTC](1), float64(2), object(5)
memory usage: 2.2+ KB


In [11]:
# Connect to BigQuery
# Prompt for the GCP project ID
project_id = input("Enter your GCP project ID: ") # miami-traffic

# Prompt for the service-account JSON key path (getpass does not echo the typed value)
key_path = getpass.getpass("Enter full path to your service account JSON key file: ")

# Build credentials from the key file and create a client bound to the chosen project
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials=credentials, project=project_id)

# Smoke-test the connection by listing the datasets visible in the project
print("🔄 Connecting to BigQuery...")
datasets = list(client.list_datasets())
if not datasets:
    print("✅ Connected, but no datasets found in this project.")
else:
    print("✅ Connection successful! Found the following datasets:")
    for dataset in datasets:
        print(f"  - {dataset.dataset_id}")

🔄 Connecting to BigQuery...
✅ Connection successful! Found the following datasets:
  - mdpd_traffic_data


In [14]:
# Location to upload data (project->dataset->table name)
# Use the project entered when connecting, so the client and the target table always agree
table_id = f"{project_id}.mdpd_traffic_data.mdpd_data"

# Append new data to table (stated explicitly rather than relying on the API default)
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND
)
# NOTE(review): re-running this cell appends the same rows again; de-duplicate downstream
# or load via a staging table if the notebook is executed more than once per snapshot.
job = client.load_table_from_dataframe(df, table_id, job_config=job_config)
job.result()  # blocks until the load job finishes and raises if it failed
print("✅ Data uploaded to BigQuery!")



✅ Data uploaded to BigQuery!
