# Miami Traffic Incidents Exploratory Notebook

In [73]:
# Load dependencies
import pandas as pd
import requests
import json
from google.cloud import bigquery
from google.oauth2 import service_account
import getpass

In [74]:
# Make the request to grab the data
url = "https://traffic.mdpd.com/api/traffic"
# A timeout keeps a stalled connection from hanging the kernel indefinitely
# (requests applies no timeout unless one is given).
resp = requests.get(url, timeout=30)
# Fail here on an HTTP 4xx/5xx status instead of later, while trying to parse
# an error page as JSON.
resp.raise_for_status()

In [75]:
# Display raw results: the payload type plus a short preview of the body.
# Echoing the whole body would embed every returned record in the saved
# notebook output; the first 500 characters are enough to see the structure.
print(type(resp.text))
resp.text[:500]

<class 'str'>


'[{"CreateTime":"2025-06-27T14:21:21","Signal":"TRAFFIC ACCIDENT","Address":"S SR836 OFF RAMP W / NW 87TH AVE","Location":"NB","Grid":"4239","MapX":null,"MapY":null,"Longitude":-80.3367784,"Latitude":25.78203843},{"CreateTime":"2025-06-27T14:21:18","Signal":"TRAFFIC ACCIDENT","Address":"9301 W FLAGLER ST","Location":"AT&T","Grid":"1350","MapX":null,"MapY":null,"Longitude":-80.34794051,"Latitude":25.76927387},{"CreateTime":"2025-06-27T14:18:19","Signal":"TRAFFIC ACCIDENT","Address":"SW 71ST AVE / SW 9TH ST","Location":"","Grid":"1399","MapX":null,"MapY":null,"Longitude":-80.31019306,"Latitude":25.76172957},{"CreateTime":"2025-06-27T14:17:26","Signal":"HIT AND RUN","Address":"NW 165TH TER / NW 57TH AVE","Location":"OCCD NB","Grid":"0318","MapX":null,"MapY":null,"Longitude":-80.29316196,"Latitude":25.92333591},{"CreateTime":"2025-06-27T14:10:51","Signal":"TRAFFIC ACCIDENT","Address":"NW 41ST ST / NW 107TH AVE","Location":"","Grid":"1002","MapX":null,"MapY":null,"Longitude":-80.36950021,"L

In [76]:
# Parse the JSON text of the response body into Python objects
# (for this feed: a list with one dictionary per incident)
incidents = json.loads(resp.text)

# Explore the data
print(type(incidents))
# Guard the peek at the first record: an empty array from the API would make
# incidents[0] raise IndexError and stop the notebook here.
print(incidents[0] if incidents else "No incidents returned")

<class 'list'>
{'CreateTime': '2025-06-27T14:21:21', 'Signal': 'TRAFFIC ACCIDENT', 'Address': 'S SR836 OFF RAMP W / NW 87TH AVE', 'Location': 'NB', 'Grid': '4239', 'MapX': None, 'MapY': None, 'Longitude': -80.3367784, 'Latitude': 25.78203843}


In [77]:
# Count the records in the parsed list (one dictionary per incident row,
# so this is the number of rows before any de-duplication)
len(incidents)

28

In [78]:
# Peek at the first two parsed records to check field names and value types
incidents[:2]

[{'CreateTime': '2025-06-27T14:21:21',
  'Signal': 'TRAFFIC ACCIDENT',
  'Address': 'S SR836 OFF RAMP W / NW 87TH AVE',
  'Location': 'NB',
  'Grid': '4239',
  'MapX': None,
  'MapY': None,
  'Longitude': -80.3367784,
  'Latitude': 25.78203843},
 {'CreateTime': '2025-06-27T14:21:18',
  'Signal': 'TRAFFIC ACCIDENT',
  'Address': '9301 W FLAGLER ST',
  'Location': 'AT&T',
  'Grid': '1350',
  'MapX': None,
  'MapY': None,
  'Longitude': -80.34794051,
  'Latitude': 25.76927387}]

In [79]:
# Build a tabular view: each incident dictionary becomes a row and its keys
# become the column names
df = pd.DataFrame(data=incidents)
df.head(n=5)

Unnamed: 0,CreateTime,Signal,Address,Location,Grid,MapX,MapY,Longitude,Latitude
0,2025-06-27T14:21:21,TRAFFIC ACCIDENT,S SR836 OFF RAMP W / NW 87TH AVE,NB,4239,,,-80.336778,25.782038
1,2025-06-27T14:21:18,TRAFFIC ACCIDENT,9301 W FLAGLER ST,AT&T,1350,,,-80.347941,25.769274
2,2025-06-27T14:18:19,TRAFFIC ACCIDENT,SW 71ST AVE / SW 9TH ST,,1399,,,-80.310193,25.76173
3,2025-06-27T14:17:26,HIT AND RUN,NW 165TH TER / NW 57TH AVE,OCCD NB,318,,,-80.293162,25.923336
4,2025-06-27T14:10:51,TRAFFIC ACCIDENT,NW 41ST ST / NW 107TH AVE,,1002,,,-80.3695,25.81165


While observing the traffic website, I noticed duplicate rows on several occasions. It is not clear whether there is a reason for the duplicates, so for now I remove them. **TODO:** revisit this decision once the cause of the duplicates is understood.

In [80]:
# Remove duplicates (rows that match on every column; the first occurrence is kept)
df = df.drop_duplicates()
# info() writes its report itself and returns None, so it is called directly;
# wrapping it in print() added a stray "None" line to the output.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 0 to 27
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   CreateTime  26 non-null     object 
 1   Signal      26 non-null     object 
 2   Address     26 non-null     object 
 3   Location    26 non-null     object 
 4   Grid        26 non-null     object 
 5   MapX        0 non-null      object 
 6   MapY        0 non-null      object 
 7   Longitude   26 non-null     float64
 8   Latitude    26 non-null     float64
dtypes: float64(2), object(7)
memory usage: 2.0+ KB
None


Unnamed: 0,CreateTime,Signal,Address,Location,Grid,MapX,MapY,Longitude,Latitude
0,2025-06-27T14:21:21,TRAFFIC ACCIDENT,S SR836 OFF RAMP W / NW 87TH AVE,NB,4239,,,-80.336778,25.782038
1,2025-06-27T14:21:18,TRAFFIC ACCIDENT,9301 W FLAGLER ST,AT&T,1350,,,-80.347941,25.769274
2,2025-06-27T14:18:19,TRAFFIC ACCIDENT,SW 71ST AVE / SW 9TH ST,,1399,,,-80.310193,25.76173
3,2025-06-27T14:17:26,HIT AND RUN,NW 165TH TER / NW 57TH AVE,OCCD NB,318,,,-80.293162,25.923336
4,2025-06-27T14:10:51,TRAFFIC ACCIDENT,NW 41ST ST / NW 107TH AVE,,1002,,,-80.3695,25.81165


In [81]:
# Tighten the column types in a single step:
#   CreateTime -> datetime (parsed from the timestamp strings)
#   Signal     -> categorical (incident type labels)
df = df.assign(
    CreateTime=pd.to_datetime(df['CreateTime']),
    Signal=df['Signal'].astype('category'),
)

In [82]:
# Re-check the column dtypes after the conversions
# (CreateTime should now be datetime64 and Signal should be category)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 0 to 27
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   CreateTime  26 non-null     datetime64[ns]
 1   Signal      26 non-null     category      
 2   Address     26 non-null     object        
 3   Location    26 non-null     object        
 4   Grid        26 non-null     object        
 5   MapX        0 non-null      object        
 6   MapY        0 non-null      object        
 7   Longitude   26 non-null     float64       
 8   Latitude    26 non-null     float64       
dtypes: category(1), datetime64[ns](1), float64(2), object(5)
memory usage: 2.1+ KB


In [83]:
# --- BigQuery connection ---
# Ask for the GCP project the client should be bound to (e.g. miami-traffic)
project_id = input("Enter your GCP project ID: ")

# Ask for the service account key location; getpass does not echo what is typed
key_path = getpass.getpass("Enter full path to your service account JSON key file: ")

# Build credentials from the key file and open a client for the chosen project
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(project=project_id, credentials=credentials)

# Smoke-test the connection by listing the datasets visible in the project
print("🔄 Connecting to BigQuery...")
datasets = list(client.list_datasets())
if not datasets:
    print("✅ Connected, but no datasets found in this project.")
else:
    print("✅ Connection successful! Found the following datasets:")
    for dataset in datasets:
        print(f"  - {dataset.dataset_id}")

🔄 Connecting to BigQuery...
✅ Connection successful! Found the following datasets:
  - mdpd_traffic_data


In [84]:
# Location to upload data (project->dataset->table name).
# The project is taken from the authenticated client so it always matches the
# project ID entered at the prompt, rather than a second hard-coded copy that
# could silently disagree with it.
table_id = f"{client.project}.mdpd_traffic_data.mdpd_data"

# Append new data to table. The write disposition is spelled out so the
# append intent is visible in the code rather than implied by a default.
# NOTE(review): each run appends the current snapshot of the feed; presumably
# incidents still listed on the next run would be stored again -- confirm
# whether de-duplication against existing rows is handled downstream.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND
)
job = client.load_table_from_dataframe(df, table_id, job_config=job_config)
job.result()  # waits for the load job to finish; raises if the job failed
print("✅ Data uploaded to BigQuery!")

✅ Data uploaded to BigQuery!
