# 01 Data Ingestion
This notebook will ingest and save the data to a local data folder. While our climate data is already in CSV format, we will need to use APIs to ingest and save the accident and speed limit data.

### Imports & Setup

In [33]:
import pandas as pd
import requests
from sodapy import Socrata

# Unauthenticated client for Calgary's open-data portal: no app token is supplied.
client = Socrata(domain="data.calgary.ca", app_token=None)



### Weather Data
This data was manually downloaded from climatedata.ca.

In [2]:
# Load the manually downloaded daily climate CSV from the local data folder.
weather = pd.read_csv(filepath_or_buffer="../data/climate-daily.csv")

# Preview the first five rows.
weather.head(5)

Unnamed: 0,x,y,STATION_NAME,CLIMATE_IDENTIFIER,ID,LOCAL_DATE,PROVINCE_CODE,LOCAL_YEAR,LOCAL_MONTH,LOCAL_DAY,...,SPEED_MAX_GUST,SPEED_MAX_GUST_FLAG,COOLING_DEGREE_DAYS,COOLING_DEGREE_DAYS_FLAG,HEATING_DEGREE_DAYS,HEATING_DEGREE_DAYS_FLAG,MIN_REL_HUMIDITY,MIN_REL_HUMIDITY_FLAG,MAX_REL_HUMIDITY,MAX_REL_HUMIDITY_FLAG
0,-114.000297,51.109447,CALGARY INT'L CS,3031094,3031094.2016.3.1,2016-03-01 00:00:00,AB,2016,3,1,...,35.0,,0.0,,18.6,,,,,
1,-114.000297,51.109447,CALGARY INT'L CS,3031094,3031094.2016.3.2,2016-03-02 00:00:00,AB,2016,3,2,...,33.0,,0.0,,19.3,,,,,
2,-114.000297,51.109447,CALGARY INT'L CS,3031094,3031094.2016.3.3,2016-03-03 00:00:00,AB,2016,3,3,...,57.0,,0.0,,14.0,,,,,
3,-114.000297,51.109447,CALGARY INT'L CS,3031094,3031094.2016.3.4,2016-03-04 00:00:00,AB,2016,3,4,...,0.0,,0.0,,14.2,,,,,
4,-114.000297,51.109447,CALGARY INT'L CS,3031094,3031094.2016.3.5,2016-03-05 00:00:00,AB,2016,3,5,...,57.0,,0.0,,11.4,,,,,


### Speed Limit Data

In [None]:
# Get data from the Socrata API endpoint for the speed limit dataset.
# `get_all` keeps requesting pages (3700 rows per request here) until a short
# page comes back, so the pull is not silently cut off at a hard-coded row
# count if the dataset grows. Ordering by the `:id` system field keeps paging
# stable across requests; note that rows therefore arrive in `:id` order.
speeds_raw = list(client.get_all("2bwu-t32v", limit=3700, order=":id"))

# Convert the list of record dicts to a pandas DataFrame
speeds_df = pd.DataFrame.from_records(speeds_raw)

In [None]:
# Show the first five speed limit records as a sanity check
speeds_df.head(5)

Unnamed: 0,bound,distance,speed,street_name,modified_attribute_dt,multiline,created_dt
0,N/S,4.18265009,60,TWELVE MILE COULEE RD,2012-08-20T14:38:13.000Z,"{'type': 'MultiLineString', 'coordinates': [[[...",
1,E,1.85402,60,JOHN LAURIE BV,,"{'type': 'MultiLineString', 'coordinates': [[[...",
2,W,1.5454200500000002,60,JOHN LAURIE BV,,"{'type': 'MultiLineString', 'coordinates': [[[...",
3,W,0.47580501,60,CROWCHILD TR,,"{'type': 'MultiLineString', 'coordinates': [[[...",
4,N/S,0.82496297,60,11 ST,,"{'type': 'MultiLineString', 'coordinates': [[[...",


In [None]:
# Persist the speed limit records to the local data folder.
# The DataFrame index is written too (to_csv default), as an unnamed first column.
speeds_df.to_csv(path_or_buf='../data/speed_limits.csv')

### Traffic Accident Data

In [47]:
# Get data from the Socrata API endpoint for the traffic incident dataset.
# A single `get` with a fixed limit silently drops every row past that limit;
# `get_all` instead pages through the resource (50000 rows per request here)
# until a short page is returned. Ordering by the `:id` system field keeps
# paging stable across requests; note that rows therefore arrive in `:id` order.
traffic_raw = list(client.get_all("35ra-9556", limit=50000, order=":id"))

# Convert the list of record dicts to a pandas DataFrame
traffic_df = pd.DataFrame.from_records(traffic_raw)


In [48]:
# Preview the first five traffic incident records
traffic_df.head(n=5)

Unnamed: 0,incident_info,description,start_dt,modified_dt,quadrant,longitude,latitude,count,id,point,:@computed_region_kxmf_bzkv,:@computed_region_4a3i_ccfj,:@computed_region_4b54_tmc4
0,Westbound Stoney Trail after Sarcee Trail NW,Two vehicle incident. Blocking the left lane,2026-01-07T14:53:09.000,2026-01-07T07:56:40.000,NW,-114.1703639573997,51.15181499196338,1,2026-01-07T14:53:0951.15181499196338-114.17036...,"{'type': 'Point', 'coordinates': [-114.1703639...",202,2,3
1,Southbound Deerfoot Trail approaching country...,Two vehicle incident. Blocking the left lane,2026-01-07T14:46:49.000,2026-01-07T07:56:40.000,NE,-114.01057928402363,51.15948611863427,1,2026-01-07T14:46:4951.15948611863427-114.01057...,"{'type': 'Point', 'coordinates': [-114.0105792...",163,4,11
2,Eastbound Glenmore Trail after Blackfoot Trai...,Two vehicle incident. Blocking the centre lane,2026-01-07T14:33:52.000,2026-01-07T07:56:40.000,SE,-114.04798785240116,50.99433858658668,1,2026-01-07T14:33:5250.99433858658668-114.04798...,"{'type': 'Point', 'coordinates': [-114.0479878...",1,3,10
3,Eastbound Mcknight Blvd at Deerfoot Trail NE,Two vehicle incident. Blocking the left lane,2026-01-07T13:59:21.000,2026-01-07T07:56:40.000,NE,-114.03982359234142,51.096056352608926,1,2026-01-07T13:59:2151.096056352608926-114.0398...,"{'type': 'Point', 'coordinates': [-114.0398235...",81,4,9
4,Northbound Harvest Hills Boulevard ramp to Ea...,Traffic incident. Blocking the right shoulder,2026-01-07T04:56:28.000,2026-01-06T22:00:34.000,NE,-114.06700224840873,51.17262999470791,1,2026-01-07T04:56:2851.17262999470791-114.06700...,"{'type': 'Point', 'coordinates': [-114.0670022...",251,4,11


In [49]:
# Write the traffic incident records to a CSV in the local data folder.
# The DataFrame index is written too (to_csv default), as an unnamed first column.
traffic_df.to_csv(path_or_buf="../data/traffic_accidents.csv")