# Group 12 Guided Project
## 1. Introduction

## 2. Data Collection & Preprocessing

In [1]:
import requests
import pandas as pd
import time
import backoff

class RateLimitError(Exception):
    pass

# create a function to call API with exception handling and retries
@backoff.on_exception(backoff.expo, RateLimitError)
def call_API(url, retries = 3, delay = 1):
    for retry in range(retries):
        try:
            api_response = requests.get(url)
            api_response.raise_for_status()
            api_response_json = api_response.json()  
            return api_response_json 
        except requests.RequestException as e:
            print(f"Attempt {retry + 1} failed due to: {e}")
            if retry < retries - 1: 
                time.sleep(delay)  
            else:
                print("Retried for three times, now give up")
                return None
            


In [2]:
from datetime import datetime, timedelta

# get PSI data for given date
def get_psi_data_for_date(date):
    url = f"https://api-open.data.gov.sg/v2/real-time/api/psi?date={date}"
    data = call_API(url)
    print(data)
    if 'data' in data and 'items' in data['data'] and len(data['data']['items']) > 0:
        psi_readings = data['data']['items'][0]['readings']
        region_metadata = data['data']['regionMetadata']
        return psi_readings, region_metadata

# retrieve PSI data for target date
def get_psi_data_for_target_date():
    
    # give time range date
    start_date = datetime(2023, 1, 14)
    end_date = datetime(2023, 1, 18)
    
    all_psi_data = []
    all_region_metadata = []

    current_date = start_date
    while current_date < end_date:
        date_str = current_date.strftime('%Y-%m-%d')
        
        psi_readings, region_metadata = get_psi_data_for_date(date_str)
        
        if psi_readings and region_metadata:
            # store the PSI and all other reading types for all regions
            for key, region_data in psi_readings.items():
                for region, reading_value in region_data.items():
                    all_psi_data.append({
                        'region': region,
                        'date': date_str,
                        'reading_type': key,  
                        'reading_value': reading_value
                    })
            
            # store region info
            for region in region_metadata:
                all_region_metadata.append({
                    'region': region['name'],
                    'latitude': region['labelLocation']['latitude'],
                    'longitude': region['labelLocation']['longitude']
                })
        
        current_date += timedelta(days=1)

    psi_df = pd.DataFrame(all_psi_data)
    region_df = pd.DataFrame(all_region_metadata)
    
    return psi_df, region_df

psi_df, region_df = get_psi_data_for_target_date()

print(psi_df.head())
print(region_df.head())

{'code': 0, 'data': {'regionMetadata': [{'name': 'west', 'labelLocation': {'latitude': 1.35735, 'longitude': 103.7}}, {'name': 'national', 'labelLocation': {'latitude': 0, 'longitude': 0}}, {'name': 'east', 'labelLocation': {'latitude': 1.35735, 'longitude': 103.94}}, {'name': 'central', 'labelLocation': {'latitude': 1.35735, 'longitude': 103.82}}, {'name': 'south', 'labelLocation': {'latitude': 1.29587, 'longitude': 103.82}}, {'name': 'north', 'labelLocation': {'latitude': 1.41803, 'longitude': 103.82}}], 'items': [{'date': '2023-01-14', 'updatedTimestamp': '2024-07-18T11:29:50+08:00', 'timestamp': '2023-01-14T23:00:00+08:00', 'readings': {'o3_sub_index': {'west': 22, 'national': 43, 'east': 34, 'central': 43, 'south': 34, 'north': 33}, 'no2_one_hour_max': {'west': 44, 'national': 48, 'east': 13, 'central': 48, 'south': 13, 'north': 27}, 'o3_eight_hour_max': {'west': 52, 'national': 102, 'east': 81, 'central': 102, 'south': 80, 'north': 77}, 'psi_twenty_four_hourly': {'west': 44, 'nat

In [3]:
import pandas as pd

vehicle_df = pd.read_csv('vehicle_data.csv')
print(vehicle_df.head())

   year max_laden_wt                  type   number
0  2004   <= 3500 kg               Lorries  29282.0
1  2004   <= 3500 kg               Tippers      0.0
2  2004   <= 3500 kg                  Vans  54183.0
3  2004   <= 3500 kg  Goods-cum Passengers   5855.0
4  2004   <= 3500 kg  Articulated Vehicles      0.0


In [4]:
vehicle_uk_df = pd.read_csv('vehicle_data_uk.csv')
print(vehicle_uk_df.head())

  BodyType    Make       GenModel                          Model    Fuel  \
0     Cars  ABARTH     ABARTH 124                124 GT MULTIAIR  Petrol   
1     Cars  ABARTH     ABARTH 124           124 GT MULTIAIR AUTO  Petrol   
2     Cars  ABARTH  ABARTH SPIDER            124 SPIDER MULTIAIR  Petrol   
3     Cars  ABARTH  ABARTH SPIDER       124 SPIDER MULTIAIR AUTO  Petrol   
4     Cars  ABARTH  ABARTH SPIDER  124 SPIDER SCORPIONE MULTIAIR  Petrol   

   2024Q1  2023Q4  2023Q3  2023Q2  2023Q1  ...  2016Q4  2016Q3  2016Q2  \
0       0       0       0       0       0  ...       0       0       0   
1       0       0       0       0       0  ...       0       0       0   
2       0       0       1       0       0  ...      59     100       0   
3       0       0       0       0       0  ...      10      26       0   
4       0       0       0       0       0  ...       0       0       0   

   2016Q1  2015Q4  2015Q3  2015Q2  2015Q1  2014Q4  2014Q3  
0       0       0       0       0     

In [2]:
import pandas as pd
import json

with open('traffic_speed_data.json') as f:
    data = json.load(f)
    
traffic_data = data['value']
traffic_df = pd.DataFrame(traffic_data)

# Display the first few rows of the DataFrame to verify
print(traffic_df.head(1))

      LinkID   RoadName RoadCategory  SpeedBand MinimumSpeed MaximumSpeed  \
0  103000000  KENT ROAD            E          3           20           29   

             StartLon            StartLat              EndLon  \
0  103.85298052044503  1.3170142376560023  103.85259882242372   

               EndLat  
0  1.3166840028663076  


In [3]:
import requests

# Define the URL for the API request
one_map_url = "https://www.onemap.gov.sg/api/common/elastic/search?searchVal=135%20BEDOK%20RESERVOIR&returnGeom=Y&getAddrDetails=Y&pageNum=1"

# Fetch data using the defined function
one_map_data = call_API(one_map_url)

# Print the result if data is successfully retrieved
if one_map_data['results']:
    address_df = pd.DataFrame(one_map_data['results'])
else:
    print("Failed to retrieve data.")

# Display the DataFrame
print(address_df.head())

      SEARCHVAL BLK_NO             ROAD_NAME      BUILDING  \
0  EUNOS SPRING    135  BEDOK RESERVOIR ROAD  EUNOS SPRING   

                                             ADDRESS  POSTAL  \
0  135 BEDOK RESERVOIR ROAD EUNOS SPRING SINGAPOR...  470135   

                  X                 Y          LATITUDE         LONGITUDE  
0  36814.6878971951  35037.6238208823  1.33314106418951  103.912523538101  


In [13]:
from time import sleep
import requests
import pandas as pd
import time

sdk_key = '77dc351e719e8d52a6588b2d49cf7e4a'
api_key = 'JzFUjwPOR5Kf5DdyGpAx6g=='
traffic_flow_api = 'https://datamall2.mytransport.sg/ltaodataservice/TrafficFlow'

def call_API_with_key(api_url, api_key, retries=1, delay=5):
    # Define headers
    headers = {
        'AccountKey': api_key,
        'accept': 'application/json' 
    }
    
    response = requests.get(api_url, headers=headers)
    response.raise_for_status()  
    data = response.json() 
    print(f"data response: {data}")
    
    for attempt in range(retries):
        try:
            response = requests.get(api_url, headers=headers)
            response.raise_for_status()  
            data = response.json() 
            print(f"data response: {data}")
            
            link = None
            if 'value' in data and len(data['value']) > 0 and 'Link' in data['value'][0]:
                link = data['value'][0]['Link']
            
            if link:
                print(f"Found link: {link}")

                # Make another GET request to the extracted link
                link_response = call_API(link)
                return link_response
            else:
                return None

        except requests.exceptions.RequestException as e:
            print(f"Error: {e}")
            if attempt < retries - 1:
                print(f"Retrying in {delay} seconds...")
                sleep(delay)
            else:
                print("Failed after several retries.")
                return None


traffic_flow_response = call_API_with_key(traffic_flow_api, api_key)
if traffic_flow_response:
    traffic_flow_df = pd.DataFrame(traffic_flow_response['Value'])
    traffic_flow_filename = "traffic_flow_data.xlsx"
    traffic_flow_df.to_excel(traffic_flow_filename, index=False)
else:
    print("Data is not in expected list format.")

HTTPError: 500 Server Error: Internal Server Error for url: https://datamall2.mytransport.sg/ltaodataservice/TrafficFlow

## 3. Exploratory Data Analysis (EDA)

## 4. Data Modelling and Analysis

## 5. Comparative Analysis

## 6. Conclusions and Recommendations

## 7. References

## 8. API URLs