In [1]:
import pandas as pd
import xml.etree.ElementTree as et

# Parse the XML file
mytree = et.parse(r"C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\Weather\T_ONCN00_C_LAND_202308120336_3605043431.cap")
root = mytree.getroot()

# Namespace dictionary
namespaces = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

# Extract event data
event = root.find('.//cap:event', namespaces).text

# Extract polygon data into a list of dictionaries
data = []
for area in root.findall('.//cap:area', namespaces):
    area_desc = area.find('.//cap:areaDesc', namespaces).text
    polygon_coords = area.find('.//cap:polygon', namespaces).text
    coords = polygon_coords.split()  # Split coordinates into individual pairs
    
    # Create a dictionary for each set of coordinates
    for coord_pair in coords:
        lat, lon = map(float, coord_pair.split(','))
        data.append({'event': event, 'areaDesc': area_desc, 'latitude': lat, 'longitude': lon})

# Create a DataFrame from the extracted data
polygon_df = pd.DataFrame(data)

# Print the DataFrame
polygon_df


Unnamed: 0,event,areaDesc,latitude,longitude
0,thunderstorm,Windsor - Leamington - Essex County,42.0409,-83.1223
1,thunderstorm,Windsor - Leamington - Essex County,42.0737,-83.1387
2,thunderstorm,Windsor - Leamington - Essex County,42.0902,-83.1333
3,thunderstorm,Windsor - Leamington - Essex County,42.0903,-83.1333
4,thunderstorm,Windsor - Leamington - Essex County,42.1025,-83.1293
...,...,...,...,...
339,thunderstorm,Windsor - Leamington - comté d'Essex,41.8127,-82.5705
340,thunderstorm,Windsor - Leamington - comté d'Essex,41.7225,-82.6052
341,thunderstorm,Windsor - Leamington - comté d'Essex,41.7134,-82.6941
342,thunderstorm,Windsor - Leamington - comté d'Essex,41.9268,-82.8988


In [2]:
import pandas as pd
cities = pd.read_csv(r'C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\ca_geo_dimension.csv')
cities.sample(10)

Unnamed: 0,province,region,zipcode,latitude,longitude
825620,British Columbia,Greater Vancouver,V4A5A3,49.038728,-122.785046
251769,Quebec,Centre-du-Québec,J2E1M4,45.895928,-72.519031
252689,Quebec,Estrie,J2G5W1,45.411944,-72.733143
198766,Quebec,Montréal,H2L4B4,45.539956,-73.615088
434973,Ontario,Peel,L4W3T9,43.620498,-79.618616
745994,Alberta,Edmonton,T6B0X3,53.530288,-113.431717
30144,Nova Scotia,Halifax,B3L3G7,44.643559,-63.612203
615827,Ontario,Greater Sudbury,P3E1E3,46.48924,-80.99075
880136,British Columbia,Capital,V8Z4M9,48.458248,-123.409867
86488,New Brunswick,Charlotte,E5C2P3,45.117226,-66.847864


In [3]:
import pandas as pd
from shapely.geometry import Polygon, Point

# Polygon coordinates
polygon_coords = polygon_df[['longitude','latitude']]

# Create a Shapely Polygon
polygon = Polygon(polygon_coords)

# List to store filtered city data
filtered_cities = []

# Iterate through each city
for index, city in cities.iterrows():
    city_point = Point(city['longitude'], city['latitude'])
    
    # Check if the city is inside the polygon
    if city_point.within(polygon):
        filtered_cities.append({'province': city['province'], 'region': city['region']})

# Create a DataFrame from the filtered cities
filtered_cities_df = pd.DataFrame(filtered_cities)

# Add 'event' column to the filtered cities DataFrame
event_name = "thunderstorm"  # Replace with the actual event name
filtered_cities_df['event'] = event_name

filtered_cities_df


Unnamed: 0,event


In [4]:
import pandas as pd
import xml.etree.ElementTree as et

# Parse the XML file
mytree = et.parse(r"C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\Weather\T_ONCN00_C_LAND_202308120336_3605043431.cap")
root = mytree.getroot()

# Namespace dictionary
namespaces = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

# Extract event data
event = root.find('.//cap:event', namespaces).text

# Extract polygon coordinates
polygon_coords = root.find('.//cap:area/cap:polygon', namespaces).text
polygon_points = [tuple(map(float, point.split(','))) for point in polygon_coords.split()]


cities_df = pd.read_csv(r'C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\ca_geo_dimension.csv')

# Check if each city is inside the polygon
inside_polygon = []
for index, city in cities_df.iterrows():
    lat, lon = city['latitude'], city['longitude']
    point = (lat, lon)
    
    # Check if the point is inside the polygon
    inside = False
    i, j = 0, len(polygon_points) - 1
    while i < len(polygon_points):
        if ((polygon_points[i][1] > point[1]) != (polygon_points[j][1] > point[1])) and \
                (point[0] < (polygon_points[j][0] - polygon_points[i][0]) * (point[1] - polygon_points[i][1]) / 
                             (polygon_points[j][1] - polygon_points[i][1]) + polygon_points[i][0]):
            inside = not inside
        j = i
        i += 1
    
    inside_polygon.append(inside)

# Create a new DataFrame with cities inside the polygon
cities_inside_polygon_df = cities_df[inside_polygon][['province', 'region']].copy()
cities_inside_polygon_df['event'] = event

# Print the DataFrame with cities inside the polygon
print(cities_inside_polygon_df)


       province        region         event
545613  Ontario         Essex  thunderstorm
545619  Ontario  Chatham-Kent  thunderstorm
545621  Ontario         Essex  thunderstorm
545626  Ontario         Essex  thunderstorm
545650  Ontario         Essex  thunderstorm
...         ...           ...           ...
608501  Ontario         Essex  thunderstorm
608502  Ontario         Essex  thunderstorm
608503  Ontario         Essex  thunderstorm
608504  Ontario         Essex  thunderstorm
608505  Ontario         Essex  thunderstorm

[11180 rows x 3 columns]


In [6]:
import pandas as pd
import xml.etree.ElementTree as et
from shapely.geometry import Point, Polygon

# Parse the XML file
mytree = et.parse(r"C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\Weather\T_ONCN00_C_LAND_202308120336_2772920413.cap")
root = mytree.getroot()

# Namespace dictionary
namespaces = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

# Extract event data
event = root.find('.//cap:event', namespaces).text

# Extract polygon coordinates
polygon_coords = root.find('.//cap:area/cap:polygon', namespaces).text
polygon_points = [tuple(map(float, point.split(','))) for point in polygon_coords.split()]

# Create a Shapely Polygon from the polygon points
polygon = Polygon(polygon_points)

cities_df = pd.read_csv(r'C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\ca_geo_dimension.csv')

# Check if each city is inside the polygon using Shapely's contains method
inside_polygon = []
for index, city in cities_df.iterrows():
    lat, lon = city['latitude'], city['longitude']
    point = Point(lon, lat)  # Shapely uses (x, y) order
    
    inside = polygon.contains(point)
    inside_polygon.append(inside)

# Create a new DataFrame with cities inside the polygon
cities_inside_polygon_df = cities_df[inside_polygon][['province', 'region', 'zipcode']].copy()
cities_inside_polygon_df['event'] = event

# Print the DataFrame with cities inside the polygon
print(cities_inside_polygon_df)


Empty DataFrame
Columns: [province, region, zipcode, event]
Index: []


In [7]:
import os
import pandas as pd
import xml.etree.ElementTree as et
from shapely.geometry import Point, Polygon

# Folder path containing the CAP files
folder_path = r"C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\Weather"

# Namespace dictionary
namespaces = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

# Get a list of all CAP files in the folder
file_paths = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.endswith(".cap")]

# Iterate over each file
for file_path in file_paths:
    # Parse the XML file
    mytree = et.parse(file_path)
    root = mytree.getroot()

    # Extract event data
    event = root.find('.//cap:event', namespaces).text

    # Extract polygon coordinates
    polygon_coords = root.find('.//cap:area/cap:polygon', namespaces).text
    polygon_points = [tuple(map(float, point.split(','))) for point in polygon_coords.split()]

    cities_df = pd.read_csv(r'C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\ca_geo_dimension.csv')

    # Create a Shapely Polygon from the polygon points
    polygon = Polygon(polygon_points)

    # Check if each city is inside the polygon using Shapely's contains method
    inside_polygon = []
    for index, city in cities_df.iterrows():
        lat, lon = city['latitude'], city['longitude']
        point = Point(lon, lat)  # Shapely uses (x, y) order

        inside = polygon.contains(point)
        inside_polygon.append(inside)

    # Create a new DataFrame with cities inside the polygon
    cities_inside_polygon_df = cities_df[inside_polygon][['province', 'region']].copy()
    cities_inside_polygon_df['event'] = event

    # Print the DataFrame with cities inside the polygon
    print(cities_inside_polygon_df)


Empty DataFrame
Columns: [province, region, event]
Index: []
Empty DataFrame
Columns: [province, region, event]
Index: []


In [8]:
import os
import pandas as pd
import xml.etree.ElementTree as et
from shapely.geometry import Point, Polygon
import requests
from datetime import datetime

# Base URL for CAP files
base_url = "https://dd.weather.gc.ca/alerts/cap/"

# Get current date in YYYYMMDD format
current_date = datetime.now().strftime('%Y%m%d')

# Create a list of two-digit hour strings
hours = []
for i in range(0, 24):
    hours.append(f'{i:02}')
codes =['CWUL','CWEG','CWNT','CWWG','CWVR','CWTO','CYQX','CWAO','CWIS','CWHX','LAND']
# Create URLs for each hour
urls = []
for code in codes:
    for hour in hours:
        url = base_url + current_date + '/' + code + '/'+ hour + '/'
        urls.append(url)



In [9]:
response = requests.get(url)
response

<Response [404]>

In [10]:
# Construct the URL for the CAP files

# Make a request to the URL to get the list of CAP files
response = requests.get(url)
cap_links = response.text.split("\n")

# Filter CAP links to get only the ones you're interested in
cap_links = [link for link in cap_links if link.endswith(".cap")]
cap_links


[]

In [11]:
import os
import pandas as pd
import xml.etree.ElementTree as et
from shapely.geometry import Point, Polygon
import requests
from datetime import datetime

# Base URL for CAP files
base_url = "https://dd.weather.gc.ca/alerts/cap/"


current_date = datetime.now().strftime('%Y%m%d')

# Create a list of two-digit hour strings
import datetime
now = datetime.datetime.now()
hours = []
for i in range(3):
    hours.append(f'{(now - datetime.timedelta(hours=i)).hour:02}')

codes = ['CWUL', 'CWEG', 'CWNT', 'CWWG', 'CWVR', 'CWTO', 'CYQX', 'CWAO', 'CWIS', 'CWHX', 'LAND']

# Create URLs for each hour and code
urls = []
for code in codes:
    for hour in hours:
        url = base_url + current_date + '/' + code + '/' + hour + '/'
        response = requests.get(url)
        if response.status_code == 200:  # Check if response is successful
            urls.append(url)
            print(f"Added {url} to the list.")


Added https://dd.weather.gc.ca/alerts/cap/20230812/CWUL/22/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWNT/22/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWVR/23/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/22/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/21/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/LAND/22/ to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/LAND/21/ to the list.


working

In [None]:
# Create a list to store URLs with .cap files
url_cap = []

import time
 for url in urls:
   file_paths = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.endswith(".cap")]

for code in codes:
    for hour in hours:
        url = base_url + current_date + '/' + code + '/' + hour + '/'
        response = requests.get(url, timeout=10)  # Adjust the timeout value as needed
        if response.status_code == 200 and response.url.endswith('.cap'):
            url_cap.append(response.url)
            print(f"Added {response.url} to the list of .cap files.")
        time.sleep(1)  # Add a 1-second delay between requests


In [12]:
response = requests.get(url, timeout=10)  # Adjust the timeout value as needed


In [13]:
# Create a list to store URLs with .cap files
url_cap = []

import time


for code in codes:
    for hour in hours:
        url = base_url + current_date + '/' + code + '/' + hour + '/'
        response = requests.get(url, timeout=10)  # Adjust the timeout value as needed
        if response.status_code == 200 and response.url.endswith('.cap'):
            url_cap.append(response.url)
            print(f"Added {response.url} to the list of .cap files.")
        time.sleep(1)  # Add a 1-second delay between requests


In [14]:

# Iterate over the CAP links and download the files
for cap_link in cap_links:
    cap_url = url + "/" + cap_link
    file_path = os.path.join(download_folder, cap_link)
    
    # Download the CAP file
    cap_response = requests.get(cap_url)
    with open(file_path, "wb") as file:
        file.write(cap_response.content)
    
    # Process the downloaded CAP file
    # Parse the XML file
    mytree = et.fromstring(cap_response.content)
    namespaces = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

    # Extract event data
    event = mytree.find('.//cap:event', namespaces).text

    # Extract polygon coordinates
    polygon_coords = mytree.find('.//cap:area/cap:polygon', namespaces).text
    polygon_points = [tuple(map(float, point.split(','))) for point in polygon_coords.split()]

    cities_df = pd.read_csv(r'C:\Users\srika\OneDrive\Documents\York\Sem-1 york\MMAI 5100 - database fundamentals\ca_geo_dimension.csv')

    # Create a Shapely Polygon from the polygon points
    polygon = Polygon(polygon_points)

    # Check if each city is inside the polygon using Shapely's contains method
    inside_polygon = []
    for index, city in cities_df.iterrows():
        lat, lon = city['latitude'], city['longitude']
        point = Point(lon, lat)  # Shapely uses (x, y) order

        inside = polygon.contains(point)
        inside_polygon.append(inside)

    # Create a new DataFrame with cities inside the polygon
    cities_inside_polygon_df = cities_df[inside_polygon][['province', 'region']].copy()
    cities_inside_polygon_df['event'] = event

    # Print the DataFrame with cities inside the polygon
    print(cities_inside_polygon_df)


In [17]:
import requests
import re
from datetime import datetime, timedelta

# Base URL for CAP files
base_url = "https://dd.weather.gc.ca/alerts/cap/"

# Get the current date
current_date = datetime.now().strftime('%Y%m%d')

# Create a list of two-digit hour strings for the past 3 hours
now = datetime.now()
hours = [(now - timedelta(hours=i)).strftime('%H') for i in range(1)]

# List of responsible office codes
responsible_offices = ['CWUL', 'CWEG', 'CWNT', 'CWWG', 'CWVR', 'CWTO', 'CYQX', 'CWAO', 'CWIS', 'CWHX', 'LAND']

# Create a list to store URLs ending with .cap
urls_cap = []

# Create URLs for each responsible office, hour, and code
for office in responsible_offices:
    for hour in hours:
        url = f"{base_url}{current_date}/{office}/{hour}/"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                # Extract filenames from the response using regular expressions
                filenames = re.findall(r'href="(.*\.cap)"', response.text)
                # Create complete URLs for .cap files and add them to the list
                for filename in filenames:
                    cap_url = url + filename
                    urls_cap.append(cap_url)
                    print(f"Added {cap_url} to the list.")
        except requests.Timeout:
            print(f"Request to {url} timed out.")
        except requests.RequestException as e:
            print(f"Request to {url} failed: {e}")

# Print the collected URLs
print("List of URLs with .cap files:")
for url in urls_cap:
    print(url)


Added https://dd.weather.gc.ca/alerts/cap/20230812/CWVR/23/T_WOCN21_C_CWVR_202308122327_1137547568.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWVR/23/T_WWCN11_C_CWVR_202308122328_1348165370.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWVR/23/T_WWCN11_C_CWVR_202308122330_3343082376.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WHCN13_C_CWTO_202308122348_0562913125.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WHCN13_C_CWTO_202308122348_2787214325.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WHCN13_C_CWTO_202308122356_0463152595.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WHCN13_C_CWTO_202308122356_2378960251.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WHCN13_C_CWTO_202308122356_3648543961.cap to the list.
Added https://dd.weather.gc.ca/alerts/cap/20230812/CWTO/23/T_WZCN80_C_CW