# BIG Data 520 Final Project - Determining Weather Frequency
Need to get specific dates and times to do OpenWeather One Call API calls.

Want to get 4 ski seasons worth of weather data.

## Focusing on 2024/2025 season
Month range: first week of December to 3rd week of April

Get daily temps at 10:30 and 14:30 for two days of the week between those month ranges.

In [326]:
import calendar
import datetime as dt
import random
import pandas as pd
import numpy as np
import requests
from typing import Tuple
import pytz
import csv

In [3]:
start_month = 12
start_year = 2024
end_month = 4
end_year = 2025

In [56]:
cal = calendar.Calendar()

cal.monthdatescalendar(start_year, start_month)

[[datetime.date(2024, 11, 25),
  datetime.date(2024, 11, 26),
  datetime.date(2024, 11, 27),
  datetime.date(2024, 11, 28),
  datetime.date(2024, 11, 29),
  datetime.date(2024, 11, 30),
  datetime.date(2024, 12, 1)],
 [datetime.date(2024, 12, 2),
  datetime.date(2024, 12, 3),
  datetime.date(2024, 12, 4),
  datetime.date(2024, 12, 5),
  datetime.date(2024, 12, 6),
  datetime.date(2024, 12, 7),
  datetime.date(2024, 12, 8)],
 [datetime.date(2024, 12, 9),
  datetime.date(2024, 12, 10),
  datetime.date(2024, 12, 11),
  datetime.date(2024, 12, 12),
  datetime.date(2024, 12, 13),
  datetime.date(2024, 12, 14),
  datetime.date(2024, 12, 15)],
 [datetime.date(2024, 12, 16),
  datetime.date(2024, 12, 17),
  datetime.date(2024, 12, 18),
  datetime.date(2024, 12, 19),
  datetime.date(2024, 12, 20),
  datetime.date(2024, 12, 21),
  datetime.date(2024, 12, 22)],
 [datetime.date(2024, 12, 23),
  datetime.date(2024, 12, 24),
  datetime.date(2024, 12, 25),
  datetime.date(2024, 12, 26),
  datetime.da

Truncate the list down to the first full week of the month and grab everything else since the weeks where the dates cross months will be captured at the end.

In [9]:
cal.monthdatescalendar(end_year, 1)

[[datetime.date(2024, 12, 30),
  datetime.date(2024, 12, 31),
  datetime.date(2025, 1, 1),
  datetime.date(2025, 1, 2),
  datetime.date(2025, 1, 3),
  datetime.date(2025, 1, 4),
  datetime.date(2025, 1, 5)],
 [datetime.date(2025, 1, 6),
  datetime.date(2025, 1, 7),
  datetime.date(2025, 1, 8),
  datetime.date(2025, 1, 9),
  datetime.date(2025, 1, 10),
  datetime.date(2025, 1, 11),
  datetime.date(2025, 1, 12)],
 [datetime.date(2025, 1, 13),
  datetime.date(2025, 1, 14),
  datetime.date(2025, 1, 15),
  datetime.date(2025, 1, 16),
  datetime.date(2025, 1, 17),
  datetime.date(2025, 1, 18),
  datetime.date(2025, 1, 19)],
 [datetime.date(2025, 1, 20),
  datetime.date(2025, 1, 21),
  datetime.date(2025, 1, 22),
  datetime.date(2025, 1, 23),
  datetime.date(2025, 1, 24),
  datetime.date(2025, 1, 25),
  datetime.date(2025, 1, 26)],
 [datetime.date(2025, 1, 27),
  datetime.date(2025, 1, 28),
  datetime.date(2025, 1, 29),
  datetime.date(2025, 1, 30),
  datetime.date(2025, 1, 31),
  datetime.da

In [16]:
dates = {}
week_num = 1
for wk in cal.monthdatescalendar(start_year, start_month):
    if wk[0].month != start_month:
        continue
    
    print(f"This is Week #{week_num}: {wk}")
    dates[week_num] = wk
    week_num += 1


This is Week #1: [datetime.date(2024, 12, 2), datetime.date(2024, 12, 3), datetime.date(2024, 12, 4), datetime.date(2024, 12, 5), datetime.date(2024, 12, 6), datetime.date(2024, 12, 7), datetime.date(2024, 12, 8)]
This is Week #2: [datetime.date(2024, 12, 9), datetime.date(2024, 12, 10), datetime.date(2024, 12, 11), datetime.date(2024, 12, 12), datetime.date(2024, 12, 13), datetime.date(2024, 12, 14), datetime.date(2024, 12, 15)]
This is Week #3: [datetime.date(2024, 12, 16), datetime.date(2024, 12, 17), datetime.date(2024, 12, 18), datetime.date(2024, 12, 19), datetime.date(2024, 12, 20), datetime.date(2024, 12, 21), datetime.date(2024, 12, 22)]
This is Week #4: [datetime.date(2024, 12, 23), datetime.date(2024, 12, 24), datetime.date(2024, 12, 25), datetime.date(2024, 12, 26), datetime.date(2024, 12, 27), datetime.date(2024, 12, 28), datetime.date(2024, 12, 29)]
This is Week #5: [datetime.date(2024, 12, 30), datetime.date(2024, 12, 31), datetime.date(2025, 1, 1), datetime.date(2025, 1

### Scale up to cycle through all the months in the December to mid-April range and add the dates for each week number to `dates`.

In [51]:
month_range = [(start_year, start_month)] # initialize with the 2024 month because that's the only one for that year

In [52]:
month_range.extend([(end_year, m) for m in range(1,end_month+1)])

In [53]:
month_range

[(2024, 12), (2025, 1), (2025, 2), (2025, 3), (2025, 4)]

Iterate through to get a dictionary of the normalized week numbers and the respective dates that fall within them:

In [59]:
all_dates = {}
week_num = 1
for yr, mo in month_range:
    print(f"Year: {yr}\t Month:{mo}")
    for i, wk in enumerate(cal.monthdatescalendar(yr, mo)):
        # just want to grab the lists of datetime objects where the first element falls within the given 'mo' AND
        # want to ignore the last couple weeks in April since those dates fall beyond the assumed ski season
        if wk[0].month != mo or (mo == end_month and i > 2):
            continue
    
        print(f"This is Week #{week_num}: {wk}")
        all_dates[week_num] = wk
        week_num += 1

Year: 2024	 Month:12
This is Week #1: [datetime.date(2024, 12, 2), datetime.date(2024, 12, 3), datetime.date(2024, 12, 4), datetime.date(2024, 12, 5), datetime.date(2024, 12, 6), datetime.date(2024, 12, 7), datetime.date(2024, 12, 8)]
This is Week #2: [datetime.date(2024, 12, 9), datetime.date(2024, 12, 10), datetime.date(2024, 12, 11), datetime.date(2024, 12, 12), datetime.date(2024, 12, 13), datetime.date(2024, 12, 14), datetime.date(2024, 12, 15)]
This is Week #3: [datetime.date(2024, 12, 16), datetime.date(2024, 12, 17), datetime.date(2024, 12, 18), datetime.date(2024, 12, 19), datetime.date(2024, 12, 20), datetime.date(2024, 12, 21), datetime.date(2024, 12, 22)]
This is Week #4: [datetime.date(2024, 12, 23), datetime.date(2024, 12, 24), datetime.date(2024, 12, 25), datetime.date(2024, 12, 26), datetime.date(2024, 12, 27), datetime.date(2024, 12, 28), datetime.date(2024, 12, 29)]
This is Week #5: [datetime.date(2024, 12, 30), datetime.date(2024, 12, 31), datetime.date(2025, 1, 1), 

## Scale up for all the seasons
After mapping out what the loops look like for the 2024-2025 season, iterate through the previous 3 seasons.

In [None]:
start_month = 12
end_month = 4

In [55]:
first_season = 2021
end_season = 2025
season_range = range(first_season, end_season+1)

In [61]:
all_season_dates = {} # {week number for the season : list of datetime.date objects for that week}

for first_year in season_range:
    # not forecasting weather, so don't want to consider 2025-2026 season
    if first_year == end_season:
        break

    # specify the ski season
    second_year = first_year + 1
    season_str = f"{first_year}-{second_year}"
    print(f"\n######{season_str}######")

    # get the months for this ski season
    month_range = [(first_year, start_month)]
    month_range.extend([(second_year, m) for m in range(1,end_month+1)])

    week_num = 1
    for yr, mo in month_range:
        print(f"Year: {yr}\t Month:{mo}")
        for i, wk in enumerate(cal.monthdatescalendar(yr, mo)):
            # just want to grab the lists of datetime objects where the first element falls within the given 'mo' AND
            # want to ignore the last couple weeks in April since those dates fall beyond the assumed ski season
            if wk[0].month != mo or (mo == end_month and i > 2):
                continue
        
            print(f"This is Week #{week_num}: {wk}")
            k = f"{season_str}--{week_num}"
            all_season_dates[k] = wk
            week_num += 1


######2021-2022######
Year: 2021	 Month:12
This is Week #1: [datetime.date(2021, 12, 6), datetime.date(2021, 12, 7), datetime.date(2021, 12, 8), datetime.date(2021, 12, 9), datetime.date(2021, 12, 10), datetime.date(2021, 12, 11), datetime.date(2021, 12, 12)]
This is Week #2: [datetime.date(2021, 12, 13), datetime.date(2021, 12, 14), datetime.date(2021, 12, 15), datetime.date(2021, 12, 16), datetime.date(2021, 12, 17), datetime.date(2021, 12, 18), datetime.date(2021, 12, 19)]
This is Week #3: [datetime.date(2021, 12, 20), datetime.date(2021, 12, 21), datetime.date(2021, 12, 22), datetime.date(2021, 12, 23), datetime.date(2021, 12, 24), datetime.date(2021, 12, 25), datetime.date(2021, 12, 26)]
This is Week #4: [datetime.date(2021, 12, 27), datetime.date(2021, 12, 28), datetime.date(2021, 12, 29), datetime.date(2021, 12, 30), datetime.date(2021, 12, 31), datetime.date(2022, 1, 1), datetime.date(2022, 1, 2)]
Year: 2022	 Month:1
This is Week #5: [datetime.date(2022, 1, 3), datetime.date(2

In [62]:
all_season_dates.keys()

dict_keys(['2021-2022--1', '2021-2022--2', '2021-2022--3', '2021-2022--4', '2021-2022--5', '2021-2022--6', '2021-2022--7', '2021-2022--8', '2021-2022--9', '2021-2022--10', '2021-2022--11', '2021-2022--12', '2021-2022--13', '2021-2022--14', '2021-2022--15', '2021-2022--16', '2021-2022--17', '2021-2022--18', '2021-2022--19', '2022-2023--1', '2022-2023--2', '2022-2023--3', '2022-2023--4', '2022-2023--5', '2022-2023--6', '2022-2023--7', '2022-2023--8', '2022-2023--9', '2022-2023--10', '2022-2023--11', '2022-2023--12', '2022-2023--13', '2022-2023--14', '2022-2023--15', '2022-2023--16', '2022-2023--17', '2022-2023--18', '2022-2023--19', '2023-2024--1', '2023-2024--2', '2023-2024--3', '2023-2024--4', '2023-2024--5', '2023-2024--6', '2023-2024--7', '2023-2024--8', '2023-2024--9', '2023-2024--10', '2023-2024--11', '2023-2024--12', '2023-2024--13', '2023-2024--14', '2023-2024--15', '2023-2024--16', '2023-2024--17', '2023-2024--18', '2023-2024--19', '2023-2024--20', '2024-2025--1', '2024-2025--2'

In [63]:
# the 12th week of the 2021-2022 season
all_season_dates['2021-2022--12']

[datetime.date(2022, 2, 21),
 datetime.date(2022, 2, 22),
 datetime.date(2022, 2, 23),
 datetime.date(2022, 2, 24),
 datetime.date(2022, 2, 25),
 datetime.date(2022, 2, 26),
 datetime.date(2022, 2, 27)]

## Randomly select 2 of the dates from each week
1. Create a new list that grabs two dates from each week.
2. Duplicate each date for the 10:30 and 14:30 timestamps.
3. Convert the timestamps to Unix in UTC.

In [66]:
all_season_dates['2021-2022--12'][0]

datetime.date(2022, 2, 21)

In [69]:
new_datetime = dt.datetime.combine(all_season_dates['2021-2022--12'][0], dt.time(10,30,0))

In [79]:
random.choices(all_season_dates['2021-2022--12'], k=2)

[datetime.date(2022, 2, 21), datetime.date(2022, 2, 27)]

In [158]:
selected_season_dates = dict.fromkeys(all_season_dates)
for szn_wk in selected_season_dates.keys():
    # randomly choose 2 dates in that week
    date_choices = random.choices(all_season_dates[szn_wk], k=2)

    # get the 2 datetimes for each selected date AND convert to Unix timestamp
    # new_datetimes = []
    # for d in date_choices:
    #     new_datetimes.append(dt.datetime.combine(d, dt.time(10,30,0)).timestamp())
    #     new_datetimes.append(dt.datetime.combine(d, dt.time(14,30,0)).timestamp())

    selected_season_dates[szn_wk] = date_choices

In [159]:
selected_season_dates['2021-2022--15']

[datetime.date(2022, 3, 19), datetime.date(2022, 3, 20)]

In [160]:
type(selected_season_dates['2021-2022--15'][0])

datetime.date

In [330]:
selected_season_dates

{'2021-2022--1': [datetime.date(2021, 12, 12), datetime.date(2021, 12, 7)],
 '2021-2022--2': [datetime.date(2021, 12, 14), datetime.date(2021, 12, 15)],
 '2021-2022--3': [datetime.date(2021, 12, 25), datetime.date(2021, 12, 20)],
 '2021-2022--4': [datetime.date(2021, 12, 30), datetime.date(2022, 1, 2)],
 '2021-2022--5': [datetime.date(2022, 1, 9), datetime.date(2022, 1, 5)],
 '2021-2022--6': [datetime.date(2022, 1, 16), datetime.date(2022, 1, 14)],
 '2021-2022--7': [datetime.date(2022, 1, 17), datetime.date(2022, 1, 19)],
 '2021-2022--8': [datetime.date(2022, 1, 24), datetime.date(2022, 1, 30)],
 '2021-2022--9': [datetime.date(2022, 2, 5), datetime.date(2022, 1, 31)],
 '2021-2022--10': [datetime.date(2022, 2, 11), datetime.date(2022, 2, 10)],
 '2021-2022--11': [datetime.date(2022, 2, 18), datetime.date(2022, 2, 14)],
 '2021-2022--12': [datetime.date(2022, 2, 23), datetime.date(2022, 2, 22)],
 '2021-2022--13': [datetime.date(2022, 3, 2), datetime.date(2022, 3, 3)],
 '2021-2022--14': [da

In [166]:
# all the selected dates in one flat list
all_selected_dates = sorted(set([u for t in selected_season_dates.values() for u in t]))
all_selected_dates[:6]

[datetime.date(2021, 12, 7),
 datetime.date(2021, 12, 12),
 datetime.date(2021, 12, 14),
 datetime.date(2021, 12, 15),
 datetime.date(2021, 12, 20),
 datetime.date(2021, 12, 25)]

## Specify parameters for OpenWeather API call
API call = `https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={time}&appid={API key}`

### Geographical Coordinates of Resorts ->> Geocoding API
Before we can get weather, need to get the coordinates of the 6 resorts. Used AccuWeather POI Search by Country Code API since you can lookup coordinates by a POI instead of providing city and state codes.

In [377]:
resorts = {
    'Mammoth Mountain': 'Northern California',
    'Sierra-at-Tahoe': 'Northern California',
    'Copper Mountain': 'Rockies',
    'Snowbird': 'Rockies',
    'The Summit at Snoqualmie': 'PNW',
    'Mt. Bachelor': 'PNW',
    'Boyne Mountain': 'Midwest',
    'Boyne Highlands': 'Midwest'
}

Will combine the work of getting the coordinates for a resort then the weather for all the timestamps before proceeding to the next resort.

In [None]:
from utilities import API_KEY_geocode, API_KEY_weather

In [297]:
def get_coordinates_tz(resort:str) -> Tuple[float,float,str]:
    '''
    Function to make API calls to the AccuWeather Locations POI to get latitude and longitude coordinates for a specified resort + timezone

    Parameters
    ------
    resort : str
        the ski resort you want to search on

    Returns
    ------
    (lat, long, tz) : Tuple[float, float,str]
        a pair of latitude and longitude coordinates for resort + timezone
    '''

    url = 'http://dataservice.accuweather.com/locations/v1/poi/US/search'

    poi_params = {
        'apikey': API_KEY_geocode,
        'q': resort,
        'type': 25, # POI type ID for 'ski'
        'language': 'en-us',
        'details': False
    }

    try:
        # make the API call
        response = requests.get(url=url, params=poi_params)

        # check that it was successful
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        print(f'Error making geocode request: {e}')
        return None
    
    # get the json output
    try:
        api_response = response.json()

    except requests.exceptions.JSONDecodeError as je:
        print(f'No JSON available: {je}')
        return None
    
    # parse the json to get coordinates
    try:
        json_output = api_response[0]
        geoposition_dict = json_output['GeoPosition']

        lat = geoposition_dict['Latitude']
        long = geoposition_dict['Longitude']

    except KeyError:
        print("No coordinates available.")
        return None, None, None
    
    except IndexError:
        print("No json output")
        return None, None, None
    
    # parse json to get timeze
    try:
        timezone_dict = json_output['TimeZone']

        tz = timezone_dict['Name']

    except KeyError:
        print("No timezone info.")
        return lat,long,None
    
    else:
        return lat,long,tz

In [156]:
get_coordinates_tz("Snowbird")

(40.581, -111.656, 'America/Denver')

In [None]:
https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=40.581&lon=-111.656&dt=1638891000&appid=5afdd2f664ca1b1885d5a82af0aa2bcc

In [304]:
def get_historical_weather(lat:float, long:float, timestamp:int) -> dict:
    '''
    Function to make a request to the OpenWeather History API to get weather data for a specified resort

    Parameters
    ------
    lat : float
        a float representing the latitudinal coordinates of your destination
    long : float
        a float representing the longitudinal coordinates of your destination
    timestamp : int
        an integer representing a specific Unix timestamp for the weather

    Returns
    ------
    weather_dict : dict
        a dictionary containing weather data for the timestamp at a location of (lat,long)
    '''

    url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine'
    # url = 'https://history.openweathermap.org/data/2.5/history/city'

    weather_params = {
        'lat': lat,
        'lon': long,
        'dt': timestamp,
        'units': 'imperial',
        'appid': API_KEY_weather
    }

    try:
        # make the API call
        response = requests.get(url=url, params=weather_params)

        # check that it was successful
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        print(f'Error making weather request: {e}')
        return {}
    
    try:
        # get json output
        json_output = response.json()
        weather = json_output['data'][0]

    except requests.exceptions.JSONDecodeError as je:
        print(f'No JSON available: {je}')
        return {}
    
    except KeyError:
        print("No weather data.")
        return {}
    
    else:
        return weather

In [214]:
def get_forecasted_weather(lat:float, long:float) -> dict:
    '''
    Function to make a request to the OpenWeather 16-Day Forecast API to get forecasted weather for a specified resort for the next 16 days

    Parameters
    ------
    lat : float
        a float representing the latitudinal coordinates of your destination
    long : float
        a float representing the longitudinal coordinates of your destination

    Returns
    ------
    weather_dict : dict
        a dictionary containing weather data at a location of (lat,long)
    '''

    url = 'https://api.openweathermap.org/data/2.5/forecast/daily'

    weather_params = {
        'lat': lat,
        'lon': long,
        'cnt': 16,
        'mode': 'json',
        'units': 'imperial',
        'appid': API_KEY_weather
    }

    try:
        # make the API call
        response = requests.get(url=url, params=weather_params)

        # check that it was successful
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        print(f'Error making weather request: {e}')
        return None
    
    try:
        # get json output
        json_output = response.json()

    except requests.exceptions.JSONDecodeError as je:
        print(f'No JSON available: {je}')
        return None
    
    else:
        return json_output

In [299]:
def get_utc_timestamp(original_date:dt.datetime, tz:str) -> int:
    '''
    Function to make a datetime UTC then convert to Unix timestamp
    '''
    local_date = original_date.astimezone(tz=pytz.timezone(tz))
    utc_date = local_date.astimezone(dt.timezone.utc)
    
    return int(utc_date.timestamp())

In [177]:
get_utc_timestamp(dt.datetime(2021,12,7,10,30,0), "America/Denver")

1638891000

In [207]:
date = dt.date(2024,1,1)
get_historical_weather(lat=40.581, long=-111.656, timestamp=get_utc_timestamp(dt.datetime.combine(date, dt.time(10,30,0)), "America/Denver"))

{'dt': 1704123000,
 'sunrise': 1704120610,
 'sunset': 1704154174,
 'temp': 11.46,
 'feels_like': 5.92,
 'pressure': 1024,
 'humidity': 92,
 'dew_point': 9.79,
 'clouds': 0,
 'wind_speed': 3.09,
 'wind_deg': 96,
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01d'}]}

Putting the functions all together to call the AccuWeather Geocoding and OpenWeather APIs for the ski resorts and the specified dates:

In [332]:
selected_season_dates#.items()

{'2021-2022--1': [datetime.date(2021, 12, 12), datetime.date(2021, 12, 7)],
 '2021-2022--2': [datetime.date(2021, 12, 14), datetime.date(2021, 12, 15)],
 '2021-2022--3': [datetime.date(2021, 12, 25), datetime.date(2021, 12, 20)],
 '2021-2022--4': [datetime.date(2021, 12, 30), datetime.date(2022, 1, 2)],
 '2021-2022--5': [datetime.date(2022, 1, 9), datetime.date(2022, 1, 5)],
 '2021-2022--6': [datetime.date(2022, 1, 16), datetime.date(2022, 1, 14)],
 '2021-2022--7': [datetime.date(2022, 1, 17), datetime.date(2022, 1, 19)],
 '2021-2022--8': [datetime.date(2022, 1, 24), datetime.date(2022, 1, 30)],
 '2021-2022--9': [datetime.date(2022, 2, 5), datetime.date(2022, 1, 31)],
 '2021-2022--10': [datetime.date(2022, 2, 11), datetime.date(2022, 2, 10)],
 '2021-2022--11': [datetime.date(2022, 2, 18), datetime.date(2022, 2, 14)],
 '2021-2022--12': [datetime.date(2022, 2, 23), datetime.date(2022, 2, 22)],
 '2021-2022--13': [datetime.date(2022, 3, 2), datetime.date(2022, 3, 3)],
 '2021-2022--14': [da

In [337]:
'2024-2025--20'.split('--', maxsplit=1)[1]

'20'

In [338]:
test_resorts = {'Mammoth Mountain': 'Northern California'}
test_all_selected_dates = {'2021-2022--1': [dt.date(2021, 12, 7)]}

test_mega_list = []

for resort, region in test_resorts.items():
    print(f"Starting {resort}...\n")

    # get the geographical coordinates
    print(f"Getting coordinates and time zone...\n")
    # latitude, longitude, timezone_name = get_coordinates_tz(resort=resort)

    # this was me playing with fake data to test this chunk w/o wasting API calls
    latitude, longitude, timezone_name = random.randint(-90,90), random.randint(-180,180), random.choice(['America/Los_Angeles', 'America/Denver', 'America/Chicago', 'America/New_York'])

    if latitude is None and longitude is None and timezone_name is None:
        continue
    
    # cycle through the dates
    for szn_key, date_values in test_all_selected_dates.items():
        for date in date_values:
            print(f"The date is {date} for {szn_key}")

            # check if date is before today() b/c need to call 2 different weather APIs
            if date >= dt.date.today():
                # print("Getting forecasted weather...")
                # # need to call the other API that does a 16-day forecast, then can break out of loop to not waste API calls
                # api_response = get_forecasted_weather(lat=latitude, long=longitude)

                # # the actual weather data is located in "list" of json output
                # try:
                #     weather_json_list = api_response["list"]

                # except KeyError:
                #     print("No 'list' variable in API response.")

                # else:
                #     for i, forecasted_dict in enumerate(weather_json_list):
                #         print(f"Getting the forecasted weather for Day {i+1}...\n")
                #         row['unix_timestamp'] = forecasted_dict['dt']
                #         row['weather_forecasted'] = forecasted_dict
                #         row['weather_historical'] = ''

                #         mega_list.append(row)

                # finally:
                #     break
                continue

            else:
                # get weather for the 2 times in Unix time, UTC time zone according to location of resort
                for t in [10, 14]:
                    datetime_obj = dt.datetime.combine(date, dt.time(t,30,0))
                    utc_timestamp = get_utc_timestamp(datetime_obj, timezone_name)

                    print(f"Getting historical weather for {date} at {t}:30...")
                    # weather_data = get_historical_weather(lat=latitude, long=longitude, timestamp=utc_timestamp)

                    # this was me playing with fake data to test this chunk w/o wasting API calls
                    weather_data = {
                        'temp':random.randint(0,40), 
                        'conditions':random.choice(['rain','wind','snow','sun']),
                        'uvi': random.random()
                        }

                    # construct 'row' down here
                    row = {
                        'ski_resort': resort, 
                        'us_region': region,
                        'latitude': latitude,
                        'longitude': longitude,
                        'original_timezone': timezone_name,
                        'date': datetime_obj.strftime("%Y-%m-%d"),
                        'time': datetime_obj.strftime("%H:%M"),
                        'unix_timestamp': utc_timestamp,
                        'ski_season': szn_key.split('--', maxsplit=1)[0],
                        'week_num': szn_key.split('--', maxsplit=1)[1],

                        # the weather data I need from API
                        'temp': weather_data.get('temp', np.nan),
                        'conditions': weather_data.get('conditions', ''),
                        'temp_feels_like': weather_data.get('feels_like', np.nan),
                        'cloud_percent': weather_data.get('clouds', np.nan),
                        'visibility': weather_data.get('visibility', np.nan),
                        'wind_speed': weather_data.get('wind_speed', np.nan),
                        'weather_main': weather_data.get('weather', [{}])[0].get('main', ''),
                        'weather_desc': weather_data.get('weather', [{}])[0].get('description', ''),
                        'rain_rate': weather_data.get('rain', {}).get('1h', np.nan),
                        'snow_rate': weather_data.get('snow', {}).get('1h', np.nan)
                        }

                    test_mega_list.append(row.copy())

        print("End of Date")
        
    print("End of Resort\n\n")
    

Starting Mammoth Mountain...

Getting coordinates and time zone...

The date is 2021-12-07 for 2021-2022--1
Getting historical weather for 2021-12-07 at 10:30...
Getting historical weather for 2021-12-07 at 14:30...
End of Date
End of Resort




In [334]:
test_mega_df = pd.DataFrame(test_mega_list)

In [335]:
test_mega_df.head(5)

Unnamed: 0,ski_resort,us_region,latitude,longitude,original_timezone,date,time,unix_timestamp,temp,conditions,temp_feels_like,cloud_percent,visibility,wind_speed,weather_main,weather_desc,rain_rate,snow_rate
0,Mammoth Mountain,Northern California,13,-177,America/Chicago,2021-12-07,10:30,1638891000,22,sun,,,,,,,,
1,Mammoth Mountain,Northern California,13,-177,America/Chicago,2021-12-07,14:30,1638905400,5,wind,,,,,,,,


In [275]:
weather_data = {'temp':random.randint(0,40), 'conditions':random.choice(['rain','wind','snow','sun'])}

time_data = {
        'unix_timestamp': '12354',
        'temp': weather_data['temp'],
        **weather_data
    }

time_data

{'unix_timestamp': '12354', 'temp': 2, 'conditions': 'snow'}

In [None]:
test_resorts = {'Mammoth Mountain': 'Northern California', 'Steamboat': 'Rockies'}
test_all_selected_dates = [dt.date(2021,12,21), dt.date(2022,1,2), dt.date(2025,4,1)]

test_mega_list = []

def calculate_time_data(timezone_name, date, time, resort_data):
    # getting the weather data for that date and time
    # resort_data ~= 'row'
    datetime_obj = dt.datetime.combine(date, dt.time(time,30,0))
                
    utc_timestamp = get_utc_timestamp(datetime_obj, timezone_name)

    print(f"Getting historical weather for {date} at {time}:30...")

    weather_data = {'temp':random.randint(0,40), 'conditions':random.choice(['rain','wind','snow','sun'])} # API call here

    time_data = {
        'unix_timestamp': utc_timestamp,
        'temp': weather_data['temp'],
        **weather_data
    }
    
    # Merge resort data with time data
    time_data.update(resort_data)
    
    return time_data

def calculate_date_data(timezone_name, date, resort_data):
    # all the times for a specific date
    print(f"The date is {date}")
    date_data = resort_data.copy()
    date_data['date'] = date.strftime("%Y-%m-%d")

    # check if date is before today() b/c need to call 2 different weather APIs
    if date >= dt.date.today():
        return []

    else:
        results = []
        # get weather for the 2 times in Unix time, UTC time zone according to location of resort
        for time in [10, 14]:
            time_data = calculate_time_data(timezone_name, date, time, date_data)
            results.append(time_data)
            
        return results
                
for resort, region in test_resorts.items():
    print(f"Starting {resort}...\n")
    resort_data = {'ski_resort': resort, 'us_region': region}

    # get the geographical coordinates
    print(f"Getting coordinates and time zone...\n")
    latitude, longitude, timezone_name = random.randint(-90,90), random.randint(-180,180), random.choice(['America/Los_Angeles', 'America/Denver', 'America/Chicago', 'America/New_York'])

    resort_data.update({
        'latitude': latitude,
        'longitude': longitude,
        'original_timezone': timezone_name
    })

    if latitude is None and longitude is None and timezone_name is None:
        continue
    
    # cycle through the dates
    for date in test_all_selected_dates:
        date_results = calculate_date_data(timezone_name, date, resort_data)
        test_mega_list.extend(date_results)

    print("End of Dates")
print("End of Resort\n\n")


df = pd.DataFrame(test_mega_list)
print(df)
df.to_csv('test_mega_list.csv', index=False)


After playing with fake data, then test on real data with one resort, one date, one time.

In [315]:
len(all_selected_dates)*2*3

900

Divide `resorts` in 2 to spread out API calls over 2 diff keys/accounts to not get charged:

In [322]:
resort_keys = list(resorts.items())
resorts1 = resort_keys[:4]
resorts2 = resort_keys[4:]
resorts1

[('Mammoth Mountain', 'Northern California'),
 ('Sierra-at-Tahoe', 'Northern California'),
 ('Copper Mountain', 'Rockies'),
 ('Snowbird', 'Rockies')]

In [325]:
# first batch of resorts

mega_list1 = []

for resort, region in resorts1:
    print(f"Starting {resort}...\n")

    # get the geographical coordinates
    print(f"Getting coordinates and time zone...\n")
    latitude, longitude, timezone_name = get_coordinates_tz(resort=resort)

    # this was me playing with fake data to test this chunk w/o wasting API calls
    # latitude, longitude, timezone_name = random.randint(-90,90), random.randint(-180,180), random.choice(['America/Los_Angeles', 'America/Denver', 'America/Chicago', 'America/New_York'])

    if latitude is None and longitude is None and timezone_name is None:
        continue
    
    # cycle through the dates
    for date in all_selected_dates:
        print(f"The date is {date}")

        # check if date is before today() b/c need to call 2 different weather APIs
        if date >= dt.date.today():
            # print("Getting forecasted weather...")
            # # need to call the other API that does a 16-day forecast, then can break out of loop to not waste API calls
            # api_response = get_forecasted_weather(lat=latitude, long=longitude)

            # # the actual weather data is located in "list" of json output
            # try:
            #     weather_json_list = api_response["list"]

            # except KeyError:
            #     print("No 'list' variable in API response.")

            # else:
            #     for i, forecasted_dict in enumerate(weather_json_list):
            #         print(f"Getting the forecasted weather for Day {i+1}...\n")
            #         row['unix_timestamp'] = forecasted_dict['dt']
            #         row['weather_forecasted'] = forecasted_dict
            #         row['weather_historical'] = ''

            #         mega_list.append(row)

            # finally:
            #     break
            continue

        else:
            # get weather for the 2 times in Unix time, UTC time zone according to location of resort
            for t in [10, 14]:
                datetime_obj = dt.datetime.combine(date, dt.time(t,30,0))
                utc_timestamp = get_utc_timestamp(datetime_obj, timezone_name)

                print(f"Getting historical weather for {date} at {t}:30...")
                weather_data = get_historical_weather(lat=latitude, long=longitude, timestamp=utc_timestamp)

                # this was me playing with fake data to test this chunk w/o wasting API calls
                # weather_data = {
                #     'temp':random.randint(0,40), 
                #     'conditions':random.choice(['rain','wind','snow','sun']),
                #     'uvi': random.random()
                #     }

                # construct 'row' down here
                row = {
                    'ski_resort': resort, 
                    'us_region': region,
                    'latitude': latitude,
                    'longitude': longitude,
                    'original_timezone': timezone_name,
                    'date': datetime_obj.strftime("%Y-%m-%d"),
                    'time': datetime_obj.strftime("%H:%M"),
                    'unix_timestamp': utc_timestamp,

                    # the weather data I need from API
                    'temp': weather_data.get('temp', np.nan),
                    'temp_feels_like': weather_data.get('feels_like', np.nan),
                    'cloud_percent': weather_data.get('clouds', np.nan),
                    'visibility': weather_data.get('visibility', np.nan),
                    'wind_speed': weather_data.get('wind_speed', np.nan),
                    'weather_main': weather_data.get('weather', [{}])[0].get('main', ''),
                    'weather_desc': weather_data.get('weather', [{}])[0].get('description', ''),
                    'rain_rate': weather_data.get('rain', {}).get('1h', np.nan),
                    'snow_rate': weather_data.get('snow', {}).get('1h', np.nan)
                    }

                mega_list1.append(row.copy())

        print("End of Date")
        
    print("End of Resort\n\n")

df1 = pd.DataFrame(mega_list1)
print(df1)
df1.to_csv('data/weather_data_batch1.csv', index=False)

Starting Mammoth Mountain...

Getting coordinates and time zone...

The date is 2021-12-07
Getting historical weather for 2021-12-07 at 10:30...
Getting historical weather for 2021-12-07 at 14:30...
End of Date
The date is 2021-12-12
Getting historical weather for 2021-12-12 at 10:30...
Getting historical weather for 2021-12-12 at 14:30...
End of Date
The date is 2021-12-14
Getting historical weather for 2021-12-14 at 10:30...
Getting historical weather for 2021-12-14 at 14:30...
End of Date
The date is 2021-12-15
Getting historical weather for 2021-12-15 at 10:30...
Getting historical weather for 2021-12-15 at 14:30...
End of Date
The date is 2021-12-20
Getting historical weather for 2021-12-20 at 10:30...
Getting historical weather for 2021-12-20 at 14:30...
End of Date
The date is 2021-12-25
Getting historical weather for 2021-12-25 at 10:30...
Getting historical weather for 2021-12-25 at 14:30...
End of Date
The date is 2021-12-30
Getting historical weather for 2021-12-30 at 10:30.

Need to add 2 new columns to `df1` for 'ski_season' and 'week_num' that I thought of after running the above chunk and calling the API.

In [355]:
def get_key(d:dict, v:str):
    '''
    Function to get the key for a specific value 'v' in dictionary 'd'
    '''
    for key, values in d.items():
        for value in values:
            if value == v:
                return key

In [356]:
get_key(selected_season_dates, dt.datetime.strptime('2021-12-15', '%Y-%m-%d').date())

'2021-2022--2'

In [363]:
df1_new = df1.copy()

In [364]:
df1_new['date_dt'] = df1_new['date'].apply(lambda x: dt.datetime.strptime(str(x), '%Y-%m-%d').date())
df1_new['ski_season-week_num'] = df1_new['date_dt'].apply(lambda x: get_key(selected_season_dates, x))

df1_new[['ski_season', 'week_num']] = df1_new['ski_season-week_num'].str.split('--', n=1, expand=True)

df1_new.drop(columns=['date_dt', 'ski_season-week_num'], inplace=True)

In [365]:
df1_new

Unnamed: 0,ski_resort,us_region,latitude,longitude,original_timezone,date,time,unix_timestamp,temp,temp_feels_like,cloud_percent,visibility,wind_speed,weather_main,weather_desc,rain_rate,snow_rate,ski_season,week_num
0,Mammoth Mountain,Northern California,37.631,-119.032,America/Los_Angeles,2021-12-07,10:30,1638891000,21.85,21.85,100,10000.0,0.00,Snow,light snow,,0.24,2021-2022,1
1,Mammoth Mountain,Northern California,37.631,-119.032,America/Los_Angeles,2021-12-07,14:30,1638905400,24.87,19.13,100,10000.0,4.61,Rain,light rain,0.76,,2021-2022,1
2,Mammoth Mountain,Northern California,37.631,-119.032,America/Los_Angeles,2021-12-12,10:30,1639323000,17.08,4.48,75,10000.0,16.11,Clouds,broken clouds,,,2021-2022,1
3,Mammoth Mountain,Northern California,37.631,-119.032,America/Los_Angeles,2021-12-12,14:30,1639337400,28.53,19.42,77,,10.00,Clouds,broken clouds,,,2021-2022,1
4,Mammoth Mountain,Northern California,37.631,-119.032,America/Los_Angeles,2021-12-14,10:30,1639495800,10.87,-1.73,100,402.0,19.57,Snow,snow,,3.05,2021-2022,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1139,Snowbird,Rockies,40.581,-111.656,America/Denver,2025-03-14,14:30,1741977000,20.66,12.60,98,,6.13,Snow,light snow,,,2024-2025,15
1140,Snowbird,Rockies,40.581,-111.656,America/Denver,2025-03-21,10:30,1742567400,22.98,22.98,95,10000.0,2.17,Clouds,overcast clouds,,,2024-2025,16
1141,Snowbird,Rockies,40.581,-111.656,America/Denver,2025-03-21,14:30,1742581800,32.22,27.27,88,10000.0,5.08,Clouds,overcast clouds,,,2024-2025,16
1142,Snowbird,Rockies,40.581,-111.656,America/Denver,2025-03-22,10:30,1742653800,25.86,20.84,100,137.0,4.12,Snow,light snow,,0.19,2024-2025,16


In [366]:
df1_new.to_csv('data/weather_data_batch1_new.csv', index=False)

## The Second Batch of Data

In [368]:
# second batch of resorts

mega_list2 = []

for resort, region in resorts2:
    print(f"Starting {resort}...\n")

    # get the geographical coordinates
    print(f"Getting coordinates and time zone...\n")
    latitude, longitude, timezone_name = get_coordinates_tz(resort=resort)

    # this was me playing with fake data to test this chunk w/o wasting API calls
    # latitude, longitude, timezone_name = random.randint(-90,90), random.randint(-180,180), random.choice(['America/Los_Angeles', 'America/Denver', 'America/Chicago', 'America/New_York'])

    if latitude is None and longitude is None and timezone_name is None:
        continue
    
    # cycle through the dates
    for date in all_selected_dates:
        print(f"The date is {date}")

        # check if date is before today() b/c need to call 2 different weather APIs
        if date >= dt.date.today():
            # print("Getting forecasted weather...")
            # # need to call the other API that does a 16-day forecast, then can break out of loop to not waste API calls
            # api_response = get_forecasted_weather(lat=latitude, long=longitude)

            # # the actual weather data is located in "list" of json output
            # try:
            #     weather_json_list = api_response["list"]

            # except KeyError:
            #     print("No 'list' variable in API response.")

            # else:
            #     for i, forecasted_dict in enumerate(weather_json_list):
            #         print(f"Getting the forecasted weather for Day {i+1}...\n")
            #         row['unix_timestamp'] = forecasted_dict['dt']
            #         row['weather_forecasted'] = forecasted_dict
            #         row['weather_historical'] = ''

            #         mega_list.append(row)

            # finally:
            #     break
            continue

        else:
            # get weather for the 2 times in Unix time, UTC time zone according to location of resort
            for t in [10, 14]:
                datetime_obj = dt.datetime.combine(date, dt.time(t,30,0))
                utc_timestamp = get_utc_timestamp(datetime_obj, timezone_name)

                print(f"Getting historical weather for {date} at {t}:30...")
                weather_data = get_historical_weather(lat=latitude, long=longitude, timestamp=utc_timestamp)

                # this was me playing with fake data to test this chunk w/o wasting API calls
                # weather_data = {
                #     'temp':random.randint(0,40), 
                #     'conditions':random.choice(['rain','wind','snow','sun']),
                #     'uvi': random.random()
                #     }

                # construct 'row' down here
                row = {
                    'ski_resort': resort, 
                    'us_region': region,
                    'latitude': latitude,
                    'longitude': longitude,
                    'original_timezone': timezone_name,
                    'date': datetime_obj.strftime("%Y-%m-%d"),
                    'time': datetime_obj.strftime("%H:%M"),
                    'unix_timestamp': utc_timestamp,

                    # the weather data I need from API
                    'temp': weather_data.get('temp', np.nan),
                    'temp_feels_like': weather_data.get('feels_like', np.nan),
                    'cloud_percent': weather_data.get('clouds', np.nan),
                    'visibility': weather_data.get('visibility', np.nan),
                    'wind_speed': weather_data.get('wind_speed', np.nan),
                    'weather_main': weather_data.get('weather', [{}])[0].get('main', ''),
                    'weather_desc': weather_data.get('weather', [{}])[0].get('description', ''),
                    'rain_rate': weather_data.get('rain', {}).get('1h', np.nan),
                    'snow_rate': weather_data.get('snow', {}).get('1h', np.nan)
                    }

                mega_list2.append(row.copy())

        print("End of Date")
        
    print("End of Resort\n\n")

df2 = pd.DataFrame(mega_list2)
print(df2)
df2.to_csv('data/weather_data_batch2.csv', index=False)

Starting The Summit at Snoqualmie...

Getting coordinates and time zone...

The date is 2021-12-07
Getting historical weather for 2021-12-07 at 10:30...
Getting historical weather for 2021-12-07 at 14:30...
End of Date
The date is 2021-12-12
Getting historical weather for 2021-12-12 at 10:30...
Getting historical weather for 2021-12-12 at 14:30...
End of Date
The date is 2021-12-14
Getting historical weather for 2021-12-14 at 10:30...
Getting historical weather for 2021-12-14 at 14:30...
End of Date
The date is 2021-12-15
Getting historical weather for 2021-12-15 at 10:30...
Getting historical weather for 2021-12-15 at 14:30...
End of Date
The date is 2021-12-20
Getting historical weather for 2021-12-20 at 10:30...
Getting historical weather for 2021-12-20 at 14:30...
End of Date
The date is 2021-12-25
Getting historical weather for 2021-12-25 at 10:30...
Getting historical weather for 2021-12-25 at 14:30...
End of Date
The date is 2021-12-30
Getting historical weather for 2021-12-30 a

In [369]:
df2_new = df2.copy()

In [370]:
df2_new['date_dt'] = df2_new['date'].apply(lambda x: dt.datetime.strptime(str(x), '%Y-%m-%d').date())
df2_new['ski_season-week_num'] = df2_new['date_dt'].apply(lambda x: get_key(selected_season_dates, x))

df2_new[['ski_season', 'week_num']] = df2_new['ski_season-week_num'].str.split('--', n=1, expand=True)

df2_new.drop(columns=['date_dt', 'ski_season-week_num'], inplace=True)

In [371]:
df2_new

Unnamed: 0,ski_resort,us_region,latitude,longitude,original_timezone,date,time,unix_timestamp,temp,temp_feels_like,cloud_percent,visibility,wind_speed,weather_main,weather_desc,rain_rate,snow_rate,ski_season,week_num
0,The Summit at Snoqualmie,PNW,47.424,-121.417,America/Los_Angeles,2021-12-07,10:30,1638891000,40.62,40.62,100,3219.0,0.00,Mist,mist,,,2021-2022,1
1,The Summit at Snoqualmie,PNW,47.424,-121.417,America/Los_Angeles,2021-12-07,14:30,1638905400,40.55,40.55,100,805.0,0.00,Fog,fog,,,2021-2022,1
2,The Summit at Snoqualmie,PNW,47.424,-121.417,America/Los_Angeles,2021-12-12,10:30,1639323000,30.61,25.83,100,,4.61,Snow,snow,,0.35,2021-2022,1
3,The Summit at Snoqualmie,PNW,47.424,-121.417,America/Los_Angeles,2021-12-12,14:30,1639337400,32.94,32.94,100,,0.00,Snow,light snow,,0.20,2021-2022,1
4,The Summit at Snoqualmie,PNW,47.424,-121.417,America/Los_Angeles,2021-12-14,10:30,1639495800,24.04,16.99,75,,5.75,Snow,light snow,,0.13,2021-2022,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,Boyne Mountain,Midwest,45.159,-84.939,America/Detroit,2025-03-14,14:30,1741977000,57.97,55.81,100,,12.50,Clouds,overcast clouds,,,2024-2025,15
854,Boyne Mountain,Midwest,45.159,-84.939,America/Detroit,2025-03-21,10:30,1742567400,32.92,23.04,91,10000.0,14.27,Clouds,overcast clouds,,,2024-2025,16
855,Boyne Mountain,Midwest,45.159,-84.939,America/Detroit,2025-03-21,14:30,1742581800,44.64,37.17,100,10000.0,17.34,Clouds,overcast clouds,,,2024-2025,16
856,Boyne Mountain,Midwest,45.159,-84.939,America/Detroit,2025-03-22,10:30,1742653800,19.83,7.23,93,10000.0,14.36,Clouds,overcast clouds,,,2024-2025,16


In [372]:
df2_new.to_csv('data/weather_data_batch2_new.csv', index=False)

In [378]:
# third batch of resort ->> just for "Boyne Highlands"

mega_list3 = []

resort = 'Boyne Highlands'
region = 'Midwest'

print(f"Starting {resort}...\n")

# get the geographical coordinates
print(f"Getting coordinates and time zone...\n")
latitude, longitude, timezone_name = get_coordinates_tz(resort=resort)

# this was me playing with fake data to test this chunk w/o wasting API calls
# latitude, longitude, timezone_name = random.randint(-90,90), random.randint(-180,180), random.choice(['America/Los_Angeles', 'America/Denver', 'America/Chicago', 'America/New_York'])

# cycle through the dates
for date in all_selected_dates:
    print(f"The date is {date}")

    # check if date is before today() b/c need to call 2 different weather APIs
    if date >= dt.date.today():
        # print("Getting forecasted weather...")
        # # need to call the other API that does a 16-day forecast, then can break out of loop to not waste API calls
        # api_response = get_forecasted_weather(lat=latitude, long=longitude)

        # # the actual weather data is located in "list" of json output
        # try:
        #     weather_json_list = api_response["list"]

        # except KeyError:
        #     print("No 'list' variable in API response.")

        # else:
        #     for i, forecasted_dict in enumerate(weather_json_list):
        #         print(f"Getting the forecasted weather for Day {i+1}...\n")
        #         row['unix_timestamp'] = forecasted_dict['dt']
        #         row['weather_forecasted'] = forecasted_dict
        #         row['weather_historical'] = ''

        #         mega_list.append(row)

        # finally:
        #     break
        continue

    else:
        # get weather for the 2 times in Unix time, UTC time zone according to location of resort
        for t in [10, 14]:
            datetime_obj = dt.datetime.combine(date, dt.time(t,30,0))
            utc_timestamp = get_utc_timestamp(datetime_obj, timezone_name)

            print(f"Getting historical weather for {date} at {t}:30...")
            weather_data = get_historical_weather(lat=latitude, long=longitude, timestamp=utc_timestamp)

            # this was me playing with fake data to test this chunk w/o wasting API calls
            # weather_data = {
            #     'temp':random.randint(0,40), 
            #     'conditions':random.choice(['rain','wind','snow','sun']),
            #     'uvi': random.random()
            #     }

            # construct 'row' down here
            row = {
                'ski_resort': resort, 
                'us_region': region,
                'latitude': latitude,
                'longitude': longitude,
                'original_timezone': timezone_name,
                'date': datetime_obj.strftime("%Y-%m-%d"),
                'time': datetime_obj.strftime("%H:%M"),
                'unix_timestamp': utc_timestamp,

                # the weather data I need from API
                'temp': weather_data.get('temp', np.nan),
                'temp_feels_like': weather_data.get('feels_like', np.nan),
                'cloud_percent': weather_data.get('clouds', np.nan),
                'visibility': weather_data.get('visibility', np.nan),
                'wind_speed': weather_data.get('wind_speed', np.nan),
                'weather_main': weather_data.get('weather', [{}])[0].get('main', ''),
                'weather_desc': weather_data.get('weather', [{}])[0].get('description', ''),
                'rain_rate': weather_data.get('rain', {}).get('1h', np.nan),
                'snow_rate': weather_data.get('snow', {}).get('1h', np.nan)
                }

            mega_list3.append(row.copy())

    print("End of Date")
    
print("End of Resort\n\n")

df3 = pd.DataFrame(mega_list3)
print(df3)
df3.to_csv('data/weather_data_batch3.csv', index=False)

Starting Boyne Highlands...

Getting coordinates and time zone...

The date is 2021-12-07
Getting historical weather for 2021-12-07 at 10:30...
Getting historical weather for 2021-12-07 at 14:30...
End of Date
The date is 2021-12-12
Getting historical weather for 2021-12-12 at 10:30...
Getting historical weather for 2021-12-12 at 14:30...
End of Date
The date is 2021-12-14
Getting historical weather for 2021-12-14 at 10:30...
Getting historical weather for 2021-12-14 at 14:30...
End of Date
The date is 2021-12-15
Getting historical weather for 2021-12-15 at 10:30...
Getting historical weather for 2021-12-15 at 14:30...
End of Date
The date is 2021-12-20
Getting historical weather for 2021-12-20 at 10:30...
Getting historical weather for 2021-12-20 at 14:30...
End of Date
The date is 2021-12-25
Getting historical weather for 2021-12-25 at 10:30...
Getting historical weather for 2021-12-25 at 14:30...
End of Date
The date is 2021-12-30
Getting historical weather for 2021-12-30 at 10:30..

In [379]:
df3_new = df3.copy()

In [380]:
df3_new['date_dt'] = df3_new['date'].apply(lambda x: dt.datetime.strptime(str(x), '%Y-%m-%d').date())
df3_new['ski_season-week_num'] = df3_new['date_dt'].apply(lambda x: get_key(selected_season_dates, x))

df3_new[['ski_season', 'week_num']] = df3_new['ski_season-week_num'].str.split('--', n=1, expand=True)

df3_new.drop(columns=['date_dt', 'ski_season-week_num'], inplace=True)

In [381]:
df3_new

Unnamed: 0,ski_resort,us_region,latitude,longitude,original_timezone,date,time,unix_timestamp,temp,temp_feels_like,cloud_percent,visibility,wind_speed,weather_main,weather_desc,rain_rate,snow_rate,ski_season,week_num
0,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2021-12-07,10:30,1638891000,17.60,5.00,100,10000.0,11.50,Clouds,overcast clouds,,,2021-2022,1
1,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2021-12-07,14:30,1638905400,18.61,6.01,100,10000.0,13.80,Snow,light snow,,,2021-2022,1
2,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2021-12-12,10:30,1639323000,34.93,24.98,0,10000.0,16.11,Rain,light rain,0.25,,2021-2022,1
3,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2021-12-12,14:30,1639337400,41.29,33.17,0,10000.0,16.11,Rain,moderate rain,1.52,,2021-2022,1
4,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2021-12-14,10:30,1639495800,34.72,26.92,0,10000.0,10.36,Clear,clear sky,,,2021-2022,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2025-03-14,14:30,1741977000,62.26,59.88,75,10000.0,16.11,Clouds,broken clouds,,,2024-2025,15
282,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2025-03-21,10:30,1742567400,33.28,27.10,0,10000.0,6.91,Clear,clear sky,,,2024-2025,16
283,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2025-03-21,14:30,1742581800,43.03,36.07,100,10000.0,13.80,Clouds,overcast clouds,,,2024-2025,16
284,Boyne Highlands,Midwest,45.469,-84.922,America/Detroit,2025-03-22,10:30,1742653800,20.77,8.17,100,10000.0,16.11,Clouds,overcast clouds,,,2024-2025,16


In [382]:
df3_new.to_csv('data/weather_data_batch3_new.csv', index=False)