In [None]:
import openmeteo_requests
import requests
import boto3
import requests_cache
import pandas as pd
from retry_requests import retry

from pyspark.sql import SparkSession

import matplotlib.pyplot as plt
import numpy as np

from datetime import datetime, timedelta


In [None]:
# Build the Spark session for this notebook under the application name
# "WeatherData"; getOrCreate() hands back an already-active session if one exists.
spark = (
    SparkSession.builder
    .appName("WeatherData")
    .getOrCreate()
)

In [None]:
# HTTP stack for the Open-Meteo client, built inside-out:
#   1. a requests-cache session persisted under ".cache" (expire_after=3600),
#   2. wrapped so failed requests are retried (5 retries, backoff factor 0.2),
#   3. handed to the Open-Meteo API client.
cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition for the Open-Meteo historical-forecast endpoint.
# Every weather variable that is needed must be listed in the request; the
# order of the names under "hourly" (or "daily") matters, because the values
# are read back by position when the response is processed.
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
params = {
    "latitude": 54.3523,
    "longitude": 18.6491,
    "start_date": "2021-03-23",
    "end_date": "2025-02-16",
    "hourly": "temperature_2m",
}

# Send the request; the result is kept in `responses` for the processing step.
responses = openmeteo.weather_api(url, params=params)

# The request result is stored in `responses` (plural). Pick the first entry
# explicitly: the original code read an undefined name `response`, which
# raises NameError on a fresh kernel.
# NOTE(review): assumes a single location/model was requested, so the first
# entry is the one of interest. Confirm if more coordinates are added.
response = responses[0]

# Process hourly data. The order of variables needs to be the same as requested:
# index 0 is the first (and here only) name listed under "hourly".
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

# Rebuild the timestamp axis from the response's start, end and step, all of
# which are interpreted as seconds. inclusive="left" drops the end bound.
hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m

# One row per timestamp: columns "date" and "temperature_2m".
hourly_dataframe = pd.DataFrame(data = hourly_data)

In [None]:
# Export the hourly frame to a local CSV file (without the index column) and
# upload that file to the S3 bucket under the key "data/weather_data.csv".
s3_client = boto3.client("s3")
s3_bucket = "sparkcalculations"
file_path = "/tmp/weather_data.csv"

hourly_dataframe.to_csv(path_or_buf=file_path, index=False)
s3_client.upload_file(
    Filename=file_path,
    Bucket=s3_bucket,
    Key="data/weather_data.csv",
)