# **Download and import libraries**

In [1]:
!pip install boto3



In [2]:
# Environment variables (secrets from Google Colab)
from google.colab import userdata

# Interaction with AWS services
import boto3

# Extract data from an API
import requests
import pandas as pd

# **Parameters**

In [3]:
# API request
url = 'https://archive-api.open-meteo.com/v1/archive'
params = {
    # Coordinates for Reforma, Mexico City (CDMX)
    'latitude': 19.4291,
    'longitude': -99.1621,
    'start_date': '2020-01-01',
    'end_date': '2024-12-31',
    'daily': [
        'temperature_2m_max',
        'temperature_2m_min',
        'temperature_2m_mean',
        'rain_sum',
        'precipitation_hours',
        'wind_speed_10m_max',
        'shortwave_radiation_sum'
    ],
    'timezone': 'America/Mexico_City'
}

# Create S3 client to interact with AWS S3
s3 = boto3.client(
    's3',
    aws_access_key_id=userdata.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=userdata.get('AWS_SECRET_ACCESS_KEY'),
    region_name=userdata.get('AWS_DEFAULT_REGION')
)

# **Extract data from the API**

In [4]:
try:
  response = requests.get(url, params=params)
  response.raise_for_status()
  data = response.json()
  print(data)
except requests.exceptions.RequestException as e:
  print(f'Error {e}')

{'latitude': 19.437609, 'longitude': -99.19641, 'generationtime_ms': 4.080772399902344, 'utc_offset_seconds': -21600, 'timezone': 'America/Mexico_City', 'timezone_abbreviation': 'GMT-6', 'elevation': 2232.0, 'daily_units': {'time': 'iso8601', 'temperature_2m_max': '°C', 'temperature_2m_min': '°C', 'temperature_2m_mean': '°C', 'rain_sum': 'mm', 'precipitation_hours': 'h', 'wind_speed_10m_max': 'km/h', 'shortwave_radiation_sum': 'MJ/m²'}, 'daily': {'time': ['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12', '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16', '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20', '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01', '2020-02-02', '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06', '2020-02-07', '2020-02

In [5]:
# Extract relevant weather data from the API response
daily_data = {
    "date": data["daily"]["time"],
    "temperature_2m_max": data["daily"]["temperature_2m_max"],
    "temperature_2m_min": data["daily"]["temperature_2m_min"],
    "temperature_2m_mean": data["daily"]["temperature_2m_mean"],
    "rain_sum": data["daily"]["rain_sum"],
    "precipitation_hours": data["daily"]["precipitation_hours"],
    "wind_speed_10m_max": data["daily"]["wind_speed_10m_max"],
    "shortwave_radiation_sum": data["daily"]["shortwave_radiation_sum"]
}

In [6]:
# Create a DataFrame from the extracted weather data
df_weather = pd.DataFrame(daily_data)
df_weather.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,temperature_2m_mean,rain_sum,precipitation_hours,wind_speed_10m_max,shortwave_radiation_sum
0,2020-01-01,24.7,6.4,15.0,0.0,0.0,18.2,19.36
1,2020-01-02,21.9,10.4,15.2,0.0,0.0,19.8,18.18
2,2020-01-03,23.8,6.7,14.2,0.0,0.0,16.6,18.19
3,2020-01-04,17.7,5.8,10.4,0.0,0.0,20.2,18.17
4,2020-01-05,21.7,4.9,11.8,0.0,0.0,11.3,19.99


# **Upload the CSV to S3**

In [7]:
def upload_csv_to_s3(df, bucket_name, file_name):
    # Guardar el DataFrame en un archivo CSV local
    csv_file_path = '/tmp/' + file_name
    df.to_csv(csv_file_path, index=False)

    # Subir el archivo CSV al bucket S3
    s3.upload_file(csv_file_path, bucket_name, file_name)
    print(f'File {file_name} successfully uploaded to S3 in bucket {bucket_name}.')

In [8]:
upload_csv_to_s3(df_weather, userdata.get('BUCKET_NAME'), 'weather_data.csv')

File weather_data.csv successfully uploaded to S3 in bucket weather-khj.
