In [1]:
import requests
import os
import pandas as pd
# Let pandas display up to 500 rows before truncating the output.
pd.options.display.max_rows = 500

## Extract

### Create API class with the functions to extract data from the API

In [2]:
class AccuWeatherApiClient:
    """
    Client for getting data from AccuWeather API.

    Args:
        api_key: AccuWeather API key; it is sent as the ``apikey`` query
            parameter on every request.
        timeout: number of seconds to wait for the API before the request
            is aborted. Without a timeout ``requests`` waits indefinitely.

    Raises:
        Exception when ``api_key`` is None.
    """
    def __init__(self, api_key: str, timeout: float = 10.0):
        self.base_url = "https://dataservice.accuweather.com"
        if api_key is None:
            raise Exception("API key cannot be set to None.")
        self.api_key = api_key
        self.timeout = timeout

    @staticmethod
    def _extraction_error(response) -> Exception:
        """Build the exception raised whenever a response cannot be used."""
        return Exception(
            f"Failed to extract data from AccuWeather API. Status Code: {response.status_code}. Response: {response.text}"
        )

    def _get_first_result(self, url: str, params: dict, required_key: str = None) -> dict:
        """Send a GET request and return the first element of the JSON list.

        Args:
            url: full endpoint URL.
            params: query parameters sent with the request.
            required_key: when given, the first element must hold a non-None
                value under this key.

        Returns:
            The first element of the list returned by the API.

        Raises:
            Exception when the status code is not 200, when the body is not a
            non-empty list, or when ``required_key`` is missing.
        """
        response = requests.get(url=url, params=params, timeout=self.timeout)
        if response.status_code != 200:
            raise self._extraction_error(response)
        # Parse the body once; both endpoints answer with a JSON list.
        payload = response.json()
        # A 200 with an empty list (e.g. no city matched the search) must be
        # reported as an extraction failure, not as a bare IndexError.
        if not isinstance(payload, list) or not payload:
            raise self._extraction_error(response)
        first_result = payload[0]
        if required_key is not None and first_result.get(required_key) is None:
            raise self._extraction_error(response)
        return first_result

    def get_current_weather(
        self, location_key: str
    ) -> dict:
        """Extract the current weather conditions of a location using the current conditions API

        Args:
            location_key: key of the location, as returned by ``get_location``.
                It is interpolated into the request URL, so an int works too.

        Returns:
            A dict with the current weather metrics (the first element of the
            list returned by the API).

        Raises:
            Exception when it is not possible to extract data from the API.
        """
        location_url = f"{self.base_url}/currentconditions/v1/{location_key}"
        params = {
            "apikey" : self.api_key,
            "language" : "en-us",
            "details" : "true",
        }
        return self._get_first_result(url=location_url, params=params)

    def get_location(self, location_name: str) -> str:
        """Extract the location key of a city name using the location API

        Args:
            location_name: city text to search for (sent as the ``q`` parameter).

        Returns:
            A string with the key of the first city returned by the search.

        Raises:
            Exception when it is not possible to extract data from the API,
            including when the search returns no city.
        """
        location_url = f"{self.base_url}/locations/v1/cities/search"
        params = {
            "q": location_name,
            "apikey" : self.api_key,
            "language" : "en-us",
            "details" : "false",
        }
        first_match = self._get_first_result(
            url=location_url, params=params, required_key="Key"
        )
        return first_match.get("Key")

### Create extract function for the current weather API

In [3]:
def extract_current_weather(
    accuweather_client: AccuWeatherApiClient,
    location_name: str
) -> pd.DataFrame:
    """
    Extract the current weather conditions of a location from AccuWeather API.

    The location name is first resolved to a location key with
    ``accuweather_client.get_location``; that key is then passed to
    ``accuweather_client.get_current_weather``.

    Args:
        accuweather_client: client used to call the API.
        location_name: location text to search for.

    Returns:
        A dataframe built with ``pd.json_normalize`` from the current weather
        data, with the ``location_key`` and ``location_name`` fields added.
    """
    key = accuweather_client.get_location(location_name=location_name)
    current_conditions = accuweather_client.get_current_weather(location_key=key)
    # Tag the payload with the location it belongs to before flattening it.
    current_conditions.update(
        {"location_key": key, "location_name": location_name}
    )
    return pd.json_normalize(data=current_conditions)

## Pre-process

### Transformation

In [4]:
def uv_index_category(uv_index) -> str:
    """
    Categorize a UV index value.

    Args:
        uv_index : uv index value that comes from raw data extracted from the current_weather API
    Returns:
        A text with the category of the uv index: 'LOW' (<= 2), 'MODERATE' (<= 5),
        'HIGH' (<= 7), 'VERY HIGH' (<= 10) or 'EXTREME' (anything else)
    reference:
        https://www.epa.gov/sites/default/files/documents/uviguide.pdf

    """
    # (inclusive upper bound, category) pairs, checked in ascending order.
    upper_bounds = (
        (2, 'LOW'),
        (5, 'MODERATE'),
        (7, 'HIGH'),
        (10, 'VERY HIGH'),
    )
    for upper_bound, category in upper_bounds:
        if uv_index <= upper_bound:
            return category
    # Nothing matched: the value is above every bound.
    return 'EXTREME'
    
def raw_current_conditions_transform(
    df_current_conditions: pd.DataFrame,
    uv_index_category_func
) -> pd.DataFrame:
    """
    Perform transformation on dataframe returned from raw data extracted from the current_weather API

    Args:
        df_current_conditions : dataframe that comes from raw data extracted from the current_weather API.
            It is not modified; it may hold any number of rows.
        uv_index_category_func : function to categorize the uv index. It is called once per row
            with the keyword argument ``uv_index`` set to that row's value
            (reference:https://www.epa.gov/sites/default/files/documents/uviguide.pdf )
    Returns:
        A dataframe with the current weather conditions metrics: the columns of interest,
        renamed, with ``date`` reduced to a date and an added ``uv_index_category`` column.
        Rows with a missing uv index keep a missing ``uv_index_category``.
    Raises:
        KeyError when a column of interest is absent from ``df_current_conditions``.

    """
    # transformations 1 and 2 -> columns of interest, in output order, mapped
    # to their output name
    output_names = {
        'location_key': 'location_key',
        'location_name': 'location_name',
        'LocalObservationDateTime': 'date',
        'WeatherText': 'weather_text',
        'HasPrecipitation': 'has_precipitation',
        'PrecipitationType': 'precipitation_type',
        'Temperature.Metric.Value': 'temperature',
        'RealFeelTemperature.Metric.Value': 'real_feel_temperature',
        # NOTE(review): the next two output names do not match their source
        # (dew point is exposed as 'relative_humidity', wind direction as
        # 'wind_dewpoint_direction'). They are kept as-is because renaming
        # output columns would break existing consumers.
        'DewPoint.Metric.Value': 'relative_humidity',
        'Wind.Direction.Localized': 'wind_dewpoint_direction',
        'Wind.Speed.Metric.Value': 'wind_speed',
        'UVIndex': 'uv_index',
        'Visibility.Metric.Value': 'visibility',
        'Pressure.Metric.Value': 'pressure',
        # NOTE(review): 'precipition_value' is misspelled; kept for the same reason.
        'PrecipitationSummary.Precipitation.Metric.Value': 'precipition_value',
    }
    missing_columns = [
        column for column in output_names
        if column not in df_current_conditions.columns
    ]
    if missing_columns:
        raise KeyError(
            f"Columns missing from the current conditions data: {missing_columns}"
        )

    # rename() returns a new dataframe, so the assignments below never touch
    # the caller's dataframe.
    df_clean_current_conditions = (
        df_current_conditions[list(output_names)].rename(columns=output_names)
    )
    # transformation 3 -> convert "Date" field to date (and not timestamp)
    df_clean_current_conditions["date"] = pd.to_datetime(
        df_clean_current_conditions["date"]
    ).dt.date
    # transformation 4 --> create a column from a transformation made from another function.
    # The categorizer works on a single value, so it is applied row by row;
    # handing it the whole column fails as soon as there is more than one row.
    # na_action="ignore" leaves missing uv index values uncategorized.
    df_clean_current_conditions['uv_index_category'] = (
        df_clean_current_conditions['uv_index'].map(
            lambda value: uv_index_category_func(uv_index=value),
            na_action="ignore",
        )
    )
    return df_clean_current_conditions

## Execution

In [5]:
# The API key is read from the ACCUWEATHER_API_KEY environment variable so that
# no credential is stored in the notebook. When the variable is not set,
# os.environ.get returns None and the client refuses to be created.
# NOTE(review): a key used to be hardcoded in this cell; it should be revoked.
accuweather_client = AccuWeatherApiClient(api_key=os.environ.get("ACCUWEATHER_API_KEY"))
response = extract_current_weather(accuweather_client = accuweather_client, location_name = 'pucon')
df_clean_current_conditions = raw_current_conditions_transform(df_current_conditions = response, uv_index_category_func = uv_index_category)
df_clean_current_conditions.head()

Unnamed: 0,location_key,location_name,date,weather_text,has_precipitation,precipitation_type,temperature,real_feel_temperature,relative_humidity,wind_dewpoint_direction,wind_speed,uv_index,visibility,pressure,precipition_value,uv_index_category
0,57347,pucon,2024-01-23,Sunny,False,,27.3,25.8,13.2,SW,13.5,0,16.1,1013.9,0.0,LOW
