In [1]:
import requests
import pandas as pd
import os
from datetime import datetime
from dotenv import load_dotenv
import logging
from pytz import timezone
import numpy as np

load_dotenv(dotenv_path="config/.env")

logging.basicConfig(level=logging.INFO)

## Extracting Data from Open Weather Map


In [2]:
# Extracting OWM Data

API_KEY = os.getenv("OWM_API_KEY")
BASE_URL = "https://api.openweathermap.org/data/2.5/weather"

# Daftar kota yang akan diambil datanya
CITIES = ["Jakarta", "Tokyo", "London", "New York", "Sydney"]
CITY_LIST_PATH = "../config/city_list.csv"

def fetch_weather_for_city(city: str) -> dict:
    """Ambil data cuaca untuk satu kota"""
    params = {
        "q": city,
        "appid": API_KEY,
        "units": "metric"  # Celsius
    }
    try:
        response = requests.get(BASE_URL, params=params)
        response.raise_for_status()
        data = response.json()
        return {
            "city": data["name"],
            "country": data["sys"]["country"],
            "lat": data["coord"]["lat"],
            "lon": data["coord"]["lon"],
            "temperature": data["main"]["temp"],
            "humidity": data["main"]["humidity"],
            "weather": data["weather"][0]["main"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"],
            "timestamp": datetime.utcfromtimestamp(data["dt"]),
            "fetched_at": datetime.utcnow()
        }
    except Exception as e:
        logging.error(f"Error fetching weather for {city}: {e}")
        return None
    
def fetch_weather_by_id(city_id: int) -> dict:
    """Fetch weather data by OpenWeatherMap city ID"""
    params = {
        "id": city_id,
        "appid": API_KEY,
        "units": "metric"
    }
    try:
        response = requests.get(BASE_URL, params=params)
        response.raise_for_status()
        data = response.json()
        return {
            "city_id": data["id"],
            "city": data["name"],
            "country": data["sys"]["country"],
            "lat": data["coord"]["lat"],
            "lon": data["coord"]["lon"],
            "temperature": data["main"]["temp"],
            "humidity": data["main"]["humidity"],
            "weather": data["weather"][0]["main"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"],
            "timestamp": datetime.utcfromtimestamp(data["dt"]),
            "fetched_at": datetime.utcnow()
        }
    except Exception as e:
        logging.error(f"Error fetching city ID {city_id}: {e}")
        return None

def fetch_weather_all_cities(city_file: str = CITY_LIST_PATH) -> pd.DataFrame:
    cities_df = pd.read_csv(city_file)
    records = []
    for _, row in cities_df.iterrows():
        result = fetch_weather_by_id(row["id"])
        if result:
            records.append(result)
        else:
            logging.warning(f"Skipping city ID {row['id']} ({row['name']})")
    return pd.DataFrame(records)


## Transforming and Enriching the Data

In [4]:
# Default: Waktu Jakarta (bisa disesuaikan)
WIB = timezone("Asia/Jakarta")

def clean_weather_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Bersihkan dan transformasi data cuaca:
    - Pastikan tipe data konsisten
    - Drop null penting
    - Konversi timestamp ke timezone lokal
    """
    if df.empty:
        return df

    df = df.dropna(subset=["temperature", "humidity", "weather", "timestamp"])

    df["timestamp"] = pd.to_datetime(df["timestamp"]).dt.tz_localize("UTC").dt.tz_convert(WIB)
    df["fetched_at"] = pd.to_datetime(df["fetched_at"]).dt.tz_localize("UTC").dt.tz_convert(WIB)

    return df


def enrich_weather_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Tambahkan kolom turunan:
    - temp_category (dingin, hangat, panas)
    - local_time, day_of_week, hour_of_day
    """
    if df.empty:
        return df

    def classify_temp(temp):
        if temp < 20:
            return "Cold"
        elif temp < 30:
            return "Warm"
        else:
            return "Hot"

    df["temp_category"] = df["temperature"].apply(classify_temp)
    df["day_of_week"] = df["timestamp"].dt.day_name()
    df["hour_of_day"] = df["timestamp"].dt.hour

    return df


raw = fetch_weather_all_cities()
clean = clean_weather_data(raw)
enriched = enrich_weather_data(clean)

print(enriched.head())


ERROR:root:Error fetching city ID 1650077: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1650077&units=metric
ERROR:root:Error fetching city ID 1625822: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1625822&units=metric
ERROR:root:Error fetching city ID 1835848: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1835848&units=metric
ERROR:root:Error fetching city ID 5128581: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=5128581&units=metric
ERROR:root:Error fetching city ID 292223: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=292223&units=metric
ERROR:root:Error fetching city ID 1850147: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1850147&units=metric
ERROR:root:Error fetching city ID 360630: 401 Client Error: Unauthorized

Empty DataFrame
Columns: []
Index: []


In [None]:
# Extract
raw_data = fetch_weather_all_cities()
if raw_data.empty:
    logging.error("No data fetched from API. Pipeline stopped.")


# Transform
cleaned = clean_weather_data(raw_data)
enriched = enrich_weather_data(cleaned)

if enriched.empty:
    logging.error("Transformed data is empty. Pipeline stopped.")


ERROR:root:Error fetching city ID 1650077: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1650077&units=metric
ERROR:root:Error fetching city ID 1625822: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1625822&units=metric
ERROR:root:Error fetching city ID 1835848: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1835848&units=metric
ERROR:root:Error fetching city ID 5128581: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=5128581&units=metric
ERROR:root:Error fetching city ID 292223: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=292223&units=metric
ERROR:root:Error fetching city ID 1850147: 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/weather?id=1850147&units=metric
ERROR:root:Error fetching city ID 360630: 401 Client Error: Unauthorized

In [9]:
API_KEY = os.getenv("abcdefg")
BASE_URL = "https://api.openweathermap.org/data/2.5/weather"
CITY_LIST_PATH = "config/city_list.csv"

params = {
    "id": 1234567,
    "appid": API_KEY,
    "units": "metric"
}

print(BASE_URL, params)

https://api.openweathermap.org/data/2.5/weather {'id': 1234567, 'appid': None, 'units': 'metric'}
