# Testing scripts, retrieving weather data

In [1]:
import os
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment
load_dotenv()

# API key sent to OpenWeatherMap as `appid`; stays None when API_KEY is not defined
api_key = os.environ.get("API_KEY")

In [2]:
import requests
from datetime import date, datetime, timedelta
import time

In [3]:
# Testing geocoding to get the latlong data of a city
def get_city_coordinates(city, api_key):
    """Look up a city's latitude and longitude with the OpenWeatherMap geocoding API.

    Args:
        city: City name to search for; only the first match is requested (``limit=1``).
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.

    Returns:
        ``(lat, lon)`` of the first match, or ``(None, None)`` when the request
        fails, the API answers with a non-200 status, or nothing matches.
    """
    # HTTPS keeps the API key out of clear-text traffic.
    url = "https://api.openweathermap.org/geo/1.0/direct"
    # Passing params lets requests URL-encode the city name (spaces, "&", non-ASCII).
    params = {"q": city, "limit": 1, "appid": api_key}

    try:
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Print only the exception type: its message can embed the URL, API key included.
        print("Error fetching coordinates:", type(exc).__name__)
        return None, None

    if response.status_code != 200:
        print("Error fetching coordinates:", response.status_code)
        return None, None

    data = response.json()
    if not data:
        print(f"City '{city}' not found.")
        return None, None

    return data[0]['lat'], data[0]['lon']

city = "Surabaya"
# Alternative test city: "Jakarta"
lat, lon = get_city_coordinates(city, api_key)
print(lat, lon, sep="\n")

-7.2459717
112.7378266


In [4]:
# Function to fetch weather data
def get_weather(lat, lon, part, api_key):
    """Fetch weather for a coordinate pair from the OpenWeatherMap One Call 3.0 endpoint.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        part: Comma-separated response sections to leave out, sent as ``exclude``.
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.

    Returns:
        The decoded JSON response, or ``None`` when the request fails or the API
        answers with a non-200 status.
    """
    url = "https://api.openweathermap.org/data/3.0/onecall"
    # requests builds and URL-encodes the query string from this mapping.
    params = {
        "lat": lat,
        "lon": lon,
        "exclude": part,
        "appid": api_key,
        "units": "metric",
    }

    try:
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Print only the exception type: its message can embed the URL, API key included.
        print("Error:", type(exc).__name__)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code)
        return None

    return response.json()

# Function to display weather data
def display_weather(data):
    """Print temperature, condition, humidity and wind speed from a weather response.

    Args:
        data: Mapping with a ``"current"`` entry holding ``temp``, ``humidity``,
            ``wind_speed`` and a ``weather`` list whose first item has a
            ``description``. Any falsy value prints a placeholder instead.
    """
    if not data:
        print("No data to display.")
        return

    current = data["current"]
    # Build every line before printing so a missing key raises before any output.
    report = (
        f"Temperature: {current['temp']}°C",
        f"Condition: {current['weather'][0]['description']}",
        f"Humidity: {current['humidity']}%",
        f"Wind Speed: {current['wind_speed']} m/s",
    )
    print("\n".join(report))

# Main program
if __name__ == "__main__":
    city_name = input("Enter the city name: ")
    part = 'minutely,hourly'
    # Geocode the city that was actually entered; previously the input was ignored
    # and the coordinates left over from an earlier cell were used instead.
    city_lat, city_lon = get_city_coordinates(city_name, api_key)
    if city_lat is None or city_lon is None:
        # get_city_coordinates has already printed why the lookup failed.
        weather_data = None
    else:
        weather_data = get_weather(city_lat, city_lon, part, api_key)
    display_weather(weather_data)

Temperature: 30.57°C
Condition: few clouds
Humidity: 79%
Wind Speed: 6.17 m/s


In [5]:
# Compare naive "now" values (UTC vs local wall clock) and convert a sample
# Unix timestamp both ways.
dt_utc = datetime.utcnow()
dt_loc = datetime.now()
test_dt_24 = 1735707600
test_conv_utc = datetime.utcfromtimestamp(test_dt_24)
# fromtimestamp() converts to this machine's local timezone (WIB in the saved output).
test_conv_wib = datetime.fromtimestamp(test_dt_24)
print(dt_utc)
# NOTE: .timestamp() reads a naive datetime as local time, so on a machine that is
# not on UTC this is NOT the current epoch (7 h behind dt_loc.timestamp() in the saved output).
print(dt_utc.timestamp())
print(dt_loc)
print(dt_loc.timestamp())
print("Timestamp UTC:", test_conv_utc)
print("Timestamp WIB:", test_conv_wib)

2025-02-03 09:29:30.465777
1738549770.465777
2025-02-03 16:29:30.465817
1738574970.465817
Timestamp UTC: 2025-01-01 05:00:00
Timestamp WIB: 2025-01-01 12:00:00


In [6]:
# Round-trip a naive local datetime (noon on 1 Jan 2025) through a Unix timestamp
noon_local = datetime(2024 + 1, 1, 1, 12, 0, 0)  # WIB on the author's machine
noon_epoch = noon_local.timestamp()
print(noon_local)
print(noon_epoch)
print(datetime.fromtimestamp(noon_epoch))     # back to local wall-clock time
print(datetime.utcfromtimestamp(noon_epoch))  # same instant as a naive UTC value

2025-01-01 12:00:00
1735707600.0
2025-01-01 12:00:00
2025-01-01 05:00:00


In [19]:
i = 2024
# Naive noon on 1 January of year `i`; read as local time (WIB on the author's machine)
test_1 = datetime(year=i, month=1, day=1, hour=12)
# Unix timestamp of that local moment
test_2 = test_1.timestamp()
# The same instant expressed as a naive UTC datetime
test_3 = datetime.utcfromtimestamp(test_2)
# Calendar arithmetic: one day later
test_4 = test_1 + timedelta(days=1)
print(test_1, test_2, test_3, test_4, test_4.year, test_4.month, sep="\n")

2024-01-01 12:00:00
1704085200.0
2024-01-01 05:00:00
2024-01-02 12:00:00
2024
1


In [28]:
def get_historical_weather(lat, lon, api_key, days=30):
    """Fetch one historical weather observation per day for the last ``days`` days.

    Each request targets the current time of day, shifted back by 1, 2, ...,
    ``days`` whole days, against the One Call 3.0 "timemachine" endpoint.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.
        days: Number of past days to request.

    Returns:
        A list of dicts (most recent day first) with the keys ``timestamp_utc``,
        ``timezone_offset``, ``date_utc``, ``weather``, ``description``,
        ``temperature``, ``pressure``, ``humidity``, ``dew_point`` and
        ``wind_speed``. Collection stops at the first failed request, so the
        list may hold fewer than ``days`` entries.
    """
    base_url = "https://api.openweathermap.org/data/3.0/onecall/timemachine"
    seconds_per_day = 24 * 60 * 60
    weather_data = []

    # time.time() is a real Unix epoch value. The previous
    # datetime.utcnow().timestamp() read the naive UTC value as local time and
    # was therefore off by the machine's UTC offset.
    now = int(time.time())

    for i in range(days):
        # Timestamp of day minus (i+1)
        timestamp = now - (i + 1) * seconds_per_day
        params = {
            "lat": lat,
            "lon": lon,
            "dt": timestamp,
            "appid": api_key,
            "units": "metric"  # Optional: Metric units for temperature
        }

        # Make the API call
        try:
            response = requests.get(base_url, params=params, timeout=10)
        except requests.RequestException as exc:
            # Print only the exception type: its message can embed the URL, API key included.
            print(f"Error fetching weather data for day {i+1}: {type(exc).__name__}")
            break
        if response.status_code != 200:
            print(f"Error fetching weather data for day {i+1}: {response.status_code}")
            break

        day_data = response.json()
        observation = day_data["data"][0]
        condition = observation["weather"][0]
        weather_data.append({
            "timestamp_utc": timestamp,
            "timezone_offset": day_data["timezone_offset"],
            "date_utc": time.strftime('%Y-%m-%d', time.gmtime(timestamp)),
            "weather": condition["main"],
            "description": condition["description"],
            "temperature": observation["temp"],
            "pressure": observation["pressure"],
            "humidity": observation["humidity"],
            "dew_point": observation["dew_point"],
            "wind_speed": observation["wind_speed"]
        })

        # Avoid hitting rate limits
        time.sleep(1)

    return weather_data

In [29]:
import csv

# Save the weather data to a CSV file
def save_to_csv(data, city_name):
    """Write weather records to ``<city_name in lower case>_weather.csv``.

    The file is created in the current working directory and replaces any
    existing file of the same name.

    Args:
        data: Iterable of dicts whose keys are drawn from the column list below.
            ``csv.DictWriter`` raises ``ValueError`` for a key outside that list.
        city_name: City name used to build the file name.
    """
    filename = "{}_weather.csv".format(city_name.lower())
    fieldnames = [
        "timestamp_unix",
        "timezone_offset",
        "date_wib",
        "weather",
        "description",
        "temperature",
        "pressure",
        "humidity",
        "dew_point",
        "wind_speed",
    ]
    # newline="" leaves line endings to the csv module. An explicit UTF-8 encoding
    # keeps the file readable by pandas.read_csv (UTF-8 by default) on every platform.
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print(f"Weather data saved to {filename}")


In [None]:
def test_get_historical_weather(lat, lon, api_key, month=1):
    # """Fetch historical weather data for the defined year."""
    base_url = base_url = f"https://api.openweathermap.org/data/3.0/onecall/timemachine"
    weather_data = []
    # ts = datetime(year, 1, 1, 12, 0, 0) # timestamp already in WIB/UTC+7
    ts = datetime(2024, month, 1, 12, 0, 0) # timestamp already in WIB/UTC+7
    i = 0

    while month == month:
        dt = int(ts.timestamp()) # converting timestamp to UNIX
        params = {
            "lat": lat,
            "lon": lon,
            "dt": dt,
            "appid": api_key,
            "units": "metric" # Optional: Metric units for temperature
        }

        # Make the API call
        response = requests.get(base_url, params=params)
        if response.status_code == 200:
            day_data = response.json()
            weather_data.append({
                "timestamp_unix": dt,
                "timezone_offset": day_data["timezone_offset"], # offset to be added to timestamp_utc to convert it to GMT+7
                "date_wib": datetime.fromtimestamp(dt).strftime('%Y-%m-%d'),
                "weather": day_data["data"][0]["weather"][0]["main"],
                "description": day_data["data"][0]["weather"][0]["description"],
                "temperature": day_data["data"][0]["temp"],
                "pressure": day_data["data"][0]["pressure"],
                "humidity": day_data["data"][0]["humidity"],
                "dew_point": day_data["data"][0]["dew_point"],
                "wind_speed": day_data["data"][0]["wind_speed"]
            })
        else:
            print(f"Error fetching weather data for day {i+1}: {response.status_code}")
            break

        # Avoid hitting rate limits
        time.sleep(1)
        
        ts = ts + timedelta(days=1)
        i += 1
        month = ts.month

        # Avoid hitting rate limits
        time.sleep(1)

    return weather_data

In [30]:
def main():
    """Geocode a city, fetch one month of daily weather, print it and save it as CSV."""
    # Change to your desired city
    city_name = "Surabaya"
    # city_name = input("Enter the city name: ")

    # Calendar month (1-12) to retrieve
    month = 1

    # Step 1: Get city coordinates
    lat, lon = get_city_coordinates(city_name, api_key)
    if lat is None or lon is None:
        return

    # Step 2: Fetch historical weather data.
    # test_get_historical_weather is the fetcher that takes a month and emits the
    # "timestamp_unix"/"date_wib" keys read below and written by save_to_csv.
    # get_historical_weather takes a day count and emits "timestamp_utc"/"date_utc".
    weather_data = test_get_historical_weather(lat, lon, api_key, month)
    if not weather_data:
        # Do not replace an existing CSV with a header-only file.
        print("No weather data retrieved; nothing to save.")
        return

    # Step 3: Display or process the data
    for day in weather_data:
        timestamp = day["timestamp_unix"]
        date_wib = day["date_wib"]
        temp = day["temperature"]
        weather = day["weather"]
        weather_desc = day["description"]
        print(f"Date: {date_wib}, Time: {timestamp}, Temp: {temp}°C, Weather: {weather}, {weather_desc}")

    # Step 4: Store the weather data to a CSV file
    save_to_csv(weather_data, city_name)

if __name__ == "__main__":
    main()

KeyboardInterrupt: 

# EDA

In [26]:
# Load the CSV data
# pandas is imported here because the notebook's other pandas import sits in a
# later cell, so this cell would fail on Restart & Run All.
import pandas as pd

# Load the CSV data
filename = "surabaya_weather.csv"  # Replace with your actual CSV file
weather_data = pd.read_csv(filename)

# save_to_csv writes the epoch column as "timestamp_unix", while the cells below
# read "timestamp_utc". Normalise the name so either file layout works; rename()
# ignores a column that is absent.
weather_data = weather_data.rename(columns={"timestamp_unix": "timestamp_utc"})

# Convert date to a datetime object for better handling (whichever date column the file has)
for date_column in ("date_utc", "date_wib"):
    if date_column in weather_data.columns:
        weather_data[date_column] = pd.to_datetime(weather_data[date_column])

In [None]:
# Preview the first rows of the loaded frame
weather_data.head()

In [None]:
# Column dtypes and non-null counts
weather_data.info()

In [None]:
# Test
weather_data["timestamp_utc"].head()
weather_data["timestamp_utc"].head().apply(lambda x: datetime.fromtimestamp(x))


In [None]:
# Shift the epoch seconds by the location's UTC offset and read the result as a
# timezone-naive datetime, i.e. the wall-clock time at the location.
# datetime.fromtimestamp() must not be used on the shifted value: it adds the
# machine's own UTC offset again, applying the shift twice on a WIB machine.
weather_data["timestamp_wib"] = pd.to_datetime(
    weather_data["timestamp_utc"] + weather_data["timezone_offset"], unit="s"
)
weather_data["timestamp_wib"].head()

In [None]:
# Drop the time-of-day part so each row carries just its calendar date (at midnight)
weather_data["date_wib"] = weather_data["timestamp_wib"].dt.floor("D")
weather_data["date_wib"].head()

In [None]:
# Preview again, now with the derived timestamp_wib / date_wib columns
weather_data.head()

In [None]:
# Re-check dtypes after adding the derived columns
weather_data.info()

In [None]:
# Summary statistics for the measurement columns and the derived date column
weather_data[['temperature', 'pressure', 'humidity', 'dew_point', 'date_wib']].describe()

# Testing Viz

In [49]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Line chart

In [None]:
# Plot temperature trends
plt.figure(figsize=(10, 6))
sns.lineplot(x='date_wib', y='temperature', data=weather_data, marker='o', label='Temperature')
plt.title('Daily Temperature in Surabaya')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Quick seaborn line chart of temperature; labelled so the figure stands alone,
# and shown explicitly so the Axes repr is not left as cell output.
ax = sns.lineplot(data=weather_data, x="date_wib", y="temperature", label="Temperature")
ax.set(title="Daily Temperature in Surabaya", xlabel="Date", ylabel="Temperature (°C)")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Quick seaborn line chart of humidity; labelled so the figure stands alone,
# and shown explicitly so the Axes repr is not left as cell output.
ax = sns.lineplot(data=weather_data, x="date_wib", y="humidity", label="Humidity")
ax.set(title="Daily Humidity in Surabaya", xlabel="Date", ylabel="Humidity (%)")
plt.xticks(rotation=45)
plt.show()

# Distribution

In [None]:
# Temperature histogram drawn through the explicit axes interface
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(weather_data["temperature"], bins=10, edgecolor="black")
hist_ax.set_xlabel("Temperature (°C)")
hist_ax.set_ylabel("Frequency")
hist_ax.set_title("Temperature Distribution in Surabaya")
plt.show()

In [None]:
# Seaborn version of the temperature histogram, labelled like the matplotlib one
ax = sns.histplot(
    data=weather_data,
    x="temperature",
    bins=10
)
ax.set(
    title="Temperature Distribution in Surabaya",
    xlabel="Temperature (°C)",
    ylabel="Frequency",
)
plt.show()

# Correlation

In [None]:
# Pairwise correlation of the numeric measurements, drawn as an annotated heatmap
corr = weather_data.loc[:, ['temperature', 'pressure', 'humidity', 'dew_point']].corr()
heatmap_ax = sns.heatmap(corr, cmap="coolwarm", annot=True)
heatmap_ax.set_title("Correlation Heatmap of Weather Data")
plt.show()

In [None]:
# Temperature v dew point
sns.scatterplot(
    data=weather_data,
    x="temperature",
    y="dew_point",
    hue="weather"
)
plt.show()

In [None]:
# Temperature v pressure
sns.scatterplot(
    data=weather_data,
    x="temperature",
    y="pressure",
    hue="weather"
)
plt.show()

In [None]:
# Temperature v humidity
sns.scatterplot(
    data=weather_data,
    x="temperature",
    y="humidity",
    hue="weather"
)
plt.show()

# Bar

In [None]:
# Number of observations per weather category, as a pandas bar chart
weather_counts = weather_data["weather"].value_counts()
weather_ax = weather_counts.plot.bar()
weather_ax.set_title("Weather Frequency")
weather_ax.set_ylabel("Count")
plt.show()

In [None]:
# Same count, drawn with seaborn. `data` is passed by keyword like the other
# seaborn cells: a positional frame is only accepted by newer seaborn releases.
sns.countplot(data=weather_data, x="weather")
plt.show()

In [None]:
# Number of observations per detailed weather description, as a pandas bar chart
description_counts = weather_data["description"].value_counts()
description_ax = description_counts.plot.bar()
description_ax.set_title("Weather Condition Frequency")
description_ax.set_xlabel("Condition")
description_ax.set_ylabel("Count")
plt.show()

In [None]:
# Same count, drawn with seaborn. `data` is passed by keyword like the other
# seaborn cells: a positional frame is only accepted by newer seaborn releases.
sns.countplot(data=weather_data, x="description")
plt.show()

# Finding the extreme weather days

In [None]:
# Rows holding the highest and lowest recorded temperature
# (idxmax/idxmin return the first such row when there are ties)
temperatures = weather_data["temperature"]
hottest_day = weather_data.loc[temperatures.idxmax()]
coldest_day = weather_data.loc[temperatures.idxmin()]

hottest_date, hottest_temp = hottest_day['date_wib'], hottest_day['temperature']
coldest_date, coldest_temp = coldest_day['date_wib'], coldest_day['temperature']
print(f"Hottest day: {hottest_date} with {hottest_temp}°C")
print(f"Coldest day: {coldest_date} with {coldest_temp}°C")


# Summary statistics

In [None]:
# Mean temperature for each weather category
weather_data.groupby("weather")["temperature"].mean()

## Interactive Viz

In [68]:
import plotly.express as px

In [None]:
# Interactive temperature plot
fig = px.line(weather_data, x='date', y='temperature', title='Daily Temperature in Surabaya',
              labels={'temperature': 'Temperature (°C)', 'date': 'Date'})
fig.show()