<a href="https://colab.research.google.com/github/mickeytheretriever/weatherAPIminiproject/blob/main/MikhailDanilov_ETL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# load libraries

import pandas as pd
import numpy as np
import requests
from datetime import date, timedelta

In [23]:
# downloading API weather data from open-meteo.com for 5 major Portuguese cities

# Define the cities with their coordinates (latitude, longitude)
cities = {
    "Lisbon": {"latitude": 38.7223, "longitude": -9.1393},
    "Porto": {"latitude": 41.1579, "longitude": -8.6291},
    "Braga": {"latitude": 41.5454, "longitude": -8.4132},
    "Faro": {"latitude": 37.0194, "longitude": -7.9223},
    "Evora": {"latitude": 38.5709, "longitude": -7.9137}
}

# Calculate start and end dates for the 7-day forecast
today = date.today()
tomorrow = today + timedelta(days=1)
end_date = tomorrow + timedelta(days=6) # 7 days starting from tomorrow

# Base URL for Open-Meteo API
BASE_URL = "https://api.open-meteo.com/v1/forecast"

# Parameters for the daily weather forecast
params = {
    "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,rain_sum,wind_speed_10m_max,weather_code",
    "timezone": "Europe/Lisbon",
    "start_date": tomorrow.isoformat(),
    "end_date": end_date.isoformat()
}


In [24]:


all_weather_data = []

for city_name, coords in cities.items():
    print(f"Fetching data for {city_name}...")
    city_params = params.copy()
    city_params["latitude"] = coords["latitude"]
    city_params["longitude"] = coords["longitude"]

    response = requests.get(BASE_URL, params=city_params)
    response.raise_for_status() # Raise an exception for HTTP errors
    data = response.json()

    if "daily" in data:
        daily_data = data["daily"]
        # Create a DataFrame for the current city's data
        df_city = pd.DataFrame({
            "date": pd.to_datetime(daily_data["time"]),
            "city": city_name,
            "Maximum Daily Temperature, C": daily_data["temperature_2m_max"],
            "Minimum Daily Temperature, C": daily_data["temperature_2m_min"],
            "Precipitation, mm": daily_data["precipitation_sum"],
            "Rain, mm": daily_data["rain_sum"],
            "Max Wind Speed, m/sec": daily_data["wind_speed_10m_max"],
            "Weather Code": daily_data["weather_code"]
        })
        all_weather_data.append(df_city)

# Concatenate all city DataFrames into a single DataFrame
weather_data = pd.concat(all_weather_data, ignore_index=True)

print("Weather data downloaded successfully.")

Fetching data for Lisbon...
Fetching data for Porto...
Fetching data for Braga...
Fetching data for Faro...
Fetching data for Evora...
Weather data downloaded successfully.


In [25]:
# Display the first few rows of the combined DataFrame
display(weather_data.head())

Unnamed: 0,date,city,"Maximum Daily Temperature, C","Minimum Daily Temperature, C","Precipitation, mm","Rain, mm","Max Wind Speed, m/sec",Weather Code
0,2025-12-12,Lisbon,13.2,11.5,33.0,29.1,13.3,63
1,2025-12-13,Lisbon,16.3,11.4,0.2,0.2,18.4,3
2,2025-12-14,Lisbon,15.7,11.9,0.0,0.0,16.1,1
3,2025-12-15,Lisbon,14.1,10.7,2.7,1.5,13.9,80
4,2025-12-16,Lisbon,12.8,10.1,1.2,0.0,28.8,80


In [26]:
#adding columns with Temperature Data in Fahrenheit using data in C

weather_data['Maximum Daily Temperature, F'] = (weather_data['Maximum Daily Temperature, C'] * 9/5) + 32
weather_data['Minimum Daily Temperature, F'] = (weather_data['Minimum Daily Temperature, C'] * 9/5) + 32

display(weather_data.head())

Unnamed: 0,date,city,"Maximum Daily Temperature, C","Minimum Daily Temperature, C","Precipitation, mm","Rain, mm","Max Wind Speed, m/sec",Weather Code,"Maximum Daily Temperature, F","Minimum Daily Temperature, F"
0,2025-12-12,Lisbon,13.2,11.5,33.0,29.1,13.3,63,55.76,52.7
1,2025-12-13,Lisbon,16.3,11.4,0.2,0.2,18.4,3,61.34,52.52
2,2025-12-14,Lisbon,15.7,11.9,0.0,0.0,16.1,1,60.26,53.42
3,2025-12-15,Lisbon,14.1,10.7,2.7,1.5,13.9,80,57.38,51.26
4,2025-12-16,Lisbon,12.8,10.1,1.2,0.0,28.8,80,55.04,50.18


In [27]:
# adding weather description based on the weather code to the list

def get_weather_description(code):
    descriptions = {
        0: "Clear sky",
        1: "Mainly clear",
        2: "Partly cloudy",
        3: "Overcast",
        45: "Fog",
        48: "Depositing rime fog",
        51: "Light drizzle",
        53: "Moderate drizzle",
        55: "Dense drizzle",
        56: "Light freezing drizzle",
        57: "Dense freezing drizzle",
        61: "Slight rain",
        63: "Moderate rain",
        65: "Heavy rain",
        66: "Light freezing rain",
        67: "Heavy freezing rain",
        71: "Slight snowfall",
        73: "Moderate snowfall",
        75: "Heavy snowfall",
        77: "Snow grains",
        80: "Slight rain showers",
        81: "Moderate rain showers",
        82: "Violent rain showers",
        85: "Slight snow showers",
        86: "Heavy snow showers",
        95: "Thunderstorm",
        96: "Thunderstorm with slight hail",
        99: "Thunderstorm with heavy hail"
    }
    return descriptions.get(code, "Unknown")

weather_data['Expected Weather'] = weather_data['Weather Code'].apply(get_weather_description)

display(weather_data.head())

Unnamed: 0,date,city,"Maximum Daily Temperature, C","Minimum Daily Temperature, C","Precipitation, mm","Rain, mm","Max Wind Speed, m/sec",Weather Code,"Maximum Daily Temperature, F","Minimum Daily Temperature, F",Expected Weather
0,2025-12-12,Lisbon,13.2,11.5,33.0,29.1,13.3,63,55.76,52.7,Moderate rain
1,2025-12-13,Lisbon,16.3,11.4,0.2,0.2,18.4,3,61.34,52.52,Overcast
2,2025-12-14,Lisbon,15.7,11.9,0.0,0.0,16.1,1,60.26,53.42,Mainly clear
3,2025-12-15,Lisbon,14.1,10.7,2.7,1.5,13.9,80,57.38,51.26,Slight rain showers
4,2025-12-16,Lisbon,12.8,10.1,1.2,0.0,28.8,80,55.04,50.18,Slight rain showers


In [30]:
# Creating a weekly weather report based on weather_data containing information on
def get_mode(series):
    if not series.empty:
        return series.mode()[0]
    return None

weekly_weather_report = weather_data.groupby('city').agg({
    'Maximum Daily Temperature, C': 'max',
    'Minimum Daily Temperature, C': 'min',
    'Precipitation, mm': 'sum',
    'Rain, mm': 'sum',
    'Max Wind Speed, m/sec': 'max',
    'Expected Weather': get_mode
}).reset_index()

weekly_weather_report = weekly_weather_report.rename(columns={
    'Maximum Daily Temperature, C': 'Weekly Maximum, C',
    'Minimum Daily Temperature, C': 'Weekly Minimum, C',
    'Precipitation, mm': 'Weekly Precipitation, mm',
    'Rain, mm': 'Weekly Rain, mm',
    'Max Wind Speed, m/sec': 'Max Wind Speed, m/sec',
    'Expected Weather': 'Expected weather'
})

display(weekly_weather_report)

Unnamed: 0,city,"Weekly Maximum, C","Weekly Minimum, C","Weekly Precipitation, mm","Weekly Rain, mm","Max Wind Speed, m/sec",Expected weather
0,Braga,16.8,0.7,91.2,74.9,29.8,Thunderstorm
1,Evora,15.5,4.8,29.2,22.1,33.3,Overcast
2,Faro,18.6,10.4,24.0,15.1,31.6,Partly cloudy
3,Lisbon,16.6,10.1,40.1,30.8,28.8,Slight rain showers
4,Porto,17.6,8.1,77.33,59.5,46.5,Slight rain


In [31]:
# Export weather_data to a CSV file
weather_data.to_csv('weather_data.csv', index=False)
print("weather_data.csv has been created successfully.")

weather_data.csv has been created successfully.


In [32]:
# Export weekly_weather_report to a CSV file
weekly_weather_report.to_csv('weekly_weather_report.csv', index=False)
print("weekly_weather_report.csv has been created successfully.")

weekly_weather_report.csv has been created successfully.
