In [2]:
pip install requests python-dotenv

Collecting python-dotenv
  Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Note: you may need to restart the kernel to use updated packages.




In [62]:
import requests
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta
import time
import json
import pandas as pd

# loading environment variables (API key)

In [8]:
# load envrionment variables from the .env file
load_dotenv()

# get the API key from the .env file
api_key =os.getenv("API_KEY")

# getting latitude and longitude encodings for cities (list of cities in config file)

In [11]:
## function for getting lat & long encoding of cities

# Geocoding API endpoint
geocoding_url = "http://api.openweathermap.org/data/2.5/weather"

# List of cities
cities = ["Denver,CO,USA", "Austin,TX,USA", "Stuttgart,DE"]

# Loop through the cities and get their lat, lon
for city in cities:
    # Send GET request to the OpenWeatherMap Geocoding API
    response = requests.get(geocoding_url, params={
        'q': city,
        'appid': api_key
    })
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        lat = data['coord']['lat']
        lon = data['coord']['lon']
        print(f"City: {city} - Latitude: {lat}, Longitude: {lon}")
    else:
        print(f"Failed to get data for {city}")
        
##then store this information within the config file

City: Denver,CO,USA - Latitude: 39.7392, Longitude: -104.9847
City: Austin,TX,USA - Latitude: 30.2711, Longitude: -97.7437
City: Stuttgart,DE - Latitude: 48.7823, Longitude: 9.177


# API call function

In [54]:
# api call to get the current weather 
import requests

def get_weather(api_key, lat, lon, exclude='minutely,daily,hourly', units='imperial', lang='en'):
    # Build the base URL for the OneCall API
    url = f"https://api.openweathermap.org/data/3.0/onecall"
    
    # Prepare the parameters for the API call
    params = {
        'lat': lat,
        'lon': lon,
        'appid': api_key,
        'units': units,  # 'imperial' for Fahrenheit, 'metric' for Celsius
        'lang': lang      # Language for the response
    }
    
    # Add the 'exclude' parameter if it's provided
    if exclude:
        params['exclude'] = exclude
    
    # Make the API request
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        # Print or process the data
        return data
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None

# Running the API call based upon cities in config file & then writing data to json file

In [55]:
# Step 1: Load the config file
with open('cities_config.json', 'r') as f:
    config_data = json.load(f)
    
weather_data = []

# Step 2: Loop through each city and use the data for API requests
for city in config_data['cities']:
    latitude = city['latitude']
    longitude = city['longitude']
    
    weather_data.append(get_weather(api_key, latitude, longitude))

In [56]:
# File path within the "data" folder
file_path = 'data/raw_weather_data.json'

# Check if the file exists to decide whether to append or create new
if os.path.exists(file_path):
    # If the file exists, load the existing data, then append new data
    with open(file_path, 'r') as json_file:
        existing_data = json.load(json_file)
        existing_data.extend(weather_data)
    
    # Append to the file
    with open(file_path, 'w') as json_file:
        json.dump(existing_data, json_file, indent=4)
else:
    # If the file doesn't exist, create it and write the new data
    with open(file_path, 'w') as json_file:
        json.dump(weather_data, json_file, indent=4)

print(f"Data saved to {file_path}")

Data saved to data/raw_weather_data.json


# clean the weather data and write it to a csv data file

In [69]:
# Function to convert Unix timestamp to local time
def convert_to_local_time(timestamp, offset):
    utc_time = datetime.utcfromtimestamp(timestamp)
    return utc_time + timedelta(seconds=offset)

In [81]:
# Initialize an empty list to store records
data = []

# Process each record
for record in weather_data:
    latitude, longitude = record["lat"], record["lon"]
    timezone = record["timezone"]
    timezone_offset = record["timezone_offset"]

    # Convert timestamps
    current_date = convert_to_local_time(record["current"]["dt"], timezone_offset).strftime('%Y-%m-%d')
    current_time = convert_to_local_time(record["current"]["dt"], timezone_offset).strftime('%H:%M:%S')
    sunrise = convert_to_local_time(record["current"]["sunrise"], timezone_offset).strftime('%H:%M:%S')
    sunset = convert_to_local_time(record["current"]["sunset"], timezone_offset).strftime('%H:%M:%S')

    # Extract weather details
    temp = record["current"]["temp"]
    feels_like = record["current"]["feels_like"]
    pressure = record["current"]["pressure"]
    humidity = record["current"]["humidity"]
    dew_point = record["current"]["dew_point"]
    uvi = record["current"]["uvi"]
    clouds = record["current"]["clouds"]
    visibility = record["current"]["visibility"]
    wind_speed = record["current"]["wind_speed"]
    wind_deg = record["current"]["wind_deg"]
    wind_gust = record["current"].get("wind_gust", 0)
    weather = record["current"]["weather"][0]
    weather_id = weather["id"]
    weather_main = weather["main"]
    weather_description = weather["description"]

    # Handle alerts (if any)
    alerts = record.get("alerts", [])
    alert_messages = "; ".join([alert["event"] + ": " + alert["description"] for alert in alerts])

    # Add the record to the data list
    data.append({
        "latitude": lat,
        "longitude": lon,
        "timezone": timezone,
        "timezone_offset": timezone_offset,
        "current_time": current_time,
        "current_date": current_date,
        "sunrise": sunrise,
        "sunset": sunset,
        "temp_F": temp,
        "feels_like_F": feels_like,
        "humidity": humidity,
        "dew_point": dew_point,
        "uvi": uvi,
        "clouds": clouds,
        "visibility": visibility,
        "wind_speed_mph": wind_speed,
        "wind_deg": wind_deg,
        "wind_gust_mph": wind_gust,
        "weather_id": weather_id,
        "weather_main": weather_main,
        "weather_description": weather_description,
        "alerts": alert_messages
    })

# Create a DataFrame from the data list
df = pd.DataFrame(data)

   latitude  longitude         timezone  timezone_offset current_time  \
0   48.7823      9.177   America/Denver           -25200     09:54:58   
1   48.7823      9.177  America/Chicago           -21600     10:54:58   
2   48.7823      9.177    Europe/Berlin             3600     17:54:58   

  current_date   sunrise    sunset  temp_F  feels_like_F  ...   uvi  clouds  \
0   2025-02-16  06:50:57  17:37:19   25.79         22.30  ...  1.93      45   
1   2025-02-16  07:10:08  18:20:12   43.21         34.30  ...  4.27       0   
2   2025-02-16  07:29:44  17:45:16   32.94         24.71  ...  0.00     100   

   visibility  wind_speed_mph  wind_deg  wind_gust_mph  weather_id  \
0       10000            3.00       217           3.00         802   
1       10000           21.85        10          28.77         800   
2       10000           10.36        60           0.00         804   

   weather_main  weather_description  \
0        Clouds     scattered clouds   
1         Clear            cl

In [82]:
df.head()

Unnamed: 0,latitude,longitude,timezone,timezone_offset,current_time,current_date,sunrise,sunset,temp_F,feels_like_F,...,uvi,clouds,visibility,wind_speed_mph,wind_deg,wind_gust_mph,weather_id,weather_main,weather_description,alerts
0,48.7823,9.177,America/Denver,-25200,09:54:58,2025-02-16,06:50:57,17:37:19,25.79,22.3,...,1.93,45,10000,3.0,217,3.0,802,Clouds,scattered clouds,
1,48.7823,9.177,America/Chicago,-21600,10:54:58,2025-02-16,07:10:08,18:20:12,43.21,34.3,...,4.27,0,10000,21.85,10,28.77,800,Clear,clear sky,
2,48.7823,9.177,Europe/Berlin,3600,17:54:58,2025-02-16,07:29:44,17:45:16,32.94,24.71,...,0.0,100,10000,10.36,60,0.0,804,Clouds,overcast clouds,frost: There is a risk of frost (level 1 of 2)...


In [83]:
# File path within the "data" folder
file_path = 'data/clean_weather_data.csv'

# Check if the file exists to decide whether to append or create new
if os.path.exists(file_path):
    # If the file exists, load the existing data, then append new data
    existing_data = pd.read_csv(file_path)
    updated_data = pd.concat([existing_data, df], ignore_index=True)
    
    # Append to the file
    updated_data.to_csv(file_path, index=False)
else:
    # If the file doesn't exist, create it and write the new data
    df.to_csv(file_path, index=False)

print(f"Data saved to {file_path}")


Data saved to data/clean_weather_data.csv


# write the cleaned data to a postgresql database