In [2]:
pip install requests python-dotenv

Note: you may need to restart the kernel to use updated packages.




In [3]:
import requests
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta
import time
import json
import pandas as pd

# loading environment variables (API key)

In [4]:
# Load environment variables from the .env file (if one is present)
load_dotenv()

# Read the API key from the environment
api_key = os.getenv("API_KEY")

# Fail fast with a clear message: without a key every later request would
# only fail with an authentication error that is harder to trace back here.
if not api_key:
    raise RuntimeError("API_KEY is not set; add it to your .env file or environment.")

# getting latitude and longitude coordinates for cities (list of cities in config file)

In [5]:
## Look up the latitude & longitude of each city

# OpenWeatherMap current-weather endpoint. Its JSON response carries a 'coord'
# object, which is what this cell reads to obtain the coordinates of the city
# passed in the 'q' parameter. HTTPS is used so the API key in the query
# string is not sent in clear text.
geocoding_url = "https://api.openweathermap.org/data/2.5/weather"

# List of cities
cities = ["Denver,CO,USA", "Austin,TX,USA", "Stuttgart,DE"]

# Coordinates collected per city so they can be copied into the config file
city_coordinates = {}

# Loop through the cities and get their lat, lon
for city in cities:
    try:
        # A timeout keeps the cell from hanging forever on a stalled connection
        response = requests.get(
            geocoding_url,
            params={'q': city, 'appid': api_key},
            timeout=10,
        )
    except requests.RequestException as exc:
        # Connection problems for one city should not abort the remaining lookups
        print(f"Failed to get data for {city}: {exc}")
        continue

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        lat = data['coord']['lat']
        lon = data['coord']['lon']
        city_coordinates[city] = {'latitude': lat, 'longitude': lon}
        print(f"City: {city} - Latitude: {lat}, Longitude: {lon}")
    else:
        print(f"Failed to get data for {city}")

## then store this information within the config file

City: Denver,CO,USA - Latitude: 39.7392, Longitude: -104.9847
City: Austin,TX,USA - Latitude: 30.2711, Longitude: -97.7437
City: Stuttgart,DE - Latitude: 48.7823, Longitude: 9.177


# API call function

In [27]:
# api call to get the current weather 
import requests

def get_weather(api_key, city, lat, lon, exclude='minutely,daily,hourly', units='imperial', lang='en', timeout=10):
    """Fetch weather data for one location from the OpenWeatherMap One Call API.

    Parameters
    ----------
    api_key : str
        API key sent as the 'appid' query parameter.
    city : str
        Label stored under the 'City' key of the returned dict; it is not
        sent to the API.
    lat, lon : float
        Coordinates sent as the 'lat' and 'lon' query parameters.
    exclude : str, optional
        Comma-separated response sections to leave out. When falsy, the
        'exclude' parameter is not sent at all.
    units : str, optional
        Unit system sent as 'units' ('imperial' for Fahrenheit, 'metric'
        for Celsius).
    lang : str, optional
        Language sent as 'lang'.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict or None
        The decoded JSON response with an added 'City' key, or None when the
        request fails, the status code is not 200, or the body is not valid
        JSON. Failures are printed rather than raised.
    """
    # Base URL for the OneCall API
    url = "https://api.openweathermap.org/data/3.0/onecall"

    # Prepare the parameters for the API call
    params = {
        'lat': lat,
        'lon': lon,
        'appid': api_key,
        'units': units,  # 'imperial' for Fahrenheit, 'metric' for Celsius
        'lang': lang      # Language for the response
    }

    # Add the 'exclude' parameter if it's provided
    if exclude:
        params['exclude'] = exclude

    # Make the API request; without a timeout a stalled connection would
    # block the notebook indefinitely.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: request for {city} failed: {exc}")
        return None

    # Anything other than 200 is reported and treated as "no data"
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response with a non-JSON body is still a failed fetch
        print(f"Error: could not decode response for {city}: {exc}")
        return None

    # Tag the payload with the city label so later steps can identify it
    data['City'] = city
    print(data)
    return data

# Running the API call based upon cities in config file & then writing data to json file

In [28]:
# Step 1: Load the config file (expects a top-level 'cities' list whose
# entries provide 'name', 'latitude' and 'longitude')
with open('cities_config.json', 'r') as f:
    config_data = json.load(f)

weather_data = []

# Step 2: Loop through each city and use the data for API requests
for city in config_data['cities']:
    latitude = city['latitude']
    longitude = city['longitude']
    city_name = city['name']

    city_weather = get_weather(api_key, city_name, latitude, longitude)

    # get_weather returns None on failure; keeping that placeholder would
    # break the cleaning step and write nulls into the raw JSON file.
    if city_weather is None:
        print(f"Skipping {city_name}: no weather data returned")
        continue

    weather_data.append(city_weather)

{'lat': 39.7392, 'lon': -104.9847, 'timezone': 'America/Denver', 'timezone_offset': -25200, 'current': {'dt': 1739808083, 'sunrise': 1739800179, 'sunset': 1739839108, 'temp': 29.61, 'feels_like': 25.3, 'pressure': 1012, 'humidity': 77, 'dew_point': 24.01, 'uvi': 0.97, 'clouds': 99, 'visibility': 10000, 'wind_speed': 4, 'wind_deg': 202, 'weather': [{'id': 804, 'main': 'Clouds', 'description': 'overcast clouds', 'icon': '04d'}]}, 'City': 'Denver,CO,USA'}
{'lat': 30.2711, 'lon': -97.7437, 'timezone': 'America/Chicago', 'timezone_offset': -21600, 'current': {'dt': 1739808083, 'sunrise': 1739797753, 'sunset': 1739838059, 'temp': 45.75, 'feels_like': 43.21, 'pressure': 1025, 'humidity': 62, 'dew_point': 33.49, 'uvi': 3, 'clouds': 14, 'visibility': 10000, 'wind_speed': 5.01, 'wind_deg': 149, 'wind_gust': 8.01, 'weather': [{'id': 801, 'main': 'Clouds', 'description': 'few clouds', 'icon': '02d'}]}, 'City': 'Austin,TX,USA'}
{'lat': 48.7823, 'lon': 9.177, 'timezone': 'Europe/Berlin', 'timezone_o

In [29]:
# File path within the "data" folder
file_path = 'data/raw_weather_data.json'

# Create the target folder on first run; opening the file for writing
# fails when the directory is missing.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Start from what is already on disk (if anything) and add the new records
if os.path.exists(file_path):
    with open(file_path, 'r') as json_file:
        existing_data = json.load(json_file)
    existing_data.extend(weather_data)
else:
    existing_data = list(weather_data)

# Rewrite the file with the combined records
with open(file_path, 'w') as json_file:
    json.dump(existing_data, json_file, indent=4)

print(f"Data saved to {file_path}")

Data saved to data/raw_weather_data.json


# clean the weather data and write it to CSV data files: one with all data and one with the data to insert into the database

In [9]:
# Function to convert Unix timestamp to local time
def convert_to_local_time(timestamp, offset):
    """Convert a Unix timestamp to a naive local datetime.

    Parameters
    ----------
    timestamp : int or float
        Seconds since the Unix epoch (UTC).
    offset : int or float
        Offset of the local time zone from UTC, in seconds.

    Returns
    -------
    datetime
        Naive datetime holding the local wall-clock time.
    """
    # Epoch arithmetic replaces datetime.utcfromtimestamp, which is deprecated
    # since Python 3.12, while still producing a naive datetime.
    utc_time = datetime(1970, 1, 1) + timedelta(seconds=timestamp)
    return utc_time + timedelta(seconds=offset)

In [30]:
def flatten_weather_record(record):
    """Flatten one raw weather payload into a single flat row.

    Parameters
    ----------
    record : dict
        One entry of ``weather_data``: it must provide 'lat', 'lon',
        'timezone', 'timezone_offset', 'City' and a 'current' section;
        'alerts' is optional.

    Returns
    -------
    dict
        One row with location, local date/time strings, current conditions
        and a single joined string of alert messages.
    """
    current = record["current"]
    timezone_offset = record["timezone_offset"]

    # Convert the observation time once and format date and time from it
    observed_at = convert_to_local_time(current["dt"], timezone_offset)
    sunrise = convert_to_local_time(current["sunrise"], timezone_offset)
    sunset = convert_to_local_time(current["sunset"], timezone_offset)

    # Only the first entry of the 'weather' list is used
    weather = current["weather"][0]

    # Alerts are optional; join "event: description" pairs into one string
    alerts = record.get("alerts", [])
    alert_messages = "; ".join(
        alert["event"] + ": " + alert["description"] for alert in alerts
    )

    # NOTE(review): 'pressure' is not part of the output columns; add it here
    # if the downstream schema should carry it.
    return {
        "latitude": record["lat"],
        "longitude": record["lon"],
        "timezone": record["timezone"],
        "timezone_offset": timezone_offset,
        "city": record["City"],
        "current_time": observed_at.strftime('%H:%M:%S'),
        "current_date": observed_at.strftime('%Y-%m-%d'),
        "sunrise": sunrise.strftime('%H:%M:%S'),
        "sunset": sunset.strftime('%H:%M:%S'),
        "temp_F": current["temp"],
        "feels_like_F": current["feels_like"],
        "humidity": current["humidity"],
        "dew_point": current["dew_point"],
        "uvi": current["uvi"],
        "clouds": current["clouds"],
        "visibility": current["visibility"],
        "wind_speed_mph": current["wind_speed"],
        "wind_deg": current["wind_deg"],
        # 0 is used when the payload has no 'wind_gust' entry
        "wind_gust_mph": current.get("wind_gust", 0),
        "weather_id": weather["id"],
        "weather_main": weather["main"],
        "weather_description": weather["description"],
        "alerts": alert_messages
    }


# Build one flat row per record; None entries (failed API calls) are skipped
# so a single failed city does not abort the whole cleaning step.
data = [flatten_weather_record(record) for record in weather_data if record is not None]

# Create a DataFrame from the data list
df = pd.DataFrame(data)

In [31]:
# Preview the first rows of the cleaned DataFrame
df.head()

Unnamed: 0,latitude,longitude,timezone,timezone_offset,city,current_time,current_date,sunrise,sunset,temp_F,...,uvi,clouds,visibility,wind_speed_mph,wind_deg,wind_gust_mph,weather_id,weather_main,weather_description,alerts
0,39.7392,-104.9847,America/Denver,-25200,"Denver,CO,USA",09:01:23,2025-02-17,06:49:39,17:38:28,29.61,...,0.97,99,10000,4.0,202,0.0,804,Clouds,overcast clouds,
1,30.2711,-97.7437,America/Chicago,-21600,"Austin,TX,USA",10:01:23,2025-02-17,07:09:13,18:20:59,45.75,...,3.0,14,10000,5.01,149,8.01,801,Clouds,few clouds,
2,48.7823,9.177,Europe/Berlin,3600,"Stuttgart,DE",17:01:23,2025-02-17,07:27:58,17:46:55,36.95,...,0.0,0,10000,8.05,80,0.0,800,Clear,clear sky,frost: There is a risk of frost (level 1 of 2)...


In [83]:
# File path within the "data" folder
file_path = 'data/clean_weather_data.csv'

# Create the target folder on first run; to_csv cannot write into a
# directory that does not exist.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Combine the rows already on disk (if any) with the new rows
if os.path.exists(file_path):
    existing_data = pd.read_csv(file_path)
    updated_data = pd.concat([existing_data, df], ignore_index=True)
else:
    updated_data = df

# Rewrite the file with the combined rows
updated_data.to_csv(file_path, index=False)

print(f"Data saved to {file_path}")


Data saved to data/clean_weather_data.csv


In [32]:
# File path within the "data" folder
file_path = 'data/db_ready_data.csv'

# Create the target folder on first run; to_csv cannot write into a
# directory that does not exist.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Combine the rows already on disk (if any) with the new rows
if os.path.exists(file_path):
    existing_data = pd.read_csv(file_path)
    updated_data = pd.concat([existing_data, df], ignore_index=True)
else:
    updated_data = df

# Rewrite the file with the combined rows
updated_data.to_csv(file_path, index=False)

print(f"Data saved to {file_path}")


Data saved to data/db_ready_data.csv


# write the cleaned data to a postgresql database

In [None]:
## TODO: create the database in PostgreSQL
## TODO: create the schema in PostgreSQL
## TODO: figure out how to correctly write/insert the data from df into the database