In [None]:
import os
import warnings
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Suppress FutureWarning
warnings.simplefilter(action='ignore', category=FutureWarning)

# Your API keys.
# Each key is read from the environment variable of the same name when it is
# set, so credentials can be supplied without editing this file. The literals
# are kept only as a backward-compatible fallback.
# NOTE(review): these fallback keys are stored in plain text in the source —
# rotate them and drop the fallbacks once the environment variables are set.
EIA_API_KEY = os.environ.get(
    'EIA_API_KEY', 'NdeYqSkXdfEcdBe5v2lK3CQ3EbUmthN1plxBnGOC'
)
OPENWEATHERMAP_API_KEY = os.environ.get(
    'OPENWEATHERMAP_API_KEY', 'a6b6e1dd5687468e47b01971cbe9d6e0'
)

def get_renewable_production(start_date, end_date, state_code):
    """Fetch monthly solar and wind generation for one state from the EIA v2 API.

    Records are requested page by page using the ``offset`` parameter, so each
    record is retrieved exactly once and nothing is counted twice when the
    generation values are summed.

    Args:
        start_date: First period to request, as a string (the caller in this
            file passes ``'YYYY-MM-DD'``).
        end_date: Last period to request, in the same format as ``start_date``.
        state_code: Value for the API's ``location`` facet (e.g. ``'NY'``).

    Returns:
        A DataFrame with a ``date`` column plus one column per fuel type id
        present in the response, each holding the summed generation for that
        period. Returns ``None`` when the range is empty, a request fails, or
        the API returns no records.
    """
    base_url = 'https://api.eia.gov/v2/electricity/electric-power-operational-data/data/'
    page_size = 5000  # Maximum allowed by the API

    # An inverted range can never yield data; bail out before any request.
    if start_date > end_date:
        print(f"No data requested: start {start_date} is after end {end_date}")
        return None

    all_data = []
    offset = 0
    while True:
        params = {
            'api_key': EIA_API_KEY,
            'frequency': 'monthly',
            'data[0]': 'generation',
            'facets[fueltypeid][]': ['SUN', 'WND'],  # Solar and Wind
            'facets[location][]': [state_code],  # State code (e.g., 'NY' for New York)
            'start': start_date,
            'end': end_date,
            'sort[0][column]': 'period',
            'sort[0][direction]': 'asc',
            'offset': offset,
            'length': page_size,
        }
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Error fetching data: {exc}")
            return None

        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            print(f"Response content: {response.text}")
            return None

        data = response.json().get('response', {}).get('data') or []
        all_data.extend(data)
        # A short (or empty) page means the result set is exhausted.
        if len(data) < page_size:
            break
        offset += len(data)

    if not all_data:
        print("No production records returned for the requested range")
        return None

    print(f"Total records fetched: {len(all_data)}")
    print(f"Date range in fetched data: from {all_data[0]['period']} to {all_data[-1]['period']}")

    df = pd.DataFrame(all_data)
    df['date'] = pd.to_datetime(df['period'])
    df['generation'] = pd.to_numeric(df['generation'], errors='coerce')

    # NOTE(review): every row returned for a (date, fuel type) pair is summed.
    # If the API returns both per-sector rows and an all-sector total, this
    # would over-count — confirm against the response schema.
    df = df.groupby(['date', 'fueltypeid'])['generation'].sum().reset_index()
    df_pivot = df.pivot(index='date', columns='fueltypeid', values='generation')
    df_pivot.columns.name = None
    df_pivot = df_pivot.reset_index()
    return df_pivot

def _utc_timestamp(moment):
    """Return ``moment`` as integer Unix seconds, reading naive datetimes as UTC."""
    if moment.tzinfo is None:
        # Naive ``datetime.timestamp()`` uses the machine's local timezone,
        # which would make the requested range depend on where this runs.
        moment = moment.replace(tzinfo=timezone.utc)
    return int(moment.timestamp())


def get_weather_data(lat, lon, start_date, end_date):
    """Fetch historical weather records for a coordinate from OpenWeatherMap.

    The range is requested in consecutive 30-day windows and the records of
    all windows are combined into a single table.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        start_date: ``datetime`` marking the start of the range. Naive values
            are interpreted as UTC.
        end_date: ``datetime`` marking the end of the range. Naive values are
            interpreted as UTC.

    Returns:
        A DataFrame with ``date``, ``temp``, ``wind_speed`` and
        ``precipitation`` columns, or ``None`` when a request fails or no
        records are returned.
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    base_url = "https://history.openweathermap.org/data/2.5/history/city"
    window = timedelta(days=30)
    all_data = []
    current_date = start_date

    while current_date <= end_date:
        window_end = min(current_date + window, end_date)
        params = {
            'lat': lat,
            'lon': lon,
            'type': 'month',
            'start': _utc_timestamp(current_date),
            'end': _utc_timestamp(window_end),
            'appid': OPENWEATHERMAP_API_KEY,
            'units': 'metric'
        }
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Error fetching data: {exc}")
            return None

        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            print(f"Response content: {response.text}")
            return None

        all_data.extend(response.json().get('list') or [])
        current_date += window

    if not all_data:
        print("No weather records returned for the requested range")
        return None

    df = pd.DataFrame(all_data)
    # Adjacent windows share their boundary instant, so the same record can be
    # returned twice; keep one copy so later averages are not skewed.
    df = df.drop_duplicates(subset='dt').reset_index(drop=True)

    # NOTE(review): the field names read below ('temp' -> 'day', 'speed',
    # 'rain') assume a daily-aggregate record shape — confirm they match what
    # this endpoint actually returns.
    df['date'] = pd.to_datetime(df['dt'], unit='s')
    df['temp'] = df['temp'].apply(lambda x: x['day'])
    df['wind_speed'] = df['speed']
    # 'rain' is absent from the table when no record reports rain.
    df['precipitation'] = df['rain'].fillna(0) if 'rain' in df.columns else 0.0

    df = df[['date', 'temp', 'wind_speed', 'precipitation']]
    return df

# Get data
start_date = datetime.strptime('2003-01-01', '%Y-%m-%d')
end_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
state_code = 'NY'  # New York
state_coordinates = (42.6526, -73.7562)  # Albany, NY (rough center of the state)

# Fetch renewable energy production data for New York
production_data = get_renewable_production(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), state_code)

if production_data is not None:
    print("Renewable Energy Production Data for New York:")
    print(production_data.head())
    print(f"Date range in production data: from {production_data['date'].min()} to {production_data['date'].max()}")
    print(f"Number of months in production data: {len(production_data)}")

# Fetch weather data for New York (using Albany's coordinates)
weather_data = get_weather_data(state_coordinates[0], state_coordinates[1], start_date, end_date)

# The fetcher returns None on failure, so guard before reporting on it
# (mirrors the production-data check above).
if weather_data is not None:
    print("\nWeather Data:")
    print(weather_data.head())
    print(f"Date range in weather data: from {weather_data['date'].min()} to {weather_data['date'].max()}")
    print(f"Number of days in weather data: {len(weather_data)}")

# Merge datasets
if production_data is not None and weather_data is not None:
    # Average the weather readings per calendar month so they line up with
    # the monthly production figures.
    weather_data['month'] = weather_data['date'].dt.to_period('M')
    weather_columns = ['temp', 'wind_speed', 'precipitation']
    monthly_weather = weather_data.groupby('month')[weather_columns].mean().reset_index()
    # Month start timestamp, used as the join key against production 'date'.
    monthly_weather['date'] = monthly_weather['month'].dt.to_timestamp()

    # Inner join: only months present in both datasets are kept.
    merged_data = pd.merge(production_data, monthly_weather, on='date')

    print("\nMerged Production and Weather Data:")
    print(merged_data.head())

    # Save to CSV (the ../data directory must already exist)
    merged_data.to_csv(f'../data/{state_code}_renewable_energy_weather_data.csv', index=False)

In [112]:
merged_data.shape

(36, 7)