In [9]:
import sys
import os
import pandas as pd

# Make the project's sibling 'src' directory importable.
# The path is normalised so the '..' segment is resolved, and it is only
# appended when absent so re-running this cell does not accumulate duplicate
# sys.path entries.
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

# Import your function
from data_collection import fetch_data_from_fred

# FRED API key.
# Prefer the FRED_API_KEY environment variable so the credential does not have
# to live in the notebook.
# NOTE(review): the literal fallback below is a credential committed to source;
# it is kept only so existing runs keep working. Rotate it and remove the
# fallback once FRED_API_KEY is configured.
api_key = os.environ.get('FRED_API_KEY') or 'e916710d165717e6348556cdce8111f3'

# Mapping of output name (used in CSV file names and as the key of the
# resulting dataframes dict) -> FRED series ID passed to fetch_data_from_fred.
series_ids = {
    'Unemployment_Rate': 'UNRATE',
    'GDP_Growth': 'A191RL1Q225SBEA',
    'CPI': 'CPIAUCSL',
    'Interest_Rate': 'FEDFUNDS',
    'Money_Supply': 'M2SL',
    'PPI': 'PPIACO',
    'Consumer_Confidence': 'UMCSENT',
}

# Directory that receives one interpolated CSV per series.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
output_dir = 'C:/Users/ghkjs/OneDrive/바탕 화면/EconomicIndicatorPrediction/data/processed/FillingMissingValue'

# Step 1: Fetch every series exactly once and record its date coverage.
# The fetched frames are cached in raw_series so that Step 3 works on the very
# same data used to compute the common date range, instead of issuing a second
# API request per series.
raw_series = {}
starting_dates = []
ending_dates = []

for name, series_id in series_ids.items():
    # Fetch the data
    data = fetch_data_from_fred(series_id, api_key)

    # Fail early with a clear message: an empty result would yield NaT bounds
    # and silently corrupt the common date range computed below.
    if data is None or data.empty:
        raise ValueError(f"No data returned for {name} ({series_id})")

    # Ensure the index is datetime
    data.index = pd.to_datetime(data.index)

    # Print the columns to check
    print(f"Data columns for {name}: {data.columns}")

    # Record the first and last date
    starting_dates.append(data.index.min())
    ending_dates.append(data.index.max())

    raw_series[name] = data

# Step 2: Find the latest starting date and earliest ending date, i.e. the
# window over which all series have observations.
latest_start_date = max(starting_dates)
earliest_end_date = min(ending_dates)
print(f"Latest starting date among all series: {latest_start_date.date()}")
print(f"Earliest ending date among all series: {earliest_end_date.date()}")

if latest_start_date > earliest_end_date:
    raise ValueError(
        "Series have no overlapping date range: "
        f"{latest_start_date.date()} > {earliest_end_date.date()}"
    )

# Step 3: Process each series
# Create the target directory up front so to_csv does not fail on a fresh
# checkout / machine.
os.makedirs(output_dir, exist_ok=True)

dataframes = {}
for name, data in raw_series.items():
    # Trim data to the common date range
    data = data[(data.index >= latest_start_date) & (data.index <= earliest_end_date)]

    # Resample to month-end frequency; months without an observation
    # (e.g. for lower-frequency series) become NaN rows.
    data_monthly = data.resample('ME').mean()

    # Fill those gaps by linear interpolation
    data_interpolated = data_monthly.interpolate(method='linear')

    # Set the index name to 'Date' so it appears in the CSV
    data_interpolated.index.name = 'Date'

    # Store the dataframe
    dataframes[name] = data_interpolated

    # Save to CSV
    data_interpolated_csv_file_path = os.path.join(output_dir, f'{name}_monthly_linear_interpolation.csv')
    data_interpolated.to_csv(data_interpolated_csv_file_path, index=True)
    print(f'Data saved to {data_interpolated_csv_file_path}')


Data columns for Unemployment_Rate: Index(['UNRATE'], dtype='object')
Data columns for GDP_Growth: Index(['A191RL1Q225SBEA'], dtype='object')
Data columns for CPI: Index(['CPIAUCSL'], dtype='object')
Data columns for Interest_Rate: Index(['FEDFUNDS'], dtype='object')
Data columns for Money_Supply: Index(['M2SL'], dtype='object')
Data columns for PPI: Index(['PPIACO'], dtype='object')
Data columns for Consumer_Confidence: Index(['UMCSENT'], dtype='object')
Latest starting date among all series: 1959-01-01
Earliest ending date among all series: 2024-07-01
Data saved to C:/Users/ghkjs/OneDrive/바탕 화면/EconomicIndicatorPrediction/data/processed/FillingMissingValue\Unemployment_Rate_monthly_linear_interpolation.csv
Data saved to C:/Users/ghkjs/OneDrive/바탕 화면/EconomicIndicatorPrediction/data/processed/FillingMissingValue\GDP_Growth_monthly_linear_interpolation.csv
Data saved to C:/Users/ghkjs/OneDrive/바탕 화면/EconomicIndicatorPrediction/data/processed/FillingMissingValue\CPI_monthly_linear_inter

  values = values.astype(str)
