# Steps to Filter Data for Ethiopia and Merge with Brent Oil Prices
## Load and Explore Economic Indicators

Start by loading and exploring the GDP Growth Rates data. We will then proceed with other indicators like Inflation Rates and Interest Rates.

In [2]:
import pandas as pd

# Read the three economic-indicator tables one after another. The first four
# lines of every file are skipped (skiprows=4) so that the fifth line is used
# as the header row. A five-row preview is printed right after each load.

# GDP growth
gdp_growth_file = '../data_collected/Economic Indicators/GDP Growth Rates/API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_261404.csv'
gdp_growth = pd.read_csv(gdp_growth_file, skiprows=4)
gdp_growth_preview = gdp_growth.head()
print(gdp_growth_preview)

# Inflation
inflation_rates_file = '../data_collected/Economic Indicators/Inflation Rates/API_FP.CPI.TOTL_DS2_en_csv_v2_269369.csv'
inflation_rates = pd.read_csv(inflation_rates_file, skiprows=4)
inflation_rates_preview = inflation_rates.head()
print(inflation_rates_preview)

# Interest rates
interest_rates_file = '../data_collected/Economic Indicators/Interest Rates/API_FR.INR.RINR_DS2_en_csv_v2_262777.csv'
interest_rates = pd.read_csv(interest_rates_file, skiprows=4)
interest_rates_preview = interest_rates.head()
print(interest_rates_preview)

                  Country Name Country Code         Indicator Name  \
0                        Aruba          ABW  GDP growth (annual %)   
1  Africa Eastern and Southern          AFE  GDP growth (annual %)   
2                  Afghanistan          AFG  GDP growth (annual %)   
3   Africa Western and Central          AFW  GDP growth (annual %)   
4                       Angola          AGO  GDP growth (annual %)   

      Indicator Code  1960      1961      1962      1963      1964      1965  \
0  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
1  NY.GDP.MKTP.KD.ZG   NaN  0.460106  7.868013  5.616400  4.668135  5.138990   
2  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
3  NY.GDP.MKTP.KD.ZG   NaN  1.873455  3.707643  7.145784  5.406403  4.102491   
4  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   

   ...      2015      2016      2017      2018      2019       2020  \
0  ... -0.623626  1.719625 

## Preprocess the Data

Clean and prepare the economic indicators data for analysis, focusing on Ethiopia.

In [5]:
import pandas as pd

# Function to preprocess the data
def preprocess_economic_data(file_path, country):
    """Build a year-indexed table of one country's values from an indicator CSV.

    The first four lines of the file are skipped before the header row is
    read. Rows are matched on the 'Country Name' column; the 'Country Code',
    'Indicator Name' and 'Indicator Code' columns are discarded, as is every
    column whose name starts with 'Unnamed'.

    Args:
        file_path: Location of the CSV file, handed to ``pd.read_csv``.
        country: Value that 'Country Name' must equal for a row to be kept.

    Returns:
        A DataFrame indexed by 'Year' whose columns are the matching
        'Country Name' values and whose cells are the indicator values.
    """
    raw = pd.read_csv(file_path, skiprows=4)

    # Keep only the requested country and shed the descriptive columns.
    metadata_columns = ['Country Code', 'Indicator Name', 'Indicator Code']
    wide = raw.loc[raw['Country Name'] == country].drop(columns=metadata_columns)

    # Placeholder columns (e.g. "Unnamed: 68") carry no yearly data.
    is_placeholder = wide.columns.str.contains('^Unnamed')
    wide = wide.loc[:, ~is_placeholder]

    # Wide -> long: one row per (country, year) pair.
    long_form = wide.melt(id_vars=['Country Name'], var_name='Year', value_name='Value')

    # Year labels that cannot be parsed as numbers become NaN and are removed.
    numeric_year = pd.to_numeric(long_form['Year'], errors='coerce')
    long_form = long_form.assign(Year=numeric_year).dropna(subset=['Year'])

    # Long -> year-indexed table with one column per country name.
    return long_form.pivot(index='Year', columns='Country Name', values='Value')

# Country whose rows are extracted from every indicator file.
country = 'Ethiopia'

# Locations of the raw indicator CSV files.
gdp_growth_path = '../data_collected/Economic Indicators/GDP Growth Rates/API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_261404.csv'
inflation_rates_path = '../data_collected/Economic Indicators/Inflation Rates/API_FP.CPI.TOTL_DS2_en_csv_v2_269369.csv'
interest_rates_path = '../data_collected/Economic Indicators/Interest Rates/API_FR.INR.RINR_DS2_en_csv_v2_262777.csv'

# Run the same preprocessing on each file; every result is indexed by year.
gdp_growth_clean = preprocess_economic_data(gdp_growth_path, country)
inflation_rates_clean = preprocess_economic_data(inflation_rates_path, country)
interest_rates_clean = preprocess_economic_data(interest_rates_path, country)

# Print a titled five-row preview of each cleaned table.
for heading, cleaned in (
    ("GDP Growth Rates (Ethiopia):", gdp_growth_clean),
    ("\nInflation Rates (Ethiopia):", inflation_rates_clean),
    ("\nInterest Rates (Ethiopia):", interest_rates_clean),
):
    print(heading)
    print(cleaned.head())

FileNotFoundError: [Errno 2] No such file or directory: 'data_collected/Economic Indicators/GDP Growth Rates/API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_261404.csv'

## Merge Economic Indicators with Brent Oil Prices

Combine the Brent oil prices data with the cleaned economic indicators for Ethiopia.

In [None]:
# Load Brent oil prices and derive the calendar year of every observation so
# the yearly indicators can be joined onto the price rows.
brent_oil_prices = pd.read_csv('data/processed/brent_oil_prices_with_features.csv', parse_dates=['Date'])
brent_oil_prices['Year'] = brent_oil_prices['Date'].dt.year

# Each cleaned indicator frame is indexed by year and holds a single column
# named after the country, so all three would collide under the same name.
# `suffixes` only renames columns that overlap between the two sides of a
# merge, so the columns are renamed explicitly to the names the modelling
# cells read ('Ethiopia_GDP', 'Ethiopia_Inflation', 'Ethiopia_Interest').
indicator_frames = {
    'Ethiopia_GDP': gdp_growth_clean,
    'Ethiopia_Inflation': inflation_rates_clean,
    'Ethiopia_Interest': interest_rates_clean,
}

# Chain the merges: every step joins onto the running result rather than onto
# the raw price table, so no previously merged indicator is discarded.
merged_data = brent_oil_prices
for indicator_column, indicator_frame in indicator_frames.items():
    renamed_indicator = indicator_frame.rename(columns={'Ethiopia': indicator_column})
    merged_data = merged_data.merge(
        renamed_indicator,
        left_on='Year',
        right_index=True,
        how='left',  # keep every price row even when a year has no indicator value
    )

print(merged_data.head())

## Advanced Time Series Modeling

Implement advanced time series models such as VAR, Markov-switching regression, and LSTM.

### Vector Autoregression (VAR)

In [None]:
from statsmodels.tsa.api import VAR

# Columns that enter the VAR system; rows with a missing value in any of them
# are dropped before fitting.
var_columns = ['Price', 'Ethiopia_GDP', 'Ethiopia_Inflation', 'Ethiopia_Interest']
var_data = merged_data[var_columns].dropna()

# Fit the VAR with maxlags=15 and the AIC as the lag-selection criterion,
# then print the estimation summary.
model = VAR(var_data)
var_results = model.fit(maxlags=15, ic='aic')
var_summary = var_results.summary()
print(var_summary)

### Markov-Switching Regression

In [None]:
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

# Two-regime Markov-switching regression on the price column, specified with
# trend='c' (a constant term) and switching_variance=True (a separate variance
# per regime).
price_series = merged_data['Price']
markov_model = MarkovRegression(
    price_series,
    k_regimes=2,
    trend='c',
    switching_variance=True,
)
markov_results = markov_model.fit()
markov_summary = markov_results.summary()
print(markov_summary)

### LSTM (Long Short-Term Memory)

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Prepare data for LSTM
def create_lstm_data(df, look_back=1):
    """Slice the first column of a 2-D series into supervised-learning windows.

    Every sample is ``look_back`` consecutive values taken from column 0; its
    target is the value that immediately follows the window.

    Args:
        df: Array-like of shape (n_rows, n_columns), for example a NumPy
            array or a pandas DataFrame. Only column 0 is used. A 1-D
            sequence is treated as a single column.
        look_back: Number of consecutive values in each input window. Must
            not be negative.

    Returns:
        Tuple ``(dataX, dataY)``. ``dataX`` has shape
        ``(n_samples, look_back)`` and ``dataY`` has shape ``(n_samples,)``,
        where ``n_samples = max(n_rows - look_back, 0)``.

    Raises:
        ValueError: If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")

    # Coerce to an ndarray so DataFrames and nested lists support the
    # positional slicing below.
    values = np.asarray(df)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    series = values[:, 0]

    n_samples = max(len(series) - look_back, 0)
    windows = [series[start:start + look_back] for start in range(n_samples)]

    # The explicit reshape keeps dataX two-dimensional even when the input is
    # too short to yield any window, so callers can still read shape[1].
    dataX = np.array(windows, dtype=series.dtype).reshape(n_samples, look_back)
    dataY = series[look_back:look_back + n_samples].copy()
    return dataX, dataY

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Rescale the price column into the [0, 1] range before training.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(merged_data[['Price']].values)

# Turn the scaled series into (window, next value) pairs, using three
# consecutive observations per window.
look_back = 3
trainX, trainY = create_lstm_data(scaled_data, look_back)

# The LSTM layer expects input laid out as [samples, time steps, features].
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))

# One LSTM layer with 50 units followed by a single-output dense layer.
model = Sequential([
    LSTM(50, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on the full set of windows.
model.fit(trainX, trainY, epochs=20, batch_size=1, verbose=2)

# Predict on the training windows and map predictions and targets back to the
# original price scale.
trainPredict = scaler.inverse_transform(model.predict(trainX))
trainY = scaler.inverse_transform([trainY])

# Report the root of the mean squared error on the training data.
trainScore = mean_squared_error(trainY[0], trainPredict[:, 0])
print('Train Score: %.2f RMSE' % (trainScore ** 0.5))