# Neural network

In [1]:
import pandas as pd

# Load the dataset
file_path = "OSEBX_Market_Macro_Data_2015_2024.csv"
df = pd.read_csv(file_path)

# Display basic information and first few rows
df.info(), df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7080 entries, 0 to 7079
Data columns (total 34 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Date                     7080 non-null   object 
 1   Instrument               7080 non-null   object 
 2   First Trade Date         7080 non-null   object 
 3   ClosePrice               7080 non-null   float64
 4   OpenPrice                7080 non-null   float64
 5   Volume                   7080 non-null   float64
 6   BidPrice                 7080 non-null   float64
 7   AskPrice                 7080 non-null   float64
 8   DividendYield            7079 non-null   float64
 9   BookValuePerShare        7080 non-null   float64
 10  Beta                     7069 non-null   float64
 11  MarketCap                7080 non-null   float64
 12  CommonSharesOutstanding  7080 non-null   float64
 13  MonthlyReturn            7080 non-null   float64
 14  EconomicSector          

(None,
          Date Instrument First Trade Date  ClosePrice   OpenPrice   Volume  \
 0  2015-01-31    AFGA.OL       1997-09-08   79.420230   79.890172      0.0   
 1  2015-02-28    AFGA.OL       1997-09-08   85.059536   85.529479      0.0   
 2  2015-03-31    AFGA.OL       1997-09-08   93.048554   93.988438  29730.0   
 3  2015-04-30    AFGA.OL       1997-09-08   97.747976   93.988438  31574.0   
 4  2015-05-31    AFGA.OL       1997-09-08  105.267051  100.567629      0.0   
 
      BidPrice    AskPrice  DividendYield  BookValuePerShare  ...  \
 0   79.420230   80.595086            NaN          15.058302  ...   
 1   84.354623   85.059536       5.524862          15.723256  ...   
 2   92.578611   93.518496       5.050505          15.723256  ...   
 3   96.808091   97.747976       4.807692          15.723256  ...   
 4  104.327166  105.267051       4.464286          15.723256  ...   
 
    TurnoverRatio   BrentOil    USDNOK    EURNOK US10Y    USCPI USGDPGrowth  \
 0       0.000000  52.

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Drop rows with missing target variable (OSEBXReturns)
df_clean = df.dropna(subset=["OSEBXReturns"]).copy()

# Convert Date column to datetime format
df_clean["Date"] = pd.to_datetime(df_clean["Date"])

# Selecting Features (X) and Target (Y)
features = [
    "Momentum_3M", "Momentum_6M", "Momentum_12M",
    "Volatility_3M", "Volatility_6M", "Volatility_12M",
    "Volume", "TurnoverRatio", "BidAskSpread",
    "MarketCap", "DividendYield", "BookValuePerShare",
    "EarningsPerShare", "Beta", "USDNOK", "EURNOK",
    "US10Y", "USCPI", "USGDPGrowth", "NorgesBank10Y", "NorwegianCPI",
    "BrentOil"
]

target = "OSEBXReturns"

# Drop remaining rows with missing features
df_clean = df_clean.dropna(subset=features)

# Sorting dataset by Date
df_clean = df_clean.sort_values(by="Date")

# Splitting data into training (2015-2019), validation (2020-2022), and test (2023-2024)
train = df_clean[(df_clean["Date"].dt.year >= 2015) & (df_clean["Date"].dt.year <= 2019)]
valid = df_clean[(df_clean["Date"].dt.year >= 2020) & (df_clean["Date"].dt.year <= 2022)]
test = df_clean[(df_clean["Date"].dt.year >= 2023)]

# Extract features and target
X_train, y_train = train[features], train[target]
X_valid, y_valid = valid[features], valid[target]
X_test, y_test = test[features], test[target]

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=features, index=X_train.index)
X_valid_scaled = pd.DataFrame(X_valid_scaled, columns=features, index=X_valid.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=features, index=X_test.index)

# Display the cleaned and split dataset info
display(df_clean)

# Summary
{
    "Train Set": X_train_scaled.shape,
    "Validation Set": X_valid_scaled.shape,
    "Test Set": X_test_scaled.shape,
    "Target Variable": target,
    "Feature Count": len(features)
}


Unnamed: 0,Date,Instrument,First Trade Date,ClosePrice,OpenPrice,Volume,BidPrice,AskPrice,DividendYield,BookValuePerShare,...,TurnoverRatio,BrentOil,USDNOK,EURNOK,US10Y,USCPI,USGDPGrowth,OSEBXReturns,NorgesBank10Y,NorwegianCPI
480,2015-01-31,AKSOA.OL,2014-09-29,24.114697,24.650013,0.0,24.114697,24.197544,11.898241,22.904350,...,0.0,52.990002,7.72540,8.72510,1.68,234.747,3.6,-0.020052,1.512,98.5
2760,2015-01-31,FLNG.OL,2007-04-19,86.190732,86.190732,0.0,86.190732,88.653324,10.452511,16.617404,...,0.0,52.990002,7.72540,8.72510,1.68,234.747,3.6,-0.020052,1.512,98.5
6600,2015-01-31,VEI.OL,1986-06-23,67.629914,67.841920,0.0,67.417908,67.629914,13.295551,17.980553,...,0.0,52.990002,7.72540,8.72510,1.68,234.747,3.6,-0.020052,1.512,98.5
4800,2015-01-31,NYKD.OL,2020-01-27,100.450000,102.000000,0.0,19.200000,19.200000,0.000000,3.134430,...,0.0,52.990002,7.72540,8.72510,1.68,234.747,3.6,-0.020052,1.512,98.5
600,2015-01-31,ATEA.OL,1985-03-28,75.787717,77.113450,0.0,75.566762,75.787717,73.954984,34.743722,...,0.0,52.990002,7.72540,8.72510,1.68,234.747,3.6,-0.020052,1.512,98.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5759,2024-12-31,SCHB.OL,2015-06-01,334.200000,332.600000,0.0,333.200000,334.200000,21.170455,185.422331,...,0.0,72.940002,11.32762,11.78811,4.58,317.603,2.3,-0.020052,3.599,137.6
1559,2024-12-31,BWLPG.OL,2013-11-21,125.300000,127.000000,0.0,125.300000,125.400000,25.788244,11.047649,...,0.0,72.940002,11.32762,11.78811,4.58,317.603,2.3,-0.020052,3.599,137.6
5879,2024-12-31,SNI.OL,1996-02-02,289.000000,292.000000,0.0,289.000000,290.000000,10.533452,35.611202,...,0.0,72.940002,11.32762,11.78811,4.58,317.603,2.3,-0.020052,3.599,137.6
4919,2024-12-31,NYKD.OL,2020-01-27,3.046000,3.000000,0.0,3.020000,3.046000,0.000000,0.569875,...,0.0,72.940002,11.32762,11.78811,4.58,317.603,2.3,-0.020052,3.599,137.6


{'Train Set': (3528, 22),
 'Validation Set': (2124, 22),
 'Test Set': (1416, 22),
 'Target Variable': 'OSEBXReturns',
 'Feature Count': 22}

In [4]:
pip install tensorflow scikit-learn pandas numpy matplotlib


Collecting tensorflow
  Downloading tensorflow-2.16.2-cp311-cp311-macosx_10_15_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (2.5 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl.metadata (5.2 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Downloading ml_dtypes-0.3.2-cp311-cp311-macosx_10_9_universal2.whl.metadata (20 kB)
Collecting opt-einsum

Downloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
Downloading gast-0.6.0-py3-none-any.whl (21 kB)
Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading keras-3.8.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hDownloading libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl (26.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading ml_dtypes-0.3.2-cp311-cp311-macosx_10_9_universal2.whl (389 kB)
[2K   [90m━━━━━━━━

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from sklearn.metrics import r2_score
import numpy as np

# Load your cleaned dataset (X_train_scaled, X_valid_scaled, etc.)
# Ensure you have run the data preprocessing steps before this!

# Set random seed for reproducibility
tf.random.set_seed(42)

# Define the Neural Network (NN3) Model
model = keras.Sequential([
    layers.Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.01), input_shape=(X_train_scaled.shape[1],)),
    layers.BatchNormalization(),
    layers.Dense(16, activation="relu", kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dense(8, activation="relu", kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dense(1, activation="linear")  # Output layer (predicting excess return)
])

# Compile the model
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse", metrics=["mae"])

# Train the model with early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_valid_scaled, y_valid),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model
y_train_pred = model.predict(X_train_scaled).flatten()
y_valid_pred = model.predict(X_valid_scaled).flatten()
y_test_pred = model.predict(X_test_scaled).flatten()

# Compute R-squared
r2_train = r2_score(y_train, y_train_pred)
r2_valid = r2_score(y_valid, y_valid_pred)
r2_test = r2_score(y_test, y_test_pred)

# Print Results
print(f"Train R²: {r2_train:.4f}")
print(f"Validation R²: {r2_valid:.4f}")
print(f"Test R² (Out-of-Sample): {r2_test:.4f}")


ModuleNotFoundError: No module named 'tensorflow.python'