In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load datasets
train_data_path = 'top 5+ stock 2019-2023.xlsx'
forecast_data_path = 'top 5 2024_2033.xlsx'

# Read the data
train_data = pd.ExcelFile(train_data_path).parse(0)  # Assuming the first sheet contains data
forecast_data = pd.ExcelFile(forecast_data_path).parse(0)  # Assuming the first sheet contains data

# Inspect the data
print("Training data head:")
print(train_data.head())

print("\nForecast data head:")
print(forecast_data.head())

# Preprocessing the training data
top5_features = [col for col in train_data.columns if col not in ['Country', 'Year', 'Repurchase']]  # Identify feature columns

X = train_data[top5_features]  # Features
y = train_data['Repurchase']  # Target variable

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=16, verbose=1)

# Preprocessing the forecast data
forecast_features = forecast_data[top5_features]
forecast_scaled = scaler.transform(forecast_features)

# Predict repurchase values for 2024-2033
forecast_data['Repurchase_Predicted'] = model.predict(forecast_scaled)

# Reshape the output to match the required format
output_data = forecast_data[['Country', 'Year']].copy()
output_data['Parameter'] = 'Repurchase of Common Stock'
output_data['Value'] = forecast_data['Repurchase_Predicted']

# Save the results
output_path = 'repurchase_predictions_2024_2033.xlsx'
output_data.to_excel(output_path, index=False)
print(f"Predictions saved to {output_path}")


Training data head:
  Company_name                                   Parameters    2019    2020  \
0          BKR                   repurchase of common stock     0.0     0.0   
1          BKR                               dividends paid  -630.0  -278.0   
2          BKR  net cash flows used in financing activities  -695.0 -1007.0   
3          BKR  net cash flows used in investing activities -1445.0  -486.0   
4          BKR     proceeds from sales of equity securities  2669.0  2666.0   

     2021    2022    2023  
0  -434.0  -828.0  -538.0  
1  -161.0  -435.0  -576.0  
2  -838.0 -1798.0 -1671.0  
3  -534.0  -967.0 -1659.0  
4  2665.0  2664.0  2663.0  

Forecast data head:
  Company_name                                   Parameters      2024  \
0          BKR                               dividends paid  -570.824   
1          BKR  net cash flows used in financing activities -1917.007   
2          BKR  net cash flows used in investing activities -1714.013   
3          BKR     proce

KeyError: 'Repurchase'

In [5]:
print(train_data.columns)


Index(['Company_name', 'Parameters', 2019, 2020, 2021, 2022, 2023], dtype='object')


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load datasets
train_data_path = 'top 5+ stock 2019-2023.xlsx'
forecast_data_path = 'top 5 2024_2033.xlsx'

# Read the data
train_data = pd.ExcelFile(train_data_path).parse(0)  # Assuming the first sheet contains data
forecast_data = pd.ExcelFile(forecast_data_path).parse(0)  # Assuming the first sheet contains data

# Inspect the data
print("Training data head:")
print(train_data.head())

print("\nForecast data head:")
print(forecast_data.head())

# Filter and reshape the training data
train_data = train_data[train_data['Parameters'] == 'Repurchase of Common Stock']
train_data = train_data.melt(id_vars=['Company_name', 'Parameters'], 
                             var_name='Year', 
                             value_name='Repurchase')
train_data['Year'] = train_data['Year'].astype(int)  # Ensure year is an integer

# Select features and target
X = train_data[['Company_name', 'Year']]  # Features (Company name and Year for simplicity in this example)
y = train_data['Repurchase']  # Target variable

# Encode categorical variables (if any)
X = pd.get_dummies(X, columns=['Company_name'], drop_first=True)

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=16, verbose=1)

# Preprocess the forecast data
forecast_data = forecast_data[forecast_data['Parameters'] == 'Repurchase of Common Stock']
forecast_data = forecast_data.melt(id_vars=['Company_name', 'Parameters'], 
                                   var_name='Year', 
                                   value_name='Repurchase')
forecast_data['Year'] = forecast_data['Year'].astype(int)

forecast_X = forecast_data[['Company_name', 'Year']]
forecast_X = pd.get_dummies(forecast_X, columns=['Company_name'], drop_first=True)
forecast_X_scaled = scaler.transform(forecast_X)

# Predict repurchase values for 2024-2033
forecast_data['Repurchase_Predicted'] = model.predict(forecast_X_scaled)

# Reshape the output to match the required format
output_data = forecast_data[['Company_name', 'Year']].copy()
output_data['Parameter'] = 'Repurchase of Common Stock'
output_data['Value'] = forecast_data['Repurchase_Predicted']

# Save the results
output_path = 'repurchase_predictions_2024_2033.xlsx'
output_data.to_excel(output_path, index=False)
print(f"Predictions saved to {output_path}")


Training data head:
  Company_name                                   Parameters    2019    2020  \
0          BKR                   repurchase of common stock     0.0     0.0   
1          BKR                               dividends paid  -630.0  -278.0   
2          BKR  net cash flows used in financing activities  -695.0 -1007.0   
3          BKR  net cash flows used in investing activities -1445.0  -486.0   
4          BKR     proceeds from sales of equity securities  2669.0  2666.0   

     2021    2022    2023  
0  -434.0  -828.0  -538.0  
1  -161.0  -435.0  -576.0  
2  -838.0 -1798.0 -1671.0  
3  -534.0  -967.0 -1659.0  
4  2665.0  2664.0  2663.0  

Forecast data head:
  Company_name                                   Parameters      2024  \
0          BKR                               dividends paid  -570.824   
1          BKR  net cash flows used in financing activities -1917.007   
2          BKR  net cash flows used in investing activities -1714.013   
3          BKR     proce

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by StandardScaler.