In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the training data
training_data = pd.read_excel("top 5+ stock 2019-2023.xlsx")

# Normalize parameter names in training data
training_data["Parameters"] = training_data["Parameters"].str.lower().str.strip()

# Transform the data for training
training_data_melted = training_data.melt(
    id_vars=["Company_name", "Parameters"], 
    var_name="Year", 
    value_name="Value"
)

# Pivot the data to have years as columns
training_data_pivot = training_data_melted.pivot_table(
    index=["Company_name", "Parameters"],
    columns="Year",
    values="Value"
).reset_index()

# Extract features and target columns
features = training_data_pivot.iloc[:, 2:-1].values  # All years except the last one
target = training_data_pivot.iloc[:, -1].values  # The last year as the target

# Normalize the features and target
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)
target_scaled = scaler.fit_transform(target.reshape(-1, 1))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    features_scaled, target_scaled, test_size=0.2, random_state=42
)

# Define the neural network model
model = Sequential([
    Dense(128, activation="relu", input_dim=X_train.shape[1]),
    Dropout(0.2),
    Dense(64, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="linear")
])

model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32
)

# Load the forecast data
forecast_data = pd.read_excel("top 5 2024_2033.xlsx")

# Normalize parameter names in forecast data
forecast_data["Parameters"] = forecast_data["Parameters"].str.lower().str.strip()

# Define the parameter to forecast
parameter_to_forecast = "repurchase of common stock".lower().strip()

# Check if the parameter exists in the forecast data
if parameter_to_forecast not in forecast_data["Parameters"].values:
    available_parameters = forecast_data["Parameters"].unique()
    raise ValueError(
        f"No matching data found in forecast data for '{parameter_to_forecast}'.\n"
        f"Available parameters: {available_parameters}"
    )

# Filter the forecast data for the desired parameter
forecast_data_filtered = forecast_data[forecast_data["Parameters"] == parameter_to_forecast]

# Prepare the forecast features
forecast_features = forecast_data_filtered.iloc[:, 2:].values
forecast_features_scaled = scaler.transform(forecast_features)

# Predict future values
predicted_values_scaled = model.predict(forecast_features_scaled)

# Inverse scale the predictions
predicted_values = scaler.inverse_transform(predicted_values_scaled)

# Add the predictions to the forecast data
forecast_data_filtered.loc[:, "Predicted Values"] = predicted_values

# Save the updated forecast data
forecast_data_filtered.to_csv("forecasted_data_with_predictions.csv", index=False)

print("Forecasting complete. Results saved to 'forecasted_data_with_predictions.csv'.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - loss: 0.8176 - mae: 0.8855 - val_loss: 0.7799 - val_mae: 0.8830
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.7236 - mae: 0.8385 - val_loss: 0.7061 - val_mae: 0.8403
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 657ms/step - loss: 0.6658 - mae: 0.8047 - val_loss: 0.6391 - val_mae: 0.7994
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 331ms/step - loss: 0.6293 - mae: 0.7761 - val_loss: 0.5759 - val_mae: 0.7588
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.6098 - mae: 0.7705 - val_loss: 0.5158 - val_mae: 0.7182
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 559ms/step - loss: 0.5013 - mae: 0.7011 - val_loss: 0.4584 - val_mae: 0.6769
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255ms/step - loss: 0.4630 - mae: 0.6

ValueError: No matching data found in forecast data for 'repurchase of common stock'.
Available parameters: ['dividends paid' 'net cash flows used in financing activities'
 'net cash flows used in investing activities'
 'proceeds from sales of equity securities' 'total current assets'
 'all other current assets' 'other intangible assets, net'
 'other non-operating income' 'proceeds from issuance of long-term debt'
 'property, plant, and equipment less accumulated depreciation'
 'accumulated other comprehensive loss' 'contract & other deferred assets'
 'depreciation & amortization' 'net cash paid for acquisitions'
 'changes in operating assets & liabilities (total)'
 'cost of services sold' 'gross profit' 'sales of services'
 'ebitda margin' 'increase (decrease) in cash & cash equivalents'
 'operating cash flow margin' 'operating income' 'sales of goods'
 'all other current liabilities']

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load datasets (replace file paths with actual locations of your datasets)
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Extract relevant data
repurchase_data = data_2019_2023[data_2019_2023['Parameters'] == 'repurchase of common stock']
top_5_parameters = data_2019_2023[data_2019_2023['Parameters'] != 'repurchase of common stock']

# Reshape and align datasets
# Melt datasets to align by year
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'], 
                                             var_name='Year', 
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'], 
                                            var_name='Year', 
                                            value_name='Repurchase')

# Merge datasets
merged_data = pd.merge(top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year'])
merged_data.rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'}, inplace=True)

# Compute top 5 parameters by correlation
correlations = (
    merged_data.groupby('Parameters')
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
    .sort_values(ascending=False)
    .head(5)
)
top_5_selected_params = correlations.index.tolist()

# Filter data for top 5 parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Prepare features and target
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'], 
                                         columns='Parameters', 
                                         values='Value')
target = filtered_data.drop_duplicates(subset=['Company_name', 'Year']).set_index(['Company_name', 'Year'])['Repurchase']

# Align features and target
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Load forecast data for 2024-2033
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'], 
                                              var_name='Year', 
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'], 
                                                   columns='Parameters', 
                                                   values='Value')

# Standardize forecast features
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions
predictions = model.predict(forecast_features_scaled)

# Prepare output
forecast_features['Predicted_Repurchase'] = predictions
forecast_features.reset_index(inplace=True)
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_repurchase_2024_2033.csv'")


Epoch 1/50


  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 4361462.0000 - mae: 1187.5000 - val_loss: 2554.4470 - val_mae: 30.0000
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 418ms/step - loss: 4361460.0000 - mae: 1187.4990 - val_loss: 2554.3940 - val_mae: 30.0000
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - loss: 4361457.5000 - mae: 1187.4980 - val_loss: 2554.3411 - val_mae: 30.0000
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - loss: 4361455.5000 - mae: 1187.4971 - val_loss: 2554.2881 - val_mae: 30.0000
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283ms/step - loss: 4361453.5000 - mae: 1187.4961 - val_loss: 2554.2354 - val_mae: 30.0000
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step - loss: 4361450.5000 - mae: 1187.4951 - val_loss: 2554.1821 - val_mae: 30.0000
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load datasets (replace file paths with actual locations of your datasets)
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Extract relevant data
repurchase_data = data_2019_2023[data_2019_2023['Parameters'] == 'repurchase of common stock']
top_5_parameters = data_2019_2023[data_2019_2023['Parameters'] != 'repurchase of common stock']

# Reshape and align datasets
# Melt datasets to align by year
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'], 
                                             var_name='Year', 
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'], 
                                            var_name='Year', 
                                            value_name='Repurchase')

# Merge datasets
merged_data = pd.merge(top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year'])
merged_data.rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'}, inplace=True)

# Compute top 5 parameters by correlation
correlations = (
    merged_data.groupby('Parameters')
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
    .sort_values(ascending=False)
    .head(5)
)
top_5_selected_params = correlations.index.tolist()

# Filter data for top 5 parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Prepare features and target
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'], 
                                         columns='Parameters', 
                                         values='Value')
target = filtered_data.drop_duplicates(subset=['Company_name', 'Year']).set_index(['Company_name', 'Year'])['Repurchase']

# Align features and target
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build a more robust neural network model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Load forecast data for 2024-2033
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'], 
                                              var_name='Year', 
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'], 
                                                   columns='Parameters', 
                                                   values='Value')

# Standardize forecast features
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions
predictions = model.predict(forecast_features_scaled)

# Prepare output
forecast_features['Predicted_Repurchase'] = predictions
forecast_features.reset_index(inplace=True)
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_repurchase_2024_2033.csv'")


  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 4361462.0000 - mae: 1187.5000 - val_loss: 2554.4470 - val_mae: 30.0000
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - loss: 4361460.0000 - mae: 1187.4990 - val_loss: 2554.3940 - val_mae: 30.0000
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - loss: 4361457.5000 - mae: 1187.4980 - val_loss: 2554.3411 - val_mae: 30.0000
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step - loss: 4361455.5000 - mae: 1187.4971 - val_loss: 2554.2881 - val_mae: 30.0000
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - loss: 4361453.0000 - mae: 1187.4961 - val_loss: 2554.2354 - val_mae: 30.0000
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step - loss: 4361450.5000 - mae: 1187.4951 - val_loss: 2554.1821 - val_mae: 30.0000
Epoch 7/100
[1m1/1

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load datasets (replace file paths with actual locations of your datasets)
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Extract relevant data
repurchase_data = data_2019_2023[data_2019_2023['Parameters'] == 'repurchase of common stock']
top_5_parameters = data_2019_2023[data_2019_2023['Parameters'] != 'repurchase of common stock']

# Reshape and align datasets
# Melt datasets to align by year
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'], 
                                             var_name='Year', 
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'], 
                                            var_name='Year', 
                                            value_name='Repurchase')

# Merge datasets
merged_data = pd.merge(top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year'])
merged_data.rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'}, inplace=True)

# Compute top 5 parameters by correlation
correlations = (
    merged_data.groupby('Parameters')
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
    .sort_values(ascending=False)
    .head(5)
)
top_5_selected_params = correlations.index.tolist()

# Filter data for top 5 parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Prepare features and target
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'], 
                                         columns='Parameters', 
                                         values='Value')
target = filtered_data.drop_duplicates(subset=['Company_name', 'Year']).set_index(['Company_name', 'Year'])['Repurchase']

# Align features and target
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build an enhanced neural network model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.2),  # Add dropout for regularization
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=200, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Load forecast data for 2024-2033
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'], 
                                              var_name='Year', 
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'], 
                                                   columns='Parameters', 
                                                   values='Value')

# Standardize forecast features
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions
predictions = model.predict(forecast_features_scaled)

# Prepare output
forecast_features['Predicted_Repurchase'] = predictions
forecast_features.reset_index(inplace=True)
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_repurchase_2024_2033.csv'")


Epoch 1/200


  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 4361462.0000 - mae: 1187.5000 - val_loss: 2554.4470 - val_mae: 30.0000
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274ms/step - loss: 4361460.0000 - mae: 1187.4990 - val_loss: 2554.3940 - val_mae: 30.0000
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - loss: 4361458.0000 - mae: 1187.4980 - val_loss: 2554.3411 - val_mae: 30.0000
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - loss: 4361455.0000 - mae: 1187.4971 - val_loss: 2554.2881 - val_mae: 30.0000
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - loss: 4361453.5000 - mae: 1187.4961 - val_loss: 2554.2354 - val_mae: 30.0000
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - loss: 4361450.5000 - mae: 1187.4951 - val_loss: 2554.1821 - val_mae: 30.0000
Epoch 7/200
[1m1/1[0m [32m━━

In [None]:
#1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber

# Load datasets (replace file paths with actual locations of your datasets)
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Extract relevant data
repurchase_data = data_2019_2023[data_2019_2023['Parameters'] == 'repurchase of common stock']
top_5_parameters = data_2019_2023[data_2019_2023['Parameters'] != 'repurchase of common stock']

# Reshape and align datasets
# Melt datasets to align by year
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'], 
                                             var_name='Year', 
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'], 
                                            var_name='Year', 
                                            value_name='Repurchase')

# Merge datasets
merged_data = pd.merge(top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year'])
merged_data.rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'}, inplace=True)

# Compute top 5 parameters by correlation
correlations = (
    merged_data.groupby('Parameters')
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
    .sort_values(ascending=False)
    .head(5)
)
top_5_selected_params = correlations.index.tolist()

# Filter data for top 5 parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Prepare features and target
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'], 
                                         columns='Parameters', 
                                         values='Value')
target = filtered_data.drop_duplicates(subset=['Company_name', 'Year']).set_index(['Company_name', 'Year'])['Repurchase']

# Align features and target
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build an enhanced neural network model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    BatchNormalization(),  # Normalize activations
    Dropout(0.3),  # Add dropout for regularization
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model with Huber loss
model.compile(optimizer='adam', loss=Huber(), metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=300, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Load forecast data for 2024-2033
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'], 
                                              var_name='Year', 
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'], 
                                                   columns='Parameters', 
                                                   values='Value')

# Standardize forecast features
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions
predictions = model.predict(forecast_features_scaled)

# Prepare output
forecast_features['Predicted_Repurchase'] = predictions
forecast_features.reset_index(inplace=True)
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_repurchase_2024_2033.csv'")


Epoch 1/300


  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - loss: 1187.0000 - mae: 1187.5000 - val_loss: 29.6248 - val_mae: 30.0000
Epoch 2/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step - loss: 1186.9990 - mae: 1187.4990 - val_loss: 29.6245 - val_mae: 30.0000
Epoch 3/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step - loss: 1186.9980 - mae: 1187.4980 - val_loss: 29.6243 - val_mae: 30.0000
Epoch 4/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step - loss: 1186.9971 - mae: 1187.4971 - val_loss: 29.6240 - val_mae: 30.0000
Epoch 5/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step - loss: 1186.9961 - mae: 1187.4961 - val_loss: 29.6238 - val_mae: 30.0000
Epoch 6/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471ms/step - loss: 1186.9951 - mae: 1187.4951 - val_loss: 29.6235 - val_mae: 30.0000
Epoch 7/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber

# Load datasets (replace file paths with actual locations of your datasets)
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Ensure column names are stripped of spaces
data_2019_2023.columns = data_2019_2023.columns.str.strip()
data_2024_2033.columns = data_2024_2033.columns.str.strip()

# Extract relevant data
repurchase_data = data_2019_2023[data_2019_2023['Parameters'].str.strip() == 'repurchase of common stock']
top_5_parameters = data_2019_2023[data_2019_2023['Parameters'].str.strip() != 'repurchase of common stock']

# Reshape and align datasets
# Melt datasets to align by year
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'], 
                                             var_name='Year', 
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'], 
                                            var_name='Year', 
                                            value_name='Repurchase')

# Merge datasets
merged_data = pd.merge(top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year'], how='inner')
merged_data.rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'}, inplace=True)

# Compute top 5 parameters by correlation
correlations = (
    merged_data.groupby('Parameters')
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
    .dropna()  # Remove NaN correlations
    .sort_values(ascending=False)
    .head(5)
)
top_5_selected_params = correlations.index.tolist()

# Filter data for top 5 parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Prepare features and target
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'], 
                                         columns='Parameters', 
                                         values='Value')
target = filtered_data.drop_duplicates(subset=['Company_name', 'Year']).set_index(['Company_name', 'Year'])['Repurchase']

# Align features and target
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Fill missing values
data_pivoted.fillna(0, inplace=True)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build an enhanced neural network model
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    BatchNormalization(),  # Normalize activations
    Dropout(0.3),  # Add dropout for regularization
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')  # Output layer for regression
])

# Compile the model with Huber loss
model.compile(optimizer='adam', loss=Huber(), metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=500, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Load forecast data for 2024-2033
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'], 
                                              var_name='Year', 
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'], 
                                                   columns='Parameters', 
                                                   values='Value')

# Ensure forecast features match training features
forecast_features = forecast_features.reindex(columns=X_train.columns, fill_value=0)

# Standardize forecast features
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions
predictions = model.predict(forecast_features_scaled)

# Ensure predicted values are within a realistic range
predictions = predictions.flatten()
predictions = pd.Series(predictions, index=forecast_features.index)
predictions = np.clip(predictions, np.min(y_train), np.max(y_train))

# Prepare output
forecast_features['Predicted_Repurchase'] = predictions
forecast_features.reset_index(inplace=True)
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_2024_2033.csv'")

  .apply(lambda group: group['Value'].corr(group['Repurchase']))


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.