In [None]:
#1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber

# Load datasets (replace file paths with actual locations of your datasets).
# Every column other than 'Company_name' and 'Parameters' is melted into a
# 'Year' row further down, so the sheets are assumed to hold one column per
# year -- TODO confirm against the workbooks.
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Split the historical rows into the target ('repurchase of common stock')
# and the candidate feature parameters (everything else).
is_repurchase = data_2019_2023['Parameters'] == 'repurchase of common stock'
repurchase_data = data_2019_2023[is_repurchase]
top_5_parameters = data_2019_2023[~is_repurchase]

# Reshape both frames from wide to long so they can be joined by year.
top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'],
                                             var_name='Year',
                                             value_name='Value')
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'],
                                            var_name='Year',
                                            value_name='Repurchase')

# Attach the repurchase value of the same company/year to every feature row.
# Both sides carry a 'Parameters' column, so the merge suffixes them with
# _x/_y; they are renamed right away (without inplace mutation).
merged_data = pd.merge(
    top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year']
).rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'})

# Correlation of each parameter with the repurchase value.
# Selecting the two columns explicitly keeps `apply` from operating on the
# grouping column, which newer pandas versions deprecate.
param_correlations = (
    merged_data.groupby('Parameters')[['Value', 'Repurchase']]
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
)

# Rank by correlation strength: a strong negative correlation is just as
# informative as a strong positive one, so the absolute value decides the
# order. `correlations` keeps the signed values of the selected parameters.
strongest = param_correlations.abs().sort_values(ascending=False).head(5).index
correlations = param_correlations.loc[strongest]
top_5_selected_params = correlations.index.tolist()

# Filter data for the selected parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Features: one row per (company, year), one column per selected parameter.
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'],
                                         columns='Parameters',
                                         values='Value')
# Target: the repurchase value is repeated on every parameter row of a
# (company, year) pair, so one row per pair is kept.
target = (
    filtered_data.drop_duplicates(subset=['Company_name', 'Year'])
    .set_index(['Company_name', 'Year'])['Repurchase']
)

# Keep only the (company, year) pairs present in both features and target.
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Handle missing values so no NaN reaches the network: forward-fill within
# each company (never across company boundaries), then fall back to the
# column mean for anything still missing.
# NOTE(review): the means are computed on all rows, including those that
# later land in the test split.
feature_means = data_pivoted.mean()
data_pivoted = data_pivoted.groupby(level='Company_name').ffill().fillna(feature_means)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build, train and evaluate the regression network.
# `Input` and `set_random_seed` are imported here so this cell section is
# self-contained regardless of which import cell was run.
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable (matches random_state=42 of the split).
set_random_seed(42)

# Build an enhanced neural network model.
# The input width is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer is deprecated in current Keras.
n_features = X_train_scaled.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    BatchNormalization(),  # Normalize activations
    Dropout(0.3),  # Add dropout for regularization
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model with Huber loss; MAE is tracked as a readable metric.
model.compile(optimizer='adam', loss=Huber(), metrics=['mae'])

# Train the model; the last 20% of the training rows are held out by Keras
# for validation.
model.fit(X_train_scaled, y_train, epochs=300, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate the model on the untouched test split
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Reshape the 2024-2033 forecast inputs from wide to long and keep only the
# parameters that were selected for training.
forecast_data_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'],
                                              var_name='Year',
                                              value_name='Value')
forecast_filtered = forecast_data_flattened[forecast_data_flattened['Parameters'].isin(top_5_selected_params)]

# Prepare features for prediction: one row per (company, year).
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'],
                                                   columns='Parameters',
                                                   values='Value')

# The scaler and the model expect exactly the training columns in the
# training order. Fail with a clear message if the forecast workbook lacks
# one of them, then select the columns in that order.
missing_params = X_train.columns.difference(forecast_features.columns)
if len(missing_params) > 0:
    raise ValueError(
        f"Forecast data is missing parameters used for training: {list(missing_params)}"
    )
forecast_features = forecast_features[X_train.columns]

# Handle missing values so the model never sees NaN: forward-fill within
# each company (never across company boundaries), then use the column mean.
forecast_means = forecast_features.mean()
forecast_features = forecast_features.groupby(level='Company_name').ffill().fillna(forecast_means)

# Standardize forecast features with the scaler fitted on the training data
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions; the model returns an (n_rows, 1) array.
predictions = model.predict(forecast_features_scaled)

# Prepare output: flatten the predictions to one value per row before
# storing them as a column, then expose the index levels as columns.
forecast_features['Predicted_Repurchase'] = predictions.ravel()
forecast_features = forecast_features.reset_index()
forecast_features = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save predictions to CSV
forecast_features.to_csv("predicted_repurchase_2024_2033.csv", index=False)
print("Predictions saved to 'predicted_repurchase_2024_2033.csv'")


In [None]:
#2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber

# Load datasets (Replace paths with actual file locations).
# Every column other than 'Company_name' and 'Parameters' is melted into a
# 'Year' row further down, so the sheets are assumed to hold one column per
# year -- TODO confirm against the workbooks.
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Split the historical rows into the target variable
# ('repurchase of common stock') and the financial parameters (features).
is_repurchase = data_2019_2023['Parameters'] == 'repurchase of common stock'
repurchase_data = data_2019_2023[is_repurchase]
top_5_parameters = data_2019_2023[~is_repurchase]

# Reshape datasets from wide to long for merging
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'],
                                            var_name='Year',
                                            value_name='Repurchase')

top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'],
                                             var_name='Year',
                                             value_name='Value')

# Merge features with repurchase data of the same company/year.
# Both sides carry a 'Parameters' column, so the merge suffixes them with
# _x/_y; they are renamed right away for clarity (without inplace mutation).
merged_data = pd.merge(
    top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year']
).rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'})

# Correlation of each parameter with the repurchase value.
# Selecting the two columns explicitly keeps `apply` from operating on the
# grouping column, which newer pandas versions deprecate.
param_correlations = (
    merged_data.groupby('Parameters')[['Value', 'Repurchase']]
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
)

# Rank by correlation strength: a strong negative correlation is just as
# informative as a strong positive one, so the absolute value decides the
# order. `correlations` keeps the signed values of the selected parameters.
strongest = param_correlations.abs().sort_values(ascending=False).head(5).index
correlations = param_correlations.loc[strongest]

# Get top 5 selected parameters
top_5_selected_params = correlations.index.tolist()

# Filter dataset using selected parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Pivot data to match ML input format: one row per (company, year),
# one column per selected parameter.
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'],
                                         columns='Parameters',
                                         values='Value')

# Extract target: the repurchase value is repeated on every parameter row
# of a (company, year) pair, so one row per pair is kept.
target = (
    filtered_data.drop_duplicates(subset=['Company_name', 'Year'])
    .set_index(['Company_name', 'Year'])['Repurchase']
)

# Align features and target on the (company, year) pairs present in both
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Handle missing values (forward fill + mean imputation).
# The forward fill runs within each company so values are never carried
# from one company's last year into the next company's first year, and it
# uses `.ffill()` because `fillna(method='ffill')` is deprecated.
# NOTE(review): the means are computed on all rows, including those that
# later land in the test split.
feature_means = data_pivoted.mean()
data_pivoted = data_pivoted.groupby(level='Company_name').ffill().fillna(feature_means)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build, train and evaluate the regression network.
# `Input` and `set_random_seed` are imported here so this cell section is
# self-contained regardless of which import cell was run.
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable (matches random_state=42 of the split).
set_random_seed(42)

# Build the neural network model.
# The input width is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer triggers a Keras warning.
n_features = X_train_scaled.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile model with Huber loss (handles outliers better); MAE is tracked
# as a readable metric.
model.compile(optimizer='adam', loss=Huber(), metrics=['mae'])

# Train the model; the last 20% of the training rows are held out by Keras
# for validation.
model.fit(X_train_scaled, y_train, epochs=300, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate model on test data
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# --------------------------------
# STEP 2: Forecasting for 2024-2033
# --------------------------------

# Prepare forecast dataset: reshape from wide to long
forecast_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'],
                                         var_name='Year',
                                         value_name='Value')

# Filter forecast data using top 5 selected parameters
forecast_filtered = forecast_flattened[forecast_flattened['Parameters'].isin(top_5_selected_params)]

# Pivot forecast data to match model input format: one row per (company, year)
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'],
                                                  columns='Parameters',
                                                  values='Value')

# The scaler and the model expect exactly the training columns in the
# training order. Fail with a clear message if the forecast workbook lacks
# one of them, then select the columns in that order.
missing_params = X_train.columns.difference(forecast_features.columns)
if len(missing_params) > 0:
    raise ValueError(
        f"Forecast data is missing parameters used for training: {list(missing_params)}"
    )
forecast_features = forecast_features[X_train.columns]

# Handle missing values in forecast dataset: forward-fill within each
# company (never across company boundaries), then use the column mean.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
forecast_means = forecast_features.mean()
forecast_features = forecast_features.groupby(level='Company_name').ffill().fillna(forecast_means)

# Standardize forecast data using trained scaler
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions for 2024-2033; the model returns an (n_rows, 1) array.
predictions = model.predict(forecast_features_scaled)

# Prepare output dataframe: flatten the predictions to one value per row
# before storing them as a column, then expose the index levels as columns.
forecast_features['Predicted_Repurchase'] = predictions.ravel()
forecast_features = forecast_features.reset_index()
forecast_results = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]

# Save corrected predictions to CSV
forecast_results.to_csv("corrected_predicted_repurchase_2024_2033.csv", index=False)
print("✅ Predictions saved to 'corrected_predicted_repurchase_2024_2033.csv'")


  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  data_pivoted = data_pivoted.fillna(method='ffill').fillna(data_pivoted.mean())
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1187.3820 - mae: 1187.8820 - val_loss: 29.5960 - val_mae: 29.9787
Epoch 2/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - loss: 1187.1602 - mae: 1187.6602 - val_loss: 29.5927 - val_mae: 29.9792
Epoch 3/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448ms/step - loss: 1187.3175 - mae: 1187.8175 - val_loss: 29.5910 - val_mae: 29.9811
Epoch 4/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239ms/step - loss: 1187.6307 - mae: 1188.1307 - val_loss: 29.5909 - val_mae: 29.9831
Epoch 5/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step - loss: 1187.1835 - mae: 1187.6835 - val_loss: 29.5912 - val_mae: 29.9855
Epoch 6/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step - loss: 1186.9894 - mae: 1187.4894 - val_loss: 29.5907 - val_mae: 29.9876
Epoch 7/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

  forecast_features = forecast_features.fillna(method='ffill').fillna(forecast_features.mean())


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
✅ Predictions saved to 'corrected_predicted_repurchase_2024_2033.csv'


In [None]:
#3
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber

# Load datasets (Replace paths with actual file locations).
# Every column other than 'Company_name' and 'Parameters' is melted into a
# 'Year' row further down, so the sheets are assumed to hold one column per
# year -- TODO confirm against the workbooks.
data_2019_2023 = pd.read_excel("top 5+ stock 2019-2023.xlsx")
data_2024_2033 = pd.read_excel("top 5 2024_2033.xlsx")

# Split the historical rows into the target variable
# ('repurchase of common stock') and the financial parameters (features).
is_repurchase = data_2019_2023['Parameters'] == 'repurchase of common stock'
repurchase_data = data_2019_2023[is_repurchase]
top_5_parameters = data_2019_2023[~is_repurchase]

# Reshape datasets from wide to long for merging
repurchase_flattened = repurchase_data.melt(id_vars=['Company_name', 'Parameters'],
                                            var_name='Year',
                                            value_name='Repurchase')

top_5_data_flattened = top_5_parameters.melt(id_vars=['Company_name', 'Parameters'],
                                             var_name='Year',
                                             value_name='Value')

# Merge features with repurchase data of the same company/year.
# Both sides carry a 'Parameters' column, so the merge suffixes them with
# _x/_y; they are renamed right away for clarity (without inplace mutation).
merged_data = pd.merge(
    top_5_data_flattened, repurchase_flattened, on=['Company_name', 'Year']
).rename(columns={'Parameters_x': 'Parameters', 'Parameters_y': 'Repurchase_Parameter'})

# Correlation of each parameter with the repurchase value.
# Selecting the two columns explicitly keeps `apply` from operating on the
# grouping column, which newer pandas versions deprecate.
param_correlations = (
    merged_data.groupby('Parameters')[['Value', 'Repurchase']]
    .apply(lambda group: group['Value'].corr(group['Repurchase']))
)

# Rank by correlation strength: a strong negative correlation is just as
# informative as a strong positive one, so the absolute value decides the
# order. `correlations` keeps the signed values of the selected parameters.
strongest = param_correlations.abs().sort_values(ascending=False).head(5).index
correlations = param_correlations.loc[strongest]

# Get top 5 selected parameters
top_5_selected_params = correlations.index.tolist()

# Filter dataset using selected parameters
filtered_data = merged_data[merged_data['Parameters'].isin(top_5_selected_params)]

# Pivot data to match ML input format: one row per (company, year),
# one column per selected parameter.
data_pivoted = filtered_data.pivot_table(index=['Company_name', 'Year'],
                                         columns='Parameters',
                                         values='Value')

# Extract target: the repurchase value is repeated on every parameter row
# of a (company, year) pair, so one row per pair is kept.
target = (
    filtered_data.drop_duplicates(subset=['Company_name', 'Year'])
    .set_index(['Company_name', 'Year'])['Repurchase']
)

# Align features and target on the (company, year) pairs present in both
data_pivoted, target = data_pivoted.align(target, join='inner', axis=0)

# Handle missing values (forward fill + mean imputation).
# The forward fill runs within each company so values are never carried
# from one company's last year into the next company's first year, and it
# uses `.ffill()` because `fillna(method='ffill')` is deprecated.
# NOTE(review): the means are computed on all rows, including those that
# later land in the test split.
feature_means = data_pivoted.mean()
data_pivoted = data_pivoted.groupby(level='Company_name').ffill().fillna(feature_means)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data_pivoted, target, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build, train and evaluate the regression network.
# `Input` and `set_random_seed` are imported here so this cell section is
# self-contained regardless of which import cell was run.
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable (matches random_state=42 of the split).
set_random_seed(42)

# Build the neural network model.
# The input width is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer triggers a Keras warning.
n_features = X_train_scaled.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile model with Huber loss (handles outliers better); MAE is tracked
# as a readable metric.
model.compile(optimizer='adam', loss=Huber(), metrics=['mae'])

# Train the model; the last 20% of the training rows are held out by Keras
# for validation.
model.fit(X_train_scaled, y_train, epochs=300, batch_size=16, validation_split=0.2, verbose=1)

# Evaluate model on test data
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# --------------------------------
# STEP 2: Forecasting for 2024-2033
# --------------------------------

# Prepare forecast dataset: reshape from wide to long
forecast_flattened = data_2024_2033.melt(id_vars=['Company_name', 'Parameters'],
                                         var_name='Year',
                                         value_name='Value')

# Filter forecast data using top 5 selected parameters
forecast_filtered = forecast_flattened[forecast_flattened['Parameters'].isin(top_5_selected_params)]

# Pivot forecast data to match model input format: one row per (company, year)
forecast_features = forecast_filtered.pivot_table(index=['Company_name', 'Year'],
                                                  columns='Parameters',
                                                  values='Value')

# The scaler and the model expect exactly the training columns in the
# training order. Fail with a clear message if the forecast workbook lacks
# one of them, then select the columns in that order.
missing_params = X_train.columns.difference(forecast_features.columns)
if len(missing_params) > 0:
    raise ValueError(
        f"Forecast data is missing parameters used for training: {list(missing_params)}"
    )
forecast_features = forecast_features[X_train.columns]

# Handle missing values in forecast dataset: forward-fill within each
# company (never across company boundaries), then use the column mean.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
forecast_means = forecast_features.mean()
forecast_features = forecast_features.groupby(level='Company_name').ffill().fillna(forecast_means)

# Standardize forecast data using trained scaler
forecast_features_scaled = scaler.transform(forecast_features)

# Make predictions for 2024-2033; the model returns an (n_rows, 1) array.
predictions = model.predict(forecast_features_scaled)

# Prepare output dataframe.
# Flatten to one value per row, then round to the nearest integer and cast.
forecast_features['Predicted_Repurchase'] = np.round(predictions.ravel()).astype(int)

# Save the corrected integer predictions
forecast_features = forecast_features.reset_index()
forecast_results = forecast_features[['Company_name', 'Year', 'Predicted_Repurchase']]
forecast_results.to_csv("final_corrected_predicted_repurchase_2024_2033.csv", index=False)

print("✅ Predictions saved as integers in 'final_corrected_predicted_repurchase_2024_2033.csv'")

  .apply(lambda group: group['Value'].corr(group['Repurchase']))
  data_pivoted = data_pivoted.fillna(method='ffill').fillna(data_pivoted.mean())
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - loss: 1187.2588 - mae: 1187.7588 - val_loss: 29.6689 - val_mae: 30.0520
Epoch 2/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step - loss: 1186.8270 - mae: 1187.3270 - val_loss: 29.6636 - val_mae: 30.0450
Epoch 3/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 213ms/step - loss: 1186.3450 - mae: 1186.8450 - val_loss: 29.6561 - val_mae: 30.0348
Epoch 4/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 502ms/step - loss: 1186.6486 - mae: 1187.1486 - val_loss: 29.6504 - val_mae: 30.0269
Epoch 5/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step - loss: 1186.3241 - mae: 1186.8241 - val_loss: 29.6450 - val_mae: 30.0207
Epoch 6/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step - loss: 1186.2577 - mae: 1186.7577 - val_loss: 29.6376 - val_mae: 30.0153
Epoch 7/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

  forecast_features = forecast_features.fillna(method='ffill').fillna(forecast_features.mean())


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
✅ Predictions saved as integers in 'final_corrected_predicted_repurchase_2024_2033.csv'
