In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber
from sklearn.linear_model import LinearRegression

# Workbook locations: the 2019-2023 sheet and the 2024-2033 sheet
file_2019_2023, file_2024_2033 = (
    "top 5+ stock 2019-2023.xlsx",
    "top 5 2024_2033.xlsx",
)

# Read both workbooks, in the same order as the paths above
data_2019_2023, data_2024_2033 = map(pd.read_excel, (file_2019_2023, file_2024_2033))

# Ensure 'Year' column is treated as string for consistency
def format_year_columns(df):
    """Convert every column label of *df* to ``str`` and return *df*.

    The frame is modified in place: its ``columns`` attribute is replaced
    by the stringified labels, and the very same object is returned so the
    call can be used in an assignment.
    """
    stringified_labels = df.columns.map(str)
    df.columns = stringified_labels
    return df

# Give both frames string column labels so later label lookups agree
data_2019_2023, data_2024_2033 = (
    format_year_columns(frame) for frame in (data_2019_2023, data_2024_2033)
)

# Split the historical sheet: repurchase rows are the target, all other rows are features
is_repurchase_row = data_2019_2023['Parameters'] == 'repurchase of common stock'
repurchase_data = data_2019_2023[is_repurchase_row]
financial_parameters = data_2019_2023[~is_repurchase_row]

# Wide -> long: every non-identifier column becomes a (Year, value) pair
id_columns = ['Company_name', 'Parameters']

repurchase_melted = repurchase_data.melt(
    id_vars=id_columns, var_name='Year', value_name='Repurchase')
# Keep Year as text so it can be used as a merge key
repurchase_melted['Year'] = repurchase_melted['Year'].astype(str)

financial_melted = financial_parameters.melt(
    id_vars=id_columns, var_name='Year', value_name='Value')
# Same text typing for the feature side of the merge
financial_melted['Year'] = financial_melted['Year'].astype(str)

# Attach each company's yearly repurchase figure to every feature row.
# The target's own 'Parameters' label is renamed up front so the feature
# side keeps the plain 'Parameters' name and no merge suffixes are produced.
repurchase_side = repurchase_melted.rename(
    columns={'Parameters': 'Repurchase_Parameter'})
merged_data = financial_melted.merge(repurchase_side, on=['Company_name', 'Year'])

# Compute top 5 parameters most correlated with repurchase.
# groupby(...).corr() yields one 2x2 matrix per parameter, indexed by
# (Parameters, column name). The Value-vs-Repurchase coefficient is picked
# by label rather than by position, so the result does not depend on the
# column order an intermediate reshape happens to produce.
pairwise_corr = merged_data.groupby('Parameters')[['Value', 'Repurchase']].corr()
correlations = pairwise_corr.xs('Value', level=1)['Repurchase']

# NOTE(review): ranking is by signed correlation, so strongly negative
# parameters are never selected - confirm this is intended.
correlations = correlations.dropna().sort_values(ascending=False).head(5)
top_5_selected_params = correlations.index.tolist()

# Keep only the forecast rows that belong to the selected parameters
is_selected_param = data_2024_2033['Parameters'].isin(top_5_selected_params)
df_forecasted = data_2024_2033[is_selected_param]

# Wide -> long, with Year stored as text like in the historical frames
forecast_melted = df_forecasted.melt(
    id_vars=['Company_name', 'Parameters'],
    var_name='Year',
    value_name='Value',
).assign(Year=lambda frame: frame['Year'].astype(str))

# Compute regression-based correction factor.
# For each selected parameter a one-feature linear model Repurchase ~ Value
# is fitted on the historical rows; only its slope is kept as the factor
# (the intercept is discarded).
correction_factors = {}
for param in top_5_selected_params:
    historical_subset = merged_data[merged_data['Parameters'] == param]
    # LinearRegression.fit rejects NaN, so incomplete rows are removed first
    # and the "enough data" check counts only the rows that can be used.
    historical_subset = historical_subset.dropna(subset=['Value', 'Repurchase'])
    if len(historical_subset) > 5:  # Ensure enough data points
        reg = LinearRegression()
        # Double-bracket selection already yields 2-D (n_samples, 1) arrays
        X = historical_subset[['Value']].values
        y = historical_subset[['Repurchase']].values
        reg.fit(X, y)
        correction_factors[param] = reg.coef_[0][0]  # Regression coefficient
    else:
        correction_factors[param] = 1  # Default factor

# Apply regression-based correction factor to forecasted repurchase values.
# Each row's factor is looked up by parameter name (1 when none was fitted)
# and applied as a single column-wise multiplication instead of a row loop.
row_factors = forecast_melted['Parameters'].map(
    lambda name: correction_factors.get(name, 1))
forecast_melted['Corrected_Repurchase'] = forecast_melted['Value'] * row_factors

# Aggregate corrected predictions: one total per company and year
corrected_repurchase = forecast_melted.groupby(['Company_name', 'Year'])['Corrected_Repurchase'].sum().reset_index()
# Use an explicit 64-bit integer: plain `int` is 32-bit on some platforms
# (Windows with NumPy 1.x) and would silently overflow on large totals.
corrected_repurchase['Corrected_Repurchase'] = (
    corrected_repurchase['Corrected_Repurchase'].round().astype('int64'))

# Write the per-company, per-year totals to CSV without the row index
output_csv = "corrected_predicted_repurchase_2024_2033.csv"
corrected_repurchase.to_csv(output_csv, index=False)

# Confirmation message for the notebook user
print("✅ Corrected repurchase predictions using regression-based adjustment saved successfully!")


✅ Corrected repurchase predictions using regression-based adjustment saved successfully!
