<a href="https://colab.research.google.com/github/jphilli1/Repo-Github/blob/main/Work/CFA%20L2%20Project/Financial_Forecasting_ML_L2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import subprocess
import os

# Set working directory to the script's location.
# `__file__` is not defined when this code runs inside a notebook kernel
# (Jupyter / Colab), so fall back to the current working directory there
# instead of stopping with a NameError.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()
os.chdir(script_dir)
print(f"Current working directory set to: {os.getcwd()}")
print(f"Files in current directory: {os.listdir()}")

# Run the environment bootstrap script that is expected next to this file.
# A missing script or a non-zero exit code is reported but does not abort the
# run, which keeps the original best-effort behaviour while making failures visible.
cmd_path = os.path.join(script_dir, "setup_env.cmd")
if os.path.isfile(cmd_path):
    # shell=True is kept from the original call: the target is a batch file
    # that is handed to the platform's command interpreter.
    setup_return_code = subprocess.call(cmd_path, shell=True)
    if setup_return_code != 0:
        print(f"Warning: '{cmd_path}' exited with code {setup_return_code}; continuing anyway.")
else:
    print(f"Warning: '{cmd_path}' not found; skipping environment setup.")



# --- Imports after environment setup ---
import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Quiet library warnings for the remainder of the run
warnings.filterwarnings(action='ignore')

# Allow pandas to print wide frames (up to 250 columns) without truncation
pd.options.display.max_columns = 250


# Read the financial dataset; exit with status 1 when the file is absent
try:
    df = pd.read_csv('financial_data.csv')
except FileNotFoundError:
    print("Error: 'financial_data.csv' not found. Please ensure it's in the same directory as the script.")
    sys.exit(1)
else:
    print("financial_data.csv loaded successfully.")

# --- Apple subset: report the quarter with the highest revenue ---
REVENUE_COLUMN = 'Revenue'
DATE_COLUMN = 'Report Date'

# Work on an independent copy so column edits below never write into `df`
apple_df = df.loc[df['Ticker'] == 'AAPL'].copy()

has_required_columns = {REVENUE_COLUMN, DATE_COLUMN}.issubset(apple_df.columns)

if not has_required_columns:
    print(f"Required columns ('{REVENUE_COLUMN}' or '{DATE_COLUMN}') not found in the DataFrame for Apple. Available columns are: {apple_df.columns.tolist()}")
elif apple_df.empty:
    print("No Apple (AAPL) data found in the DataFrame after filtering.")
else:
    # Non-numeric revenue entries become NaN and their rows are discarded
    apple_df[REVENUE_COLUMN] = pd.to_numeric(apple_df[REVENUE_COLUMN], errors='coerce')
    apple_df.dropna(subset=[REVENUE_COLUMN], inplace=True)

    if apple_df.empty:
        print("No valid Apple (AAPL) revenue data found after cleaning.")
    else:
        peak_revenue_label = apple_df[REVENUE_COLUMN].idxmax()
        max_revenue_row = apple_df.loc[peak_revenue_label]
        max_quarterly_revenue = max_revenue_row[REVENUE_COLUMN]
        max_revenue_date = max_revenue_row[DATE_COLUMN]
        print(f"Maximum Quarterly Revenue for Apple: ${max_quarterly_revenue:,.2f}")
        print(f"Corresponding Date: {max_revenue_date}")

# Remove the revenue-change target from the Apple frame when it is present;
# the membership test keeps a missing column from raising.
target_revenue_col = '% Change in Quarterly Revenue (Target Output)'
if target_revenue_col not in apple_df.columns:
    print(f"'{target_revenue_col}' not found in Apple DataFrame, skipping drop.")
else:
    apple_df = apple_df.drop(target_revenue_col, axis=1)
    print(f"'{target_revenue_col}' dropped from Apple DataFrame.")
'''
PRACTICE OPPORTUNITY:

Write Python code that performs the following tasks:
Filter the Pandas DataFrame df so that it only contains rows pertaining to Nvidia Corporation (Ticker Symbol: NVDA)
Display the histogram of the output column "% Change in Quarterly EPS (Target Output)" for Nvidia Corporation using 100 bins
'''
# --- Nvidia subset: histogram of the quarterly EPS change target ---
eps_target_column = '% Change in Quarterly EPS (Target Output)'

# Independent copy of the NVDA rows
nvidia_df = df.loc[df['Ticker'] == 'NVDA'].copy()

if eps_target_column not in nvidia_df.columns:
    print(f"'{eps_target_column}' not found in Nvidia DataFrame. Available columns are: {nvidia_df.columns.tolist()}")
elif nvidia_df.empty:
    print("No Nvidia (NVDA) data found in the DataFrame after filtering.")
else:
    # Coerce the target to numbers; rows that fail to parse are removed
    nvidia_df[eps_target_column] = pd.to_numeric(nvidia_df[eps_target_column], errors='coerce')
    nvidia_df.dropna(subset=[eps_target_column], inplace=True)

    if nvidia_df.empty:
        print(f"No valid data found in '{eps_target_column}' for Nvidia after cleaning.")
    else:
        print(f"\nDisplaying histogram for Nvidia's '{eps_target_column}':")
        # The values are scaled by 100 before plotting (the axis label reads as a percentage)
        eps_change_pct = nvidia_df[eps_target_column] * 100
        fig = px.histogram(
            eps_change_pct,
            nbins=100,
            title=f'Histogram of {eps_target_column} for Nvidia (NVDA)',
            labels={'value': 'EPS Change (%)', 'count': 'Frequency'},
        )
        fig.update_layout(plot_bgcolor="white")
        fig.show()

# --- Correlation analysis on the Apple subset ---
# Computes the full correlation matrix, picks the 10 features most positively
# and the 10 most negatively correlated with the EPS-change target, and draws
# a heatmap restricted to those features plus the target itself.
print("\nProceeding with correlation analysis for Apple data:")
target_eps_col = '% Change in Quarterly EPS (Target Output)'
if target_eps_col in apple_df.columns:
    # numeric_only=True keeps non-numeric columns out of the correlation
    correlation_matrix = apple_df.corr(numeric_only=True)

    print("\nCorrelation matrix for Apple (Full):")
    print(correlation_matrix)

    print(f"\nCorrelation coefficients with '{target_eps_col}' for Apple:")
    print(correlation_matrix[[target_eps_col]])

    # Correlation of every *other* column with the target: the target's
    # self-correlation row is removed up front so it can never occupy one of
    # the top-10 slots.
    feature_corr = correlation_matrix[[target_eps_col]].drop(
        labels=[target_eps_col], errors='ignore'
    )

    # Highest correlations first
    sorted_corr = feature_corr.sort_values(target_eps_col, ascending=False)

    top_positive_corr = sorted_corr.head(10)
    print("\nTop 10 Positively Correlated Features with EPS Change (Apple):")
    print(top_positive_corr)

    # Lowest (most negative) correlations first
    top_negative_corr = feature_corr.sort_values(target_eps_col).head(10)
    print("\nTop 10 Negatively Correlated Features with EPS Change (Apple):")
    print(top_negative_corr)

    # Negative features, then positive features, then the target.
    # dict.fromkeys removes repeats while keeping first-seen order: with fewer
    # than 20 candidate features the two top-10 lists overlap, and repeated
    # labels would produce duplicated rows/columns in the heatmap.
    selected_columns = list(dict.fromkeys(
        top_negative_corr.index.tolist()
        + top_positive_corr.index.tolist()
        + [target_eps_col]
    ))

    # Restrict the Apple frame to the selected columns
    filtered_apple_df_for_display = apple_df[selected_columns]
    print("\nApple DataFrame with Top Positively and Negatively Correlated Features:")
    print(filtered_apple_df_for_display.head())

    # Correlation matrix recomputed on the selected columns only (same frame
    # as the one displayed above; both names are kept for later use)
    filtered_corr_df = filtered_apple_df_for_display
    correlation_matrix_filtered = filtered_corr_df.corr(numeric_only=True)
    print("\nRecalculated Correlation Matrix for Top Features (Apple):")
    print(correlation_matrix_filtered)

    # Heatmap of the reduced correlation matrix
    f, ax = plt.subplots(figsize=(15, 9))
    sns.heatmap(correlation_matrix_filtered, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    plt.title('Heatmap of Top Correlated Features for Apple EPS Change')
    plt.show()
else:
    print(f"Target column '{target_eps_col}' not found in Apple DataFrame for correlation analysis.")
# --- Apple preprocessing: chronological order, drop identifier columns,
# --- one-hot encode the fiscal period ---
# The intermediate frames are printed explicitly: a bare `apple_df` expression
# only renders as the last statement of a notebook cell and shows nothing in
# the middle of a cell or in a plain script.

# Parse the publication dates so the rows can be ordered in time
apple_df['Publish Date'] = pd.to_datetime(apple_df['Publish Date'])
# Sorting the DataFrame in ascending order based on the "Publish Date" column
apple_df.sort_values(by='Publish Date', ascending=True, inplace=True)
print(apple_df.head())

# Let's drop the following columns from the Pandas DataFrame
cols_to_drop = ['Ticker', 'Sector', 'Industry', 'Company Name', 'Report Date', 'Currency',
                'Fiscal Year', 'Publish Date', 'Restated Date']

# Raises KeyError if any listed column is missing (unchanged from the original)
apple_df = apple_df.drop(columns=cols_to_drop)
print(apple_df.head())

# Let's display the original "Fiscal Period" column
print(apple_df['Fiscal Period'])

# Let's display the one-hot encoded version of the "Fiscal Period" column
fiscal_encoded = pd.get_dummies(apple_df['Fiscal Period'])
print(fiscal_encoded.head())

# Drop the 'Fiscal Period' column from the Pandas DataFrame
apple_df = apple_df.drop('Fiscal Period', axis=1)

# Concatenate the original DataFrame and the one-hot encoded "Fiscal Period" columns
apple_df = pd.concat([apple_df, fiscal_encoded], axis=1)
print(apple_df.head())

'''
PRACTICE OPPORTUNITY:

Write Python code that performs the following tasks:
Read the "financial_data.csv" file using Pandas and place the result in a Pandas DataFrame titled "df"
Filter "df" Pandas DataFrame to only include General Electric data (Ticker Symbol: GE), and place the results in a Pandas DataFrame titled "general_electric_df"
Perform one-hot encoding to the "Fiscal Period" column in "general_electric_df" DataFrame using Pandas pd.get_dummies() function
Drop the "Fiscal Period" column from "general_electric_df" DataFrame and concatenate the one-hot encoded data
'''
# --- General Electric subset with a one-hot encoded fiscal period ---
FISCAL_PERIOD_COLUMN = 'Fiscal Period'

# Independent copy of the GE rows
ge_row_mask = df['Ticker'] == 'GE'
general_electric_df = df[ge_row_mask].copy()

print("\nGeneral Electric DataFrame created.")

if FISCAL_PERIOD_COLUMN not in general_electric_df.columns:
    print(f"'{FISCAL_PERIOD_COLUMN}' column not found in General Electric DataFrame. Skipping one-hot encoding.")
else:
    # One indicator column per distinct fiscal period, named '<column>_<value>'
    one_hot_encoded_periods = pd.get_dummies(
        general_electric_df[FISCAL_PERIOD_COLUMN],
        prefix=FISCAL_PERIOD_COLUMN,
    )
    print(f"One-hot encoding performed on '{FISCAL_PERIOD_COLUMN}'.")

    # Swap the categorical column for its indicator columns
    ge_without_period = general_electric_df.drop(FISCAL_PERIOD_COLUMN, axis=1)
    general_electric_df = pd.concat([ge_without_period, one_hot_encoded_periods], axis=1)
    print(f"Original '{FISCAL_PERIOD_COLUMN}' column dropped and one-hot encoded columns concatenated.")
'''
PRACTICE OPPORTUNITY:

Using Scikit-Learn library, split the data into 30% for testing and 70% for training
Perform a sanity check by obtaining the shape of the training and testing datasets
Enable shuffling and rerun the code. Comment on your results.
'''
#%%
# --- Prepare data for General Electric ---
# Builds the feature matrix X_ge and the target y_ge from `general_electric_df`
# (created and one-hot encoded earlier in this file), then splits them 70/30.

TARGET_OUTPUT_COLUMN = '% Change in Quarterly EPS (Target Output)'

# Columns that must not be used as input features:
#   * the EPS target itself,
#   * the revenue "(Target Output)" column - it is another output label, and
#     the Apple flow in this file drops it as well; leaving it in X_ge would
#     leak an output into the inputs,
#   * identifier / text / date columns.
columns_to_drop_X = [
    TARGET_OUTPUT_COLUMN,
    '% Change in Quarterly Revenue (Target Output)',
    'Company Name', 'Sector', 'Industry', 'Ticker', 'Report Date', 'Currency',
    'Publish Date', 'Restated Date',
]
# Only drop what is actually present so a missing column does not raise
existing_columns_to_drop_X = [col for col in columns_to_drop_X if col in general_electric_df.columns]


# Separate features (X_ge) and target (y_ge)
X_ge = general_electric_df.drop(columns=existing_columns_to_drop_X).copy()
y_ge = general_electric_df[TARGET_OUTPUT_COLUMN].copy()

# Convert every feature column to numeric in one pass; unparseable values become NaN
X_ge = X_ge.apply(pd.to_numeric, errors='coerce')

# For simplicity NaNs are filled with 0. Consider more sophisticated imputation for real projects.
X_ge = X_ge.fillna(0)

# Same treatment for the target.
# NOTE(review): filling a missing target with 0 creates artificial labels;
# dropping those rows may be preferable - confirm against the dataset.
y_ge = pd.to_numeric(y_ge, errors='coerce').fillna(0)


print("\n--- General Electric Data Preparation ---")
print("Input features (X_ge) head:")
print(X_ge.head())
print("\nOutput target (y_ge) head:")
print(y_ge.head())


# --- Perform data train/test split with shuffling enabled ---
# 30% of the rows go to the test set, 70% to the training set.
# shuffle=True randomly reorders the rows before splitting; random_state=42
# makes that reordering reproducible.
X_train_ge, X_test_ge, y_train_ge, y_test_ge = train_test_split(
    X_ge, y_ge, test_size=0.3, shuffle=True, random_state=42
)

# --- Sanity check: Obtain the shape of the training and testing datasets ---
print("\n--- Sanity Check for GE Data Split ---")
print(f"Shape of X_train_ge: {X_train_ge.shape}")
print(f"Shape of X_test_ge: {X_test_ge.shape}")
print(f"Shape of y_train_ge: {y_train_ge.shape}")
print(f"Shape of y_test_ge: {y_test_ge.shape}")

print("\n--- Display Training Set (Shuffled) ---")
print("Note: Data points are randomly ordered in the training set due to 'shuffle = True'.")
print(X_train_ge.head())
'''
PRACTICE OPPORTUNITY:

Set the fit_intercept attribute to False, retrain the multiple linear regression model and evaluate its performance
Display the estimated coefficients and Y-intercept. What do you conclude?
'''

# --- Multiple linear regression on the GE split with the intercept disabled ---
print("--- Training Linear Regression Model without Intercept ---")

# fit_intercept=False: no intercept term is estimated by the model
linear_regression_model_no_intercept = linear_model.LinearRegression(fit_intercept=False)
linear_regression_model_no_intercept.fit(X_train_ge, y_train_ge)

y_predict_no_intercept = linear_regression_model_no_intercept.predict(X_test_ge)

# mean_squared_error / mean_absolute_error come from sklearn.metrics (file-level import).
# MSE is computed once and RMSE derived from it instead of scoring twice.
MSE_no_intercept = mean_squared_error(y_test_ge, y_predict_no_intercept)
RMSE_no_intercept = float(np.sqrt(MSE_no_intercept))
MAE_no_intercept = mean_absolute_error(y_test_ge, y_predict_no_intercept)

print('\nPerformance Metrics (fit_intercept=False):')
print('Root Mean Squared Error (RMSE) =', RMSE_no_intercept)
print('Mean Squared Error (MSE) =', MSE_no_intercept)
print('Mean Absolute Error (MAE) =', MAE_no_intercept)

# Predictions (x) against ground truth (y) on the test split
plt.figure(figsize=(13, 8))
plt.plot(y_predict_no_intercept, y_test_ge, 'o', color='b', markersize=10,
         label='Predictions vs Actuals (No Intercept)')
plt.xlabel('Model Predictions')
plt.ylabel('Actual Values (Ground Truth)')
plt.title('Model Predictions Vs. Actual Values (Ground Truth) - No Intercept')
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.show()

print('\n--- Model Parameters (fit_intercept=False) ---')
print('Trained Model Y-intercept:', linear_regression_model_no_intercept.intercept_)
print('Estimated Coefficients:', linear_regression_model_no_intercept.coef_)


# --- TRAIN AND EVALUATE A RANDOM FOREST ALGORITHM ---
# Small forest (5 trees, depth capped at 10) fitted on the GE training split
# and scored on the GE test split.
print("\n--- Training Random Forest Regression Model (Initial) ---")

# random_state fixes the forest's randomness so reruns give the same model
random_forest_model = RandomForestRegressor(n_estimators=5, max_depth=10, random_state=42)
random_forest_model.fit(X_train_ge, y_train_ge)

y_predict = random_forest_model.predict(X_test_ge)

# mean_squared_error / mean_absolute_error come from sklearn.metrics (file-level import).
# MSE is computed once and RMSE derived from it instead of scoring twice.
MSE = mean_squared_error(y_test_ge, y_predict)
RMSE = float(np.sqrt(MSE))
MAE = mean_absolute_error(y_test_ge, y_predict)

print('\nPerformance Metrics (Random Forest, n_estimators=5, max_depth=10):')
print('Root Mean Squared Error (RMSE) =', RMSE)
print('Mean Squared Error (MSE) =', MSE)
print('Mean Absolute Error (MAE) =', MAE)

# Predictions (x) against ground truth (y) on the test split
plt.figure(figsize=(13, 8))
plt.plot(y_predict, y_test_ge, 'o', color='r', markersize=10)
plt.xlabel('Model Predictions')
plt.ylabel('Actual Values (Ground Truth)')
plt.title('Random Forest Model Predictions Vs. Actual Values (Ground Truth)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()



'''
PRACTICE OPPORTUNITY:

Increase the maximum depth of the tree by setting max_depth = 100
Retrain the Random Forest Regression model and evaluate its performance
'''

# --- PRACTICE OPPORTUNITY: Random Forest with max_depth = 100 ---
# Same GE train/test split as the first forest; only the depth cap changes and
# n_estimators is left at the library default.
print("\n--- Training Random Forest Regression Model with max_depth = 100 ---")

random_forest_model_100 = RandomForestRegressor(max_depth=100, random_state=42)
random_forest_model_100.fit(X_train_ge, y_train_ge)

y_predict_rf_100 = random_forest_model_100.predict(X_test_ge)

# mean_squared_error / mean_absolute_error come from sklearn.metrics (file-level import).
# MSE is computed once and RMSE derived from it instead of scoring twice.
MSE_rf_100 = mean_squared_error(y_test_ge, y_predict_rf_100)
RMSE_rf_100 = float(np.sqrt(MSE_rf_100))
MAE_rf_100 = mean_absolute_error(y_test_ge, y_predict_rf_100)

print('\nPerformance Metrics (Random Forest, max_depth=100):')
print('Root Mean Squared Error (RMSE) =', RMSE_rf_100)
print('Mean Squared Error (MSE) =', MSE_rf_100)
print('Mean Absolute Error (MAE) =', MAE_rf_100)

# Predictions (x) against ground truth (y) on the test split
plt.figure(figsize=(13, 8))
plt.plot(y_predict_rf_100, y_test_ge, 'o', color='g', markersize=10,
         label='Predictions vs Actuals (RF max_depth=100)')
plt.xlabel('Model Predictions')
plt.ylabel('Actual Values (Ground Truth)')
plt.title('Random Forest Model Predictions Vs. Actual Values (Ground Truth) - Max Depth 100')
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.show()
'''
TRAIN AND EVALUATE AN ARTIFICIAL NEURAL NETWORK TO SOLVE REGRESSION PROBLEMS
PRACTICE OPPORTUNITY:

Change the architecture of the existing Artificial Neural Network model by introducing an additional dense layer with Dropout. Feel free to choose the number of neurons.
Print the model summary and list the number of trainable parameters
'''
# Data scaling using MinMaxScaler().
# Each scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics enter the fit.
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train_ge)
X_test_scaled = scaler_X.transform(X_test_ge)

# The target Series is wrapped in a DataFrame to give the scaler 2-D input
scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(pd.DataFrame(y_train_ge))
y_test_scaled = scaler_y.transform(pd.DataFrame(y_test_ge))

# Building the original ANN model using TensorFlow / Keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Normalization, Dropout
print("\n--- Original ANN Model Summary ---")
ANN_model = Sequential()
# NOTE(review): this Normalization layer is never adapt()-ed and is given no
# mean/variance here - confirm what it is meant to add on top of the MinMax scaling.
ANN_model.add(Normalization(input_shape = [X_train_ge.shape[1],], axis = None))
ANN_model.add(Dense(1024, activation = 'relu'))
ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(512, activation = 'relu'))
ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(256, activation = 'sigmoid'))
ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(32, activation = 'sigmoid'))
ANN_model.add(Dropout(0.3))
# Single linear output unit for the regression target
ANN_model.add(Dense(units = 1, activation = 'linear'))
ANN_model.summary()

# Compile the model.
# The optimizer is reached through the `keras` module imported just above; the
# previous `tf.keras...` spelling referenced a `tf` name that was never imported.
ANN_model.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.00001), loss = 'mean_squared_error')

# Fit the model
print("Training original ANN model for 500 epochs...")
history = ANN_model.fit(X_train_scaled, y_train_scaled, epochs = 500, verbose=0)

# Generate model predictions and map them (and the scaled test target) back
# to the original target units
y_predict_scaled = ANN_model.predict(X_test_scaled)
y_predict_original = scaler_y.inverse_transform(y_predict_scaled)
y_test_original_ann = scaler_y.inverse_transform(y_test_scaled)

# Generate regression metrics.
# mean_squared_error / mean_absolute_error come from sklearn.metrics (file-level import).
# MSE is computed once and RMSE derived from it instead of scoring twice.
MSE_original = mean_squared_error(y_test_original_ann, y_predict_original)
RMSE_original = float(np.sqrt(MSE_original))
MAE_original = mean_absolute_error(y_test_original_ann, y_predict_original)

print('\nPerformance Metrics (Original ANN Model):')
print('Root Mean Squared Error (RMSE) =', RMSE_original)
print('Mean Squared Error (MSE) =', MSE_original)
print('Mean Absolute Error (MAE) =', MAE_original)

# Plot model predictions (x) against ground truth (y)
plt.figure(figsize = (13, 8))
plt.plot(y_predict_original, y_test_original_ann, 'o', color = 'r', markersize = 10)
plt.xlabel('Model Predictions')
plt.ylabel('Actual Values (Ground Truth)')
plt.title('Original ANN Model Predictions Vs. Actual Values (Ground Truth)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()


# --- PRACTICE OPPORTUNITY: Modified ANN Architecture ---
# Same layer stack as the original ANN plus one extra Dense(128)+Dropout pair.
# Reuses the scaled splits (X_train_scaled, y_train_scaled, X_test_scaled,
# y_test_scaled) and scaler_y prepared earlier in the file.
print("\n--- Modified ANN Model Summary ---")

ANN_model_modified = Sequential()
# NOTE(review): this Normalization layer is never adapt()-ed and is given no
# mean/variance here - confirm what it is meant to add on top of the MinMax scaling.
ANN_model_modified.add(Normalization(input_shape = [X_train_ge.shape[1],], axis = None))
ANN_model_modified.add(Dense(1024, activation = 'relu'))
ANN_model_modified.add(Dropout(0.3))
ANN_model_modified.add(Dense(512, activation = 'relu'))
ANN_model_modified.add(Dropout(0.3))
# New Dense Layer with Dropout
ANN_model_modified.add(Dense(128, activation = 'relu'))
ANN_model_modified.add(Dropout(0.3))
ANN_model_modified.add(Dense(256, activation = 'sigmoid'))
ANN_model_modified.add(Dropout(0.3))
ANN_model_modified.add(Dense(32, activation = 'sigmoid'))
ANN_model_modified.add(Dropout(0.3))
# Single linear output unit for the regression target
ANN_model_modified.add(Dense(units = 1, activation = 'linear'))

ANN_model_modified.summary()

print("\n--- Compiling and Training Modified ANN Model ---")
# The optimizer is reached through the `keras` module that the file imports
# with `from tensorflow import keras`; the previous `tf.keras...` spelling
# referenced a `tf` name that was never imported.
ANN_model_modified.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.00001), loss = 'mean_squared_error')

print("Training modified model for 500 epochs...")
history_modified = ANN_model_modified.fit(X_train_scaled, y_train_scaled, epochs = 500, verbose=0)

# Predict on the scaled test features, then map predictions and the scaled
# test target back to the original target units
y_predict_scaled_modified = ANN_model_modified.predict(X_test_scaled)
y_predict_modified = scaler_y.inverse_transform(y_predict_scaled_modified)
y_test_original_modified_ann = scaler_y.inverse_transform(y_test_scaled)

# mean_squared_error / mean_absolute_error come from sklearn.metrics (file-level import).
# MSE is computed once and RMSE derived from it instead of scoring twice.
MSE_modified = mean_squared_error(y_test_original_modified_ann, y_predict_modified)
RMSE_modified = float(np.sqrt(MSE_modified))
MAE_modified = mean_absolute_error(y_test_original_modified_ann, y_predict_modified)

print('\nPerformance Metrics (Modified ANN Model):')
print('Root Mean Squared Error (RMSE) =', RMSE_modified)
print('Mean Squared Error (MSE) =', MSE_modified)
print('Mean Absolute Error (MAE) =', MAE_modified)

# Predictions (x) against ground truth (y) on the test split
plt.figure(figsize = (13, 8))
plt.plot(y_predict_modified, y_test_original_modified_ann, 'o', color = 'purple', markersize = 10)
plt.xlabel('Model Predictions')
plt.ylabel('Actual Values (Ground Truth)')
plt.title('Modified ANN Model Predictions Vs. Actual Values (Ground Truth)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()