In [None]:
import yfinance as yf

# Stock symbol to download (e.g., 'AAPL' for Apple)
ticker = 'AAPL'

# Date range requested from yfinance. The yfinance documentation describes
# `end` as exclusive, so this range covers calendar year 2024.
start_date = '2024-01-01'
end_date = '2025-01-01'

# Fetch the historical data for the configured range
data = yf.download(ticker, start=start_date, end=end_date)

# A failed or empty download would otherwise be written out as a CSV with no
# price rows and still be reported as saved, so stop here instead.
# NOTE(review): assumes yf.download hands back an empty frame (rather than
# raising) when nothing could be fetched — confirm for the installed version.
if data.empty:
    raise RuntimeError(
        f"No data returned for {ticker} between {start_date} and {end_date}."
    )

# Save the data to a CSV file
data.to_csv('raw_data.csv')

print("CSV file has been saved.")


In [None]:
import pandas as pd

# Path of the raw CSV to read
file_path = 'raw_data.csv'  # Replace with your file path

# Only the first line is used as the header; the dates are read from the
# 'Price' column and the closing prices from the 'Close' column.
# NOTE(review): presumably the raw file comes from a yfinance version that
# writes extra header lines (e.g. 'Ticker' / 'Date') below the first one —
# those lines arrive here as ordinary rows. Confirm against raw_data.csv.
df = pd.read_csv(file_path)

# Rows whose 'Close' value is not a number (such as leftover header lines)
# are not price observations, so they are filtered out before saving.
closing_price = pd.to_numeric(df['Close'], errors='coerce')
is_price_row = closing_price.notna()

# Build the output with the column names the evaluation cells read from
# historical_data.csv: 'Time' for the date and 'Closing price' for the close.
df_extracted = (
    pd.DataFrame({
        'Time': pd.to_datetime(df.loc[is_price_row, 'Price'], errors='coerce'),
        'Closing price': closing_price[is_price_row],
    })
    .dropna(subset=['Time'])   # discard rows whose date could not be parsed
    .reset_index(drop=True)
)

# Fail loudly rather than writing a file with no usable rows.
if df_extracted.empty:
    raise ValueError(f"No valid date/closing-price rows found in {file_path}")

# Save the extracted data to a new CSV file
output_file = 'historical_data.csv'  # Name for the output file
df_extracted.to_csv(output_file, index=False)

print(f"Extracted data saved to {output_file}")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score

# Read both series; each CSV is expected to carry a 'Time' column, which is
# parsed into datetimes so the two files can be joined on it.
historical_data = pd.read_csv('historical_data.csv', parse_dates=['Time'])
predicted_data = pd.read_csv('predicted_data.csv', parse_dates=['Time'])

# Inner join: only timestamps present in both files are evaluated.
merged_data = pd.merge(historical_data, predicted_data, on='Time', how='inner')

# Absolute error of each prediction, as a percentage of the actual close.
relative_error = (
    merged_data['Closing price'] - merged_data['Predicted values']
) / merged_data['Closing price']
merged_data['Percentage Error'] = relative_error.abs() * 100

# Convenient handles for the columns used by the plots and metrics below.
time_axis = merged_data['Time']
actual_close = merged_data['Closing price']
predicted_close = merged_data['Predicted values']

# Figure 1: actual vs predicted closing price over time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_axis, actual_close, label='Actual Closing Price', color='blue')
ax.plot(time_axis, predicted_close, label='Predicted Values', color='red', linestyle='--')
ax.set_xlabel('Time')
ax.set_ylabel('Price')
ax.set_title('Actual vs Predicted Closing Prices')
ax.legend()
ax.grid(True)
plt.show()

# Figure 2: percentage error over time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_axis, merged_data['Percentage Error'], label='Percentage Error', color='green')
ax.set_xlabel('Time')
ax.set_ylabel('Percentage Error (%)')
ax.set_title('Percentage Error Over Time')
ax.legend()
ax.grid(True)
plt.show()

# Summary metrics over the merged rows.
mae = mean_absolute_error(actual_close, predicted_close)
r2 = r2_score(actual_close, predicted_close)
mean_percentage_error = merged_data['Percentage Error'].mean()

# Report the metrics.
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')
print(f'Mean Percentage Error: {mean_percentage_error:.2f}%')


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score

# Load the data from the CSV files. 'Time' is parsed into datetimes (as the
# unfiltered evaluation cell does) so the join compares dates, not raw text
# that may be formatted differently in the two files.
historical_data = pd.read_csv("historical_data.csv", parse_dates=['Time'])
predicted_data = pd.read_csv("predicted_data.csv", parse_dates=['Time'])

# Merge the two datasets on the 'Time' column; only timestamps present in
# both files are kept.
merged_data = pd.merge(historical_data, predicted_data, on="Time", how="inner")

# Extract the actual and predicted prices
actual_prices = merged_data['Closing price'].values
predicted_prices = merged_data['Predicted values'].values

# Threshold for filtering out extreme points.
# NOTE(review): 720 is a magic number — confirm it suits the price scale of
# the series being evaluated.
threshold = 720

# Keep only the points whose predicted magnitude is strictly above the
# threshold; everything at or below it is dropped from both series.
keep = np.abs(predicted_prices) > threshold
filtered_actual = actual_prices[keep]
filtered_predicted = predicted_prices[keep]
filtered_time = merged_data.loc[keep, 'Time']

# Without this check an empty selection only fails later inside the metric
# functions, after two blank figures have been drawn.
if filtered_actual.size == 0:
    raise ValueError(
        f"No predictions exceed the threshold of {threshold}; nothing to evaluate."
    )

# Absolute error of each prediction, as a percentage of the actual price
percentage_error = np.abs((filtered_actual - filtered_predicted) / filtered_actual) * 100

# Plot the actual vs predicted data against time, so the x-axis matches its
# 'Time' label.
plt.figure(figsize=(10, 6))
plt.plot(filtered_time, filtered_actual, label="Actual Prices", color='blue', marker='o')
plt.plot(filtered_time, filtered_predicted, label="Predicted Prices", color='red', marker='x')
plt.xlabel('Time')
plt.ylabel('Price')
plt.title('Actual vs Predicted Stock Prices')
plt.legend()
plt.xticks(rotation=45)
plt.show()

# Plot the percentage error on the same time axis
plt.figure(figsize=(10, 6))
plt.plot(filtered_time, percentage_error, label="Percentage Error", color='green', marker='.')
plt.xlabel('Time')
plt.ylabel('Percentage Error (%)')
plt.title('Percentage Error for Stock Prices')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()

# Calculate MAE, R-squared, and Mean Percentage Error on the filtered points
mae = mean_absolute_error(filtered_actual, filtered_predicted)
r2 = r2_score(filtered_actual, filtered_predicted)
mean_percentage_error = np.mean(percentage_error)

# Print the error metrics
print(f'Mean Absolute Error (MAE): {mae}')
print(f'R-squared: {r2}')
print(f'Mean Percentage Error: {mean_percentage_error:.2f}%')
