In [3]:
import pandas as pd
from yahoo_fin import stock_info as si
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Fetch historical data for the past two years up until 2024-02-09
end_date = datetime(2024, 2, 9)
start_date = end_date - timedelta(days=2*365)  # Approximately two years

# Get historical data for AAPL
data = si.get_data('AAPL', start_date=start_date, end_date=end_date)

# Reset index to have a column with dates and rename it to 'date'
data.reset_index(inplace=True)
data.rename(columns={'index': 'date'}, inplace=True)


In [4]:
# Calculate Simple Moving Average (SMA) for the last 10 days
data['SMA_90'] = data['close'].rolling(window=10).mean()

# Drop rows with NaN values
data.dropna(inplace=True)
display(data)

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker,SMA_90
9,2022-02-23,165.539993,166.149994,159.750000,160.070007,158.047089,90009200,AAPL,169.183002
10,2022-02-24,152.580002,162.850006,152.000000,162.740005,160.683319,141147500,AAPL,167.829002
11,2022-02-25,163.839996,165.119995,160.869995,164.850006,162.766663,91974200,AAPL,167.102003
12,2022-02-28,163.059998,165.419998,162.429993,165.119995,163.033218,95056600,AAPL,166.750003
13,2022-03-01,164.699997,166.600006,161.970001,163.199997,161.137512,83474400,AAPL,166.182002
...,...,...,...,...,...,...,...,...,...
497,2024-02-02,179.860001,187.330002,179.250000,185.850006,185.361740,102518000,AAPL,190.703998
498,2024-02-05,188.149994,189.250000,185.839996,187.679993,187.186920,69668800,AAPL,190.082997
499,2024-02-06,186.860001,189.309998,186.770004,189.300003,188.802673,43490800,AAPL,189.494998
500,2024-02-07,190.639999,191.050003,188.610001,189.410004,188.912399,53439000,AAPL,188.985999


In [5]:
# Split the dataset into 80% training and 20% testing
train_size = int(len(data) * 0.8)
train = data[:train_size]
test = data[train_size:]

# Predict using the SMA model
sma_window = 90
test['SMA_Prediction'] = test['close'].rolling(window=sma_window).mean().shift(1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['SMA_Prediction'] = test['close'].rolling(window=sma_window).mean().shift(1)


In [6]:
# Drop NaN values in predictions
test.dropna(inplace=True)

# Calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    return (abs((y_true - y_pred) / y_true).mean()) * 100

# Calculate metrics
mae_sma = mean_absolute_error(test['close'], test['SMA_Prediction'])
mse_sma = mean_squared_error(test['close'], test['SMA_Prediction'])
rmse_sma = mse_sma ** 0.5
r2_sma = r2_score(test['close'], test['SMA_Prediction'])
mape_sma = mean_absolute_percentage_error(test['close'], test['SMA_Prediction'])

print(f"SMA Model - MAE: {mae_sma}, RMSE: {rmse_sma}, R-squared: {r2_sma}, MAPE: {mape_sma}")



SMA Model - MAE: 3.2447414633668483, RMSE: 3.8243165552050415, R-squared: -2.590441398472503, MAPE: 1.7152173092683891


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.dropna(inplace=True)


In [9]:
import pandas as pd
from yahoo_fin import stock_info as si
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# List of stock symbols
stocks = ['AAPL', 'ABBV', 'ADBE', 'AMZN', 'AVGO', 'BRK-B', 'CRM', 'COST', 'CVX', 'HD',
          'JNJ', 'JPM', 'LLY', 'MA', 'META', 'MRK', 'MSFT', 'NVDA', 'PG', 'TSLA',
          'UNH', 'V', 'XOM']

# Function to calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    return (abs((y_true - y_pred) / y_true).mean()) * 100

# Initialize the results DataFrame
results = pd.DataFrame(columns=['Stock', 'MAE', 'RMSE', 'R²', 'MAPE'])

# Fetch data, apply SMA model, and calculate metrics for each stock
for stock in stocks:
    try:
        # Fetch historical data for the past two years up until 2024-02-09
        end_date = datetime(2024, 2, 9)
        start_date = end_date - timedelta(days=2*365)  # Approximately two years
        data = si.get_data(stock, start_date=start_date, end_date=end_date)

        # Reset index to have a column with dates and rename it to 'date'
        data.reset_index(inplace=True)
        data.rename(columns={'index': 'date'}, inplace=True)

        # Calculate Simple Moving Average (SMA) for the last 10 days
        data['SMA_10'] = data['close'].rolling(window=15).mean()

        # Drop rows with NaN values
        data.dropna(inplace=True)

        # Split the dataset into 80% training and 20% testing
        train_size = int(len(data) * 0.8)
        train = data[:train_size]
        test = data[train_size:]

        # Predict using the SMA model
        sma_window = 90
        test['SMA_Prediction'] = test['close'].rolling(window=sma_window).mean().shift(1)

        # Drop NaN values in predictions
        test.dropna(inplace=True)

        # Calculate metrics
        mae = mean_absolute_error(test['close'], test['SMA_Prediction'])
        mse = mean_squared_error(test['close'], test['SMA_Prediction'])
        rmse = mse ** 0.5
        r2 = r2_score(test['close'], test['SMA_Prediction'])
        mape = mean_absolute_percentage_error(test['close'], test['SMA_Prediction'])

        # Append the results to the DataFrame
        results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
    except Exception as e:
        print(f"Error processing {stock}: {e}")

# Display the results
display(results)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['SMA_Prediction'] = test['close'].rolling(window=sma_window).mean().shift(1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.dropna(inplace=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['SMA_Prediction'] = test['close'].r

Unnamed: 0,Stock,MAE,RMSE,R²,MAPE
0,AAPL,2.703585,3.046785,-2.600779,1.435826
1,ABBV,19.355399,19.681848,-23.752679,11.343292
2,ADBE,43.106432,44.383123,-24.447113,6.905792
3,AMZN,22.4704,23.14031,-12.940879,13.45821
4,AVGO,234.96555,235.879379,-64.865956,19.132882
5,BRK-B,34.483179,34.723798,-51.878206,8.808584
6,CRM,50.0514,50.10227,-260.25037,17.457837
7,COST,96.483722,96.642589,-115.626871,13.592674
8,CVX,2.305768,2.573174,-0.360881,1.532149
9,HD,38.091061,38.194507,-119.466805,10.629551


In [15]:
import pandas as pd
from yahoo_fin import stock_info as si
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# List of stock symbols
stocks = ['AAPL', 'ABBV', 'ADBE', 'AMZN', 'AVGO', 'BRK-B', 'CRM', 'COST', 'CVX', 'HD',
          'JNJ', 'JPM', 'LLY', 'MA', 'META', 'MRK', 'MSFT', 'NVDA', 'PG', 'TSLA',
          'UNH', 'V', 'XOM']

# Function to calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    return (abs((y_true - y_pred) / y_true).mean()) * 100

# Initialize the results DataFrame
results = pd.DataFrame(columns=['Stock', 'MAE', 'RMSE', 'R²', 'MAPE'])

# Fetch data, apply SMA model, and calculate metrics for each stock
for stock in stocks:
    try:
        # Fetch historical data for the past two years up until 2024-02-09
        end_date = datetime(2024, 2, 9)
        start_date = end_date - timedelta(days=2*365)  # Approximately two years
        data = si.get_data(stock, start_date=start_date, end_date=end_date)

        # Reset index to have a column with dates and rename it to 'date'
        data.reset_index(inplace=True)
        data.rename(columns={'index': 'date'}, inplace=True)

        # Calculate Simple Moving Average (SMA) for the last 90 days
        data['SMA_90'] = data['close'].rolling(window=90).mean()

        # Drop rows with NaN values
        data.dropna(inplace=True)

        # Predict using the SMA model
        data['SMA_Prediction'] = data['SMA_90'].shift(1)

        # Drop NaN values in predictions
        data.dropna(inplace=True)

        # Calculate metrics
        mae = mean_absolute_error(data['close'], data['SMA_Prediction'])
        mse = mean_squared_error(data['close'], data['SMA_Prediction'])
        rmse = mse ** 0.5
        r2 = r2_score(data['close'], data['SMA_Prediction'])
        mape = mean_absolute_percentage_error(data['close'], data['SMA_Prediction'])

        # Append the results to the DataFrame
        results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
    except Exception as e:
        print(f"Error processing {stock}: {e}")

# Display the results
display(results)


  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': mape}, ignore_index=True)
  results = results.append({'Stock': stock, 'MAE': mae, 'RMSE': rmse, 'R²': r2, 'MAPE': 

Unnamed: 0,Stock,MAE,RMSE,R²,MAPE
0,AAPL,10.724031,12.609713,0.568821,6.509144
1,ABBV,7.502806,9.162512,-0.064945,5.014901
2,ADBE,44.415646,55.516949,0.720638,10.366098
3,AMZN,12.73435,15.025335,0.470418,10.95559
4,AVGO,79.293756,103.430342,0.763518,10.375781
5,BRK-B,15.210147,18.450063,0.647772,4.787234
6,CRM,19.30638,23.217364,0.62538,9.873487
7,COST,28.148349,36.849865,0.602638,5.02251
8,CVX,8.857454,11.19942,0.042643,5.583296
9,HD,18.416596,21.281775,0.039546,5.90644


In [18]:
import pandas as pd
from yahoo_fin import stock_info as si
from datetime import datetime, timedelta

# List of stock symbols
stocks = ['AAPL', 'ABBV', 'ADBE', 'AMZN', 'AVGO', 'BRK-B', 'CRM', 'COST', 'CVX', 'HD',
          'JNJ', 'JPM', 'LLY', 'MA', 'META', 'MRK', 'MSFT', 'NVDA', 'PG', 'TSLA',
          'UNH', 'V', 'XOM']

# Function to round prices to the nearest 100
def round_to_nearest_100(price):
    return round(price / 100) * 100

# Initialize the results DataFrame
price_ranges = pd.DataFrame(columns=['Stock', 'Price Range'])

# Fetch data and calculate price range for each stock
for stock in stocks:
    try:
        # Fetch historical data for the past two years up until 2024-02-09
        end_date = datetime(2024, 2, 9)
        start_date = end_date - timedelta(days=2*365)  # Approximately two years
        data = si.get_data(stock, start_date=start_date, end_date=end_date)

        # Calculate the minimum and maximum prices
        min_price = data['close'].min()
        max_price = data['close'].max()

        # Round prices to the nearest 100
        min_price_rounded = round_to_nearest_100(min_price)
        max_price_rounded = round_to_nearest_100(max_price)

        # Format the price range
        price_range = f"${min_price_rounded}-${max_price_rounded}"

        # Append the results to the DataFrame
        price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
    except Exception as e:
        print(f"Error processing {stock}: {e}")

# Display the results
display(price_ranges)



  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range}, ignore_index=True)
  price_ranges = price_ranges.append({'Stock': stock, 'Price Range': price_range},

Unnamed: 0,Stock,Price Range
0,AAPL,$100-$200
1,ABBV,$100-$200
2,ADBE,$300-$600
3,AMZN,$100-$200
4,AVGO,$400-$1300
5,BRK-B,$300-$400
6,CRM,$100-$300
7,COST,$400-$700
8,CVX,$100-$200
9,HD,$300-$400
