In [3]:
import yfinance as yf
import pandas as pd

# List of tickers for the companies
tickers = ['AAPL', 'MSFT', 'GOOGL', 'JPM', 'BAC', 'WFC', 'JNJ', 'PFE', 'MRK', 'XOM', 'CVX', 'Meta', 'VZ', 'T']

# Empty list to store the dataframes
combined_df_list = []

# Fetching data for each ticker and appending to the list
for ticker in tickers:
    data = yf.download(ticker, start="2024-01-01", end="2024-04-30")
    
    # Calculate the short-term (12-day) and long-term (26-day) exponential moving averages
    data['EMA_12'] = data['Close'].ewm(span=12, adjust=False).mean()
    data['EMA_26'] = data['Close'].ewm(span=26, adjust=False).mean()
    
    # Calculate the MACD line
    data['MACD'] = data['EMA_12'] - data['EMA_26']
    
    # Calculate the Signal Line (9-day EMA of MACD)
    data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()
    
    # Drop the intermediate columns (EMA_12 and EMA_26)
    data.drop(['EMA_12', 'EMA_26'], axis=1, inplace=True)
    
    combined_df_list.append(data)

# Now combined_df_list contains the stock data for each company with MACD and Signal Line


[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [12]:
combined_df = pd.concat(combined_df_list, keys=tickers)

# Reset index to remove multi-indexing
combined_df.reset_index(inplace=True)

# Display the combined dataframe
print(combined_df)

     level_0       Date        Open        High         Low       Close  \
0       AAPL 2024-01-02  187.149994  188.440002  183.889999  185.639999   
1       AAPL 2024-01-03  184.220001  185.880005  183.429993  184.250000   
2       AAPL 2024-01-04  182.149994  183.089996  180.880005  181.910004   
3       AAPL 2024-01-05  181.990005  182.759995  180.169998  181.179993   
4       AAPL 2024-01-08  182.089996  185.600006  181.500000  185.559998   
...      ...        ...         ...         ...         ...         ...   
1143       T 2024-04-23   16.379999   16.530001   16.290001   16.500000   
1144       T 2024-04-24   16.900000   17.030001   16.379999   16.809999   
1145       T 2024-04-25   16.700001   17.000000   16.500000   16.580000   
1146       T 2024-04-26   16.469999   16.879999   16.389999   16.750000   
1147       T 2024-04-29   16.990000   17.280001   16.990000   17.020000   

       Adj Close    Volume      MACD  Signal_Line  
0     185.403412  82488700  0.000000     0.0000

In [13]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1148 entries, 0 to 1147
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   level_0      1148 non-null   object        
 1   Date         1148 non-null   datetime64[ns]
 2   Open         1148 non-null   float64       
 3   High         1148 non-null   float64       
 4   Low          1148 non-null   float64       
 5   Close        1148 non-null   float64       
 6   Adj Close    1148 non-null   float64       
 7   Volume       1148 non-null   int64         
 8   MACD         1148 non-null   float64       
 9   Signal_Line  1148 non-null   float64       
dtypes: datetime64[ns](1), float64(7), int64(1), object(1)
memory usage: 89.8+ KB


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [19]:
X = combined_df.drop(['Close', 'level_0', 'Date', 'Adj Close'], axis=1)
y = combined_df['Close']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
model = LinearRegression()
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.5531891324453182


In [23]:
#setting up standard scaler

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

# Load the dataframe


features = combined_df.drop(['Close', 'level_0', 'Date', 'Adj Close'], axis=1)
target = combined_df['Close']

# Feature scaling
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Splitting the scaled data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_features, target, test_size=0.2, random_state=42)

# Initializing and fitting the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Making predictions
y_pred_StandardScaler = model.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 1.5531891324453182


In [25]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred_StandardScaler)

# Calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred_StandardScaler)

print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)



Mean Squared Error (MSE): 1.5531891684208108
Root Mean Squared Error (RMSE): 1.2462701025142227
Mean Absolute Error (MAE): 0.685385872690843
