In [1]:
import pandas as pd
import pandas_datareader as web
from datetime import datetime
from joblib import load

In [2]:
#function to fetch data for specified company
def fetch_data_for_company(stock_symbol, start_date, end_date):
    df = web.DataReader(stock_symbol, 'yahoo', start_date, end_date)
    return df

In [3]:
#function to preprocess data
#employs preprocessing techniques seen in
#process-data.ipynb
#and linear-regression.ipynb
def preprocess_data(df):
    # Add your preprocessing steps here
    #drop Adj Close column
    df = df.drop('Adj Close', axis=1)

    #make column names lowercase
    df.columns = df.columns.str.lower()
    #calculate moving averages
    df['7_day_ma'] = df['close'].rolling(window=7).mean()
    df['15_day_ma'] = df['close'].rolling(window=15).mean()
    df['30_day_ma'] = df['close'].rolling(window=30).mean()
    
    #calculate daily returns
    df['daily_returns'] = df['close'].pct_change()
    
    #create column for daily volatility over window_size time frame
    window_size = 30  # change as needed
    df['daily_volatility'] = df['daily_returns'].rolling(window=window_size).std().reset_index(level=0, drop=True)


    #Convert the 'date' column to datetime objects this is necessary to ensure dates are sorted properly
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day
    
    # columns to create lag for
    lag_columns = ['open', 'high', 'low', 'volume', '7_day_ma', '15_day_ma', '30_day_ma', 'daily_returns', 'daily_volatility']
    # create lag features
    for col in lag_columns: #for every column
        for n in [1, 3, 5, 7, 15, 30]:  # for every lag features
            column_name = f'{col}_lag_{n}'  # Name for the new lagged column
            df[column_name] = df[col].shift(n)  # Create the lagged column
            
    # drop NaN values
    df = df.dropna()
    
    
    #a list of columns to exclude. This should be all the columns with data from a day d for which we are trying 
    #to make predictions for (because we will not have access to this data in practice)
    exclude = [col for col in lag_columns if any(f'{col}_lag_' in c for c in df.columns)]
    #a list of the features to include. This is all columns that are not the date, target, or included
    #in our list of columns to exclude
    features = [col for col in df.columns if col not in ['date', 'close'] + exclude]
    
    return df

In [4]:

# Load the model
model = load('lr-model.joblib')

In [5]:
#get a list of companies
with open('../news-scraping/s&pCompanies.txt', 'r') as file:
    companies = file.read().splitlines()


In [6]:
#start and end date for data being pulled
start_date = datetime(2023, 6, 1)
end_date = datetime(2023, 10, 1)


In [7]:
for company in companies:# for each company
    try:
        company_data = fetch_data_for_company(company, start_date, end_date)#get company data
        processed_data = preprocess_data(company_data) #preprocess data
        predictions = model.predict(processed_data) #make predictions
        
        # Calculate the difference between actual and predicted prices
        difference = company_data['Close'] - predictions
        difference_sign = difference.apply(lambda x: '+' if x > 0 else '-')

        #Calculate MSE and R2
        mse = mean_squared_error(company_data['Close'], predictions)
        r2 = r2_score(company_data['Close'], predictions)

        #output results
        print(f"Company: {company}")
        print(f"Actual Prices: \n{company_data['Close']}")
        print(f"Predicted Prices: \n{predictions}")
        print(f"Difference: \n{difference_sign}{abs(difference)}")
        print(f"MSE: {mse}, R2: {r2}\n")

SyntaxError: unexpected EOF while parsing (798369847.py, line 20)

In [8]:
stocks = 'FB'
data_source = 'yahoo'
start = datetime(2020,1,1)
end = datetime(2020,12,31)
# web.DataReader('FB', 'yahoo', start='2020-01-01', end='2019-10-09')
df = web.DataReader(stocks, data_source, start, end)
df


TypeError: string indices must be integers