In [1]:
!pip install yfinance # uncomment these to install missing packages if they are not already installed
!pip install pandas



In [12]:
import yfinance as yf
import pandas as pd

def get_price(tick, start='2022-10-01', end=None):
    """Fetch the closing-price series of a single ticker through yfinance.

    Args:
        tick: Ticker symbol handed to ``yf.Ticker``.
        start: Start of the requested window, forwarded to ``Ticker.history``.
        end: End of the requested window, forwarded to ``Ticker.history``.

    Returns:
        The ``'Close'`` column of the frame returned by ``Ticker.history``.
    """
    ticker = yf.Ticker(tick)
    history = ticker.history(start=start, end=end)
    return history['Close']

def get_prices(tickers, start='2022-10-01', end=None):
    """Collect closing prices for several tickers into one DataFrame.

    Args:
        tickers: Iterable of ticker symbols; each becomes one column.
        start: Start of the requested window, forwarded to ``get_price``.
        end: End of the requested window, forwarded to ``get_price``.

    Returns:
        A DataFrame with one column per ticker, filled from ``get_price``.
    """
    prices = pd.DataFrame()
    for symbol in tickers:
        closes = get_price(symbol, start, end)
        # Column assignment: the first series sets the frame's index, and
        # later series are aligned to that index.
        prices[symbol] = closes
    return prices

def get_price_local(tick, csv_file, start='2022-10-01', end=None):
    """Read one ticker's price column from a local CSV file.

    The CSV must contain a ``Date`` column; it is parsed as dates and used as
    the index of the returned series.

    Args:
        tick: Name of the column to return.
        csv_file: Path of the CSV file to read.
        start: Accepted so the signature mirrors ``get_price``.
        end: Accepted so the signature mirrors ``get_price``.

    Returns:
        The column named ``tick``, indexed by ``Date``.

    NOTE(review): ``start`` and ``end`` are not applied -- every row of the
    file is returned regardless of the requested window.
    """
    prices = pd.read_csv(csv_file, index_col='Date', parse_dates=['Date'])
    column = prices[tick]
    return column

def get_prices_local(tickers, csv_file, start='2022-10-01', end=None):
    """Read several tickers' price columns from a local CSV file.

    The file is read and parsed once, then the requested columns are selected
    (previously the whole CSV was re-read for every ticker).

    The CSV must contain a ``Date`` column; it is parsed as dates and used as
    the index of the returned frame.

    Args:
        tickers: Iterable of column names to return, in the desired order.
            Repeated names yield a single column.
        csv_file: Path of the CSV file to read.
        start: Accepted so the signature mirrors ``get_prices``.
        end: Accepted so the signature mirrors ``get_prices``.

    Returns:
        A DataFrame indexed by ``Date`` with one column per requested ticker,
        or an empty DataFrame when ``tickers`` is empty.

    Raises:
        KeyError: If a requested ticker is not a column of the CSV.

    NOTE(review): ``start`` and ``end`` are not applied -- every row of the
    file is returned regardless of the requested window.
    """
    # De-duplicate while keeping first-seen order, so repeated tickers produce
    # one column each (same as assigning the same column twice).
    columns = list(dict.fromkeys(tickers))
    if not columns:
        # Nothing requested: return an empty frame without touching the file.
        return pd.DataFrame()

    prices = pd.read_csv(csv_file, parse_dates=['Date'], index_col='Date')
    # Copy so callers get an independent frame rather than a slice of `prices`.
    return prices[columns].copy()


# Prepare training and testing data sets

In [15]:
# Tickers whose prices are the model inputs, and the ticker whose price is predicted.
feature_stocks = ['tsla', 'meta', 'goog', 'amzn', 'nflx', 'gbtc', 'gdx', 'intc', 'dal', 'c']
predict_stock = 'msft'

# ---- training set ----
start_date_train = '2023-1-01'
end_date_train = '2024-6-30'

# Get training set through yfinance
#X_train=get_prices(feature_stocks,start=start_date_train,end=end_date_train)
#y_train=get_prices([predict_stock],start=start_date_train,end=end_date_train)
# Get training set locally through csv (yfinance causing issues)
X_train = get_prices_local(feature_stocks, "/content/x_training.csv", start=start_date_train, end=end_date_train)
y_train = get_prices_local([predict_stock], "/content/y_training.csv", start=start_date_train, end=end_date_train)

# ---- testing set ----
start_date_test = '2024-11-01'
end_date_test = '2024-12-31'

# Get testing set through yfinance
#X_test=get_prices(feature_stocks,start=start_date_test,end=end_date_test)
#y_test=get_prices([predict_stock],start=start_date_test,end=end_date_test)
# Get testing set through local file.
# Pass the *test* window here: the training dates were previously passed by
# mistake (copy-paste from the training calls above).
X_test = get_prices_local(feature_stocks, "/content/x_testing.csv", start=start_date_test, end=end_date_test)
y_test = get_prices_local([predict_stock], "/content/y_testing.csv", start=start_date_test, end=end_date_test)

In [16]:
# Display the training feature prices (one column per ticker in feature_stocks).
X_train

Unnamed: 0_level_0,tsla,meta,goog,amzn,nflx,gbtc,gdx,intc,dal,c
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-03 00:00:00-05:00,108.099998,124.265312,89.378853,85.820000,294.950012,8.200000,28.842234,25.775143,32.105247,41.898052
2023-01-04 00:00:00-05:00,113.639999,126.885307,88.392395,85.139999,309.410004,8.380000,30.067492,26.691210,33.857689,42.977985
2023-01-05 00:00:00-05:00,110.339996,126.456947,86.459335,83.120003,309.700012,8.450000,29.804935,26.575495,34.684696,42.785809
2023-01-06 00:00:00-05:00,113.059998,129.525223,87.844376,86.080002,315.549988,8.650000,30.689848,27.703699,35.472309,43.298317
2023-01-09 00:00:00-05:00,119.769997,128.977325,88.482086,87.360001,315.170013,9.650000,30.398119,28.262981,36.200855,43.508812
...,...,...,...,...,...,...,...,...,...,...
2024-06-24 00:00:00-04:00,182.580002,498.032776,180.347717,185.570007,669.020020,52.610001,33.852570,30.377298,49.083549,59.807232
2024-06-25 00:00:00-04:00,187.350006,509.702240,185.126007,186.339996,672.409973,55.020000,33.447327,30.546227,48.497330,60.041241
2024-06-26 00:00:00-04:00,196.369995,512.217773,184.916504,193.610001,677.690002,54.130001,33.427559,30.347488,47.871361,59.719482
2024-06-27 00:00:00-04:00,197.419998,518.646423,186.402878,197.850006,684.340027,54.520000,33.832802,30.397173,48.288670,60.011990


In [17]:
# Display the training target prices (the single predict_stock column).
y_train

Unnamed: 0_level_0,msft
Date,Unnamed: 1_level_1
2023-01-03 00:00:00-05:00,235.711700
2023-01-04 00:00:00-05:00,225.400925
2023-01-05 00:00:00-05:00,218.720535
2023-01-06 00:00:00-05:00,221.298248
2023-01-09 00:00:00-05:00,223.452896
...,...
2024-06-24 00:00:00-04:00,445.971893
2024-06-25 00:00:00-04:00,449.239441
2024-06-26 00:00:00-04:00,450.444855
2024-06-27 00:00:00-04:00,451.132263


# Convert training and testing data into NumPy arrays

In [20]:
import numpy as np

# Rebind each data set to a plain NumPy array (np.array makes a copy), so the
# regression steps work on raw matrices instead of DataFrames.
X_train, y_train, X_test, y_test = [
    np.array(data_set) for data_set in (X_train, y_train, X_test, y_test)
]


# Use linear regression to predict msft stock price from the other stocks' prices

## 1. Append a dummy feature to both X_train and X_test

In [24]:
# Your solution here
# Append a constant column of ones to each feature matrix so the linear model
# can learn an intercept term.
# NOTE: X_train / X_test are rebound in place, so re-running this cell appends
# one more ones column each time.
X_train_shape, X_test_shape = X_train.shape, X_test.shape

X_train_ones = np.ones((X_train_shape[0], 1))
X_test_ones = np.ones((X_test_shape[0], 1))

X_train = np.concatenate((X_train, X_train_ones), axis=1)
X_test = np.concatenate((X_test, X_test_ones), axis=1)

## 2. Find the best linear regression model based on your training data ($w=(X X')^{-1} X y$)
### Note that you may need to transpose the matrices to make things work



In [25]:
# Your solution here
XT = np.linalg.inv(X_train)
X_mult_XT_train = np
weights = np.linalg.inv(X_train)

LinAlgError: Last 2 dimensions of the array must be square

## 3. Report your training and testing error
### How far is your prediction from the actual price? Compute the mean squared error for both the training and testing sets

In [None]:
# Your solution here