In [1]:
import time 
import datetime 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.stats import linregress
import requests
import yfinance as yf

In [2]:
tickers = ["UAL", "DAL", "AAL", "LUV"]

# Create an empty list to store the dataframes
dataframes = []

# Get data using yahoo finance for the tickers
for ticker in tickers:
    ticker_data = yf.Ticker(ticker)
    df = pd.DataFrame(ticker_data.get_shares_full(start='2000-01-01', end='2023-08-01'))
    df.reset_index(inplace=True)
    df['Ticker'] = ticker
    dataframes.append(df)

# Concatenate all dataframes
stock_df = pd.concat(dataframes)

# Change the index column (date) to an actual date
stock_df['index'] = stock_df['index'].dt.strftime('%Y-%m-%d')

# Have to rename som columns...
stock_df.rename(columns={0:'Outstanding Shares','index':'Date'}, inplace = True)
stock_df.head()

Unnamed: 0,Date,Outstanding Shares,Ticker
0,2015-10-22,372810000,UAL
1,2016-02-19,359485000,UAL
2,2016-04-22,336823000,UAL
3,2016-07-19,322408000,UAL
4,2016-07-19,317286000,UAL


In [3]:
# Initialize a list of tickers for the stock
stock_names = ['United Airilnes', 'Delta Airlines','American Airlines','SouthWest Airlines']

# Define the variables
# We wil use 2015 in this one so we can fill forward later
precovid_date = int(time.mktime(datetime.datetime(2015, 1, 5, 23, 59).timetuple()))
postcovid_date = int(time.mktime(datetime.datetime(2023, 8, 1, 23, 59).timetuple()))
interval = "1wk"

# Initialize an empty list to store the dataframes for each stock
dfs = []

# Repeat a data fetch, this time from a query string
for ticker in tickers:
    query_string = f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={precovid_date}&period2={postcovid_date}&interval={interval}&events=history&includeAdjustedClose=true"
    df = pd.read_csv(query_string)
    df['Ticker'] = ticker
    dfs.append(df)

# Concatenate all the dataframes in the dfs list based on the common "Date" column
combined_df = pd.concat(dfs)

# Rearrange columns with "Ticker" as the first column
combined_df = combined_df[['Ticker'] + [col for col in combined_df.columns if col != 'Ticker']]

combined_df

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Adj Close,Volume
0,UAL,2015-01-05,66.629997,67.519997,63.110001,65.339996,65.339996,21566500
1,UAL,2015-01-12,66.160004,68.260002,62.369999,65.750000,65.750000,32651700
2,UAL,2015-01-19,67.110001,73.220001,65.849998,73.099998,73.099998,30047300
3,UAL,2015-01-26,72.540001,74.519997,68.260002,69.370003,69.370003,28651400
4,UAL,2015-02-02,69.080002,72.230003,65.739998,67.580002,67.580002,36767600
...,...,...,...,...,...,...,...,...
443,LUV,2023-07-03,36.029999,38.000000,36.029999,37.590000,37.590000,25464200
444,LUV,2023-07-10,37.650002,39.529999,36.169998,36.360001,36.360001,48805800
445,LUV,2023-07-17,36.290001,37.480000,35.880001,37.250000,37.250000,27509700
446,LUV,2023-07-24,36.990002,37.400002,32.660000,33.700001,33.700001,76530200


In [4]:
# Merging the dataframes by date
merged_df = pd.merge(combined_df, stock_df, on =['Ticker','Date'], how = 'left')

# Need to set an initial values for the shares of United because data does not go back far enough.
# Data is later cut at 2016 so the 2015 error data won't matter
merged_df.at[0, 'Outstanding Shares'] = 372810000

# Fill the NaN values forward using the ffill method
merged_df['Outstanding Shares'].fillna(method='ffill', inplace = True)

# Create the Market Cap column
merged_df['Market Cap'] = merged_df['Close']*merged_df['Outstanding Shares']
merged_df

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Adj Close,Volume,Outstanding Shares,Market Cap
0,UAL,2015-01-05,66.629997,67.519997,63.110001,65.339996,65.339996,21566500,372810000.0,2.435940e+10
1,UAL,2015-01-12,66.160004,68.260002,62.369999,65.750000,65.750000,32651700,372810000.0,2.451226e+10
2,UAL,2015-01-19,67.110001,73.220001,65.849998,73.099998,73.099998,30047300,372810000.0,2.725241e+10
3,UAL,2015-01-26,72.540001,74.519997,68.260002,69.370003,69.370003,28651400,372810000.0,2.586183e+10
4,UAL,2015-02-02,69.080002,72.230003,65.739998,67.580002,67.580002,36767600,372810000.0,2.519450e+10
...,...,...,...,...,...,...,...,...,...,...
1788,LUV,2023-07-03,36.029999,38.000000,36.029999,37.590000,37.590000,25464200,607752000.0,2.284540e+10
1789,LUV,2023-07-10,37.650002,39.529999,36.169998,36.360001,36.360001,48805800,607752000.0,2.209786e+10
1790,LUV,2023-07-17,36.290001,37.480000,35.880001,37.250000,37.250000,27509700,607752000.0,2.263876e+10
1791,LUV,2023-07-24,36.990002,37.400002,32.660000,33.700001,33.700001,76530200,607752000.0,2.048124e+10


In [5]:
# Now we need to filter the data frame to be 2016 and beyond
merged_df['Date'] = pd.to_datetime(merged_df['Date'])
filtered_df=merged_df[merged_df['Date'].dt.year >= 2016]
filtered_df['Date Numeric'] = pd.to_numeric(filtered_df['Date'])
filtered_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Date Numeric'] = pd.to_numeric(filtered_df['Date'])


Unnamed: 0,Ticker,Date,Open,High,Low,Close,Adj Close,Volume,Outstanding Shares,Market Cap,Date Numeric
52,UAL,2016-01-04,55.790001,56.34,51.759998,51.889999,51.889999,30039400,372810000.0,19345110000.0,1451865600000000000
53,UAL,2016-01-11,52.200001,52.549999,44.93,45.669998,45.669998,47275300,372810000.0,17026230000.0,1452470400000000000
54,UAL,2016-01-18,46.509998,47.73,42.169998,46.720001,46.720001,38951700,372810000.0,17417680000.0,1453075200000000000
55,UAL,2016-01-25,46.91,49.41,45.27,48.279999,48.279999,36135600,372810000.0,17999270000.0,1453680000000000000
56,UAL,2016-02-01,47.709999,49.200001,45.049999,46.860001,46.860001,31200200,372810000.0,17469880000.0,1454284800000000000


In [6]:
        # furture predictions based on y= mx+b of the post covid regresion line
x= pd.DataFrame(columns =["Date"])
x["Date"]= ["2023-12-30"]
x = x['Date'] = pd.to_datetime(x['Date']).astype(np.int64)
new_x =x["Date"].iloc[0]
for tick in tickers:
    future_df= filtered_df[(filtered_df['Ticker'] == f'{tick}') & (filtered_df['Date'] >= pd.to_datetime('2023-01-01'))]
    #future_df = future_df[(filtered_df['Date'] <= pd.to_datetime('2023-07-01'))]
    X = np.asarray(future_df['Date Numeric']);
    Y = np.asarray(future_df['Market Cap']);
    model = linregress(X, Y)
    slope, intercept = model.slope, model.intercept
    predict = slope*new_x + intercept
    today_df = filtered_df.loc[filtered_df["Date"] == "2023-07-31"]  
    today_df =today_df.loc[filtered_df["Ticker"]== f'{tick}']
    today = today_df["Market Cap"].iloc[0] 
    print(f'The market cap today of {tick} is {today}')
    print(f"the Market Cap 2023-12-30 prediction of {tick} is {predict}")

The market cap today of UAL is 17402437620.430016
the Market Cap 2023-12-30 prediction of UAL is 18604995148.865997
The market cap today of DAL is 29679842344.281986
the Market Cap 2023-12-30 prediction of DAL is 33166991939.413086
The market cap today of AAL is 10541628579.84
the Market Cap 2023-12-30 prediction of AAL is 11024244370.09645
The market cap today of LUV is 20317149360.0
the Market Cap 2023-12-30 prediction of LUV is 19870562516.485744
