In [1]:
import time 
import datetime 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.stats import linregress
import requests
import yfinance as yf

In [2]:
tickers = ["UAL", "DAL", "AAL", "LUV"]

# Fetch the share-count history for every ticker from Yahoo Finance.
# Each frame gets the former index as an 'index' column, the share counts
# in column 0, and a 'Ticker' label column.
dataframes = [
    pd.DataFrame(
        yf.Ticker(ticker).get_shares_full(start='2000-01-01', end='2023-08-01')
    )
    .reset_index()
    .assign(Ticker=ticker)
    for ticker in tickers
]

# Stack the per-ticker frames on top of each other
stock_df = pd.concat(dataframes)

# Reduce the timestamp column to a plain 'YYYY-MM-DD' string
stock_df['index'] = stock_df['index'].dt.strftime('%Y-%m-%d')

# Give the columns meaningful names
stock_df = stock_df.rename(columns={0: 'Outstanding Shares', 'index': 'Date'})


In [3]:
# Company names, listed in the same order as `tickers`
stock_names = ['United Airilnes', 'Delta Airlines','American Airlines','SouthWest Airlines']

# Query window expressed as Unix timestamps (built from local-time datetimes).
# The window starts in 2015 so share counts can be forward-filled later.
precovid_date = int(time.mktime(datetime.datetime(2015, 1, 5, 23, 59).timetuple()))
postcovid_date = int(time.mktime(datetime.datetime(2023, 8, 1, 23, 59).timetuple()))
interval = "1wk"

# Download one weekly price-history CSV per ticker straight from the query URL
dfs = []
for ticker in tickers:
    query_string = f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={precovid_date}&period2={postcovid_date}&interval={interval}&events=history&includeAdjustedClose=true"
    df = pd.read_csv(query_string).assign(Ticker=ticker)
    dfs.append(df)

# Stack the per-ticker frames row-wise (this is a concat, not a join on "Date")
combined_df = pd.concat(dfs)

# Move "Ticker" to the front, leaving the other columns in their original order
combined_df = combined_df[['Ticker', *combined_df.columns.drop('Ticker')]]



In [4]:
# Attach the share counts to the weekly prices, matching on ticker and date.
# A left merge keeps every price row; rows with no share count on that exact
# date get NaN in 'Outstanding Shares'.
merged_df = pd.merge(combined_df, stock_df, on=['Ticker', 'Date'], how='left')

# Need to set an initial value for the shares of United because data does not go back far enough.
# Row 0 is United's first row (UAL is the first ticker concatenated into combined_df).
# Data is later cut at 2016 so the 2015 error data won't matter
merged_df.at[0, 'Outstanding Shares'] = 372810000

# Forward-fill the missing share counts *within each ticker*.
# - Assigning the result back (instead of a chained `.fillna(..., inplace=True)`
#   on the column) keeps this working under pandas copy-on-write and avoids the
#   deprecated `method=` argument.
# - Grouping by ticker stops one airline's last share count from leaking into
#   the leading rows of the next airline; such rows stay NaN until that
#   ticker's first reported value.
merged_df['Outstanding Shares'] = (
    merged_df.groupby('Ticker')['Outstanding Shares'].ffill()
)

# Create the Market Cap column: weekly close price times shares outstanding
merged_df['Market Cap'] = merged_df['Close'] * merged_df['Outstanding Shares']


In [5]:
# Now we need to filter the data frame to be 2016 and beyond
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# Take an explicit copy of the filtered rows: adding a column to a plain
# boolean-mask slice triggers pandas' SettingWithCopyWarning.
filtered_df = merged_df[merged_df['Date'].dt.year >= 2016].copy()

# Integer form of the date, used as the x-axis for the linear regression
filtered_df['Date Numeric'] = pd.to_numeric(filtered_df['Date'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Date Numeric'] = pd.to_numeric(filtered_df['Date'])


In [6]:
# Future predictions based on y = mx + b of the year-to-date regression line
# For every ticker: fit a straight line to the 2023 year-to-date market cap
# and extrapolate it to 2023-12-30; also record the market cap on 2023-07-31.
current_ls = []   # market cap per ticker on 2023-07-31
predic_ls = []    # extrapolated market cap per ticker on 2023-12-30

# Target date converted to an integer the same way 'Date Numeric' is built
# (datetime -> int64), so it lies on the regression's x-axis.
# This replaces a chained assignment (`x = x['Date'] = ...`) that rebound `x`
# to a Series and then inserted that Series into itself.
new_x = pd.to_datetime(pd.Series(["2023-12-30"])).astype(np.int64).iloc[0]

ytd_start = pd.to_datetime('2023-01-01')
for tick in tickers:
    # All rows for this ticker; both later selections filter this frame with
    # masks built from it, so no cross-frame index alignment is needed.
    ticker_df = filtered_df[filtered_df['Ticker'] == tick]

    # Year-to-date observations used for the fit
    future_df = ticker_df[ticker_df['Date'] >= ytd_start]
    X = np.asarray(future_df['Date Numeric'])
    Y = np.asarray(future_df['Market Cap'])
    model = linregress(X, Y)
    slope, intercept = model.slope, model.intercept
    predict = slope * new_x + intercept

    # Latest observation; .iloc[0] raises IndexError if there is no row
    # dated 2023-07-31 for this ticker
    today_df = ticker_df.loc[ticker_df["Date"] == "2023-07-31"]
    today = today_df["Market Cap"].iloc[0]

    current_ls.append(today)
    predic_ls.append(predict)

In [7]:
# Pre-covid market cap: each ticker's value on the row dated 2019-12-30
dec_df = filtered_df.loc[filtered_df["Date"] == "2019-12-30"]
cap_ls = [
    dec_df.loc[dec_df["Ticker"] == tick, "Market Cap"].iloc[0]
    for tick in tickers
]

In [8]:
# Market cap of each ticker on the row dated 2020-03-16
black = filtered_df.loc[filtered_df["Date"] == "2020-03-16"]
black_ls = [
    black.loc[black["Ticker"] == tick, "Market Cap"].iloc[0]
    for tick in tickers
]

In [9]:

# Summary table: one row per ticker, one column per market-cap snapshot
stock_prediction = pd.DataFrame(
    {
        'Ticker': tickers,
        'Precovid Cap': cap_ls,
        'Black Monday 2': black_ls,
        'Current Cap': current_ls,
        'Prediction Jan 24': predic_ls,
    }
)


In [10]:
# Append a totals row (index label -1, Ticker "sum") to the summary table.
t = "sum"

# Sum only the per-company rows. Excluding an existing "sum" row makes this
# cell safe to re-run; otherwise the previous totals would be added in again.
company_rows = stock_prediction[stock_prediction["Ticker"] != t]
c = company_rows["Current Cap"].sum()
s = company_rows["Prediction Jan 24"].sum()
b = company_rows["Black Monday 2"].sum()
p = company_rows["Precovid Cap"].sum()

# NOTE(review): sum_ls omits the Black Monday total `b` and is not used in the
# visible cells; it is kept unchanged in case a later cell reads it.
sum_ls = [t,p,c,s]

# Values follow the table's column order:
# Ticker, Precovid Cap, Black Monday 2, Current Cap, Prediction Jan 24
stock_prediction.loc[-1] = [t, p, b, c, s]


In [11]:
# Express every market-cap column in billions, rounded to two decimals, so
# the table is not shown in scientific notation
value = 1000000000
for cap_column in ("Current Cap", "Precovid Cap", "Prediction Jan 24", "Black Monday 2"):
    stock_prediction[cap_column] = (stock_prediction[cap_column] / value).round(2)

In [12]:
# Show the finished summary table for the presentation, under a caption
# stating the units
caption = "Market Capital In Billions of Dollars"
print(caption)
display(stock_prediction)



Market Capital In Billions of Dollars


Unnamed: 0,Ticker,Precovid Cap,Black Monday 2,Current Cap,Prediction Jan 24
0,UAL,22.4,6.07,17.21,18.56
1,DAL,37.87,13.93,29.29,33.07
2,AAL,12.11,4.44,10.36,10.98
3,LUV,29.01,17.05,20.28,19.86
-1,sum,101.39,41.5,77.14,82.46
