# Stock Price Analysis

In this notebook, we analyze the stock price data I found on Bloomberg for companies in the airline sector. This will give us a good idea of how to construct our portfolios.

In [1]:
import numpy as np
import pandas as pd
import os

In [22]:
# Collect the price files. Keep only CSVs so that stray directory entries
# ('.DS_Store', checkpoint folders, editor backups, ...) never reach
# pd.read_csv or turn into bogus tickers.
# NOTE: os.listdir returns entries in arbitrary order, so positions such as
# files[0] are not guaranteed to be the same on every machine.
files = [name for name in os.listdir('stock_prices') if name.lower().endswith('.csv')]
files

['NWA.csv',
 'UAL.csv',
 'SKYW.csv',
 'SAVE.csv',
 'LCC.csv',
 'XJT.csv',
 'VA.csv',
 'JBLU.csv',
 'HA.csv',
 'ALGT.csv',
 'AAI.csv',
 'ANS.csv',
 'FRNTQ.csv',
 'GLUX.csv',
 'ALK.csv',
 'AAL.csv',
 'LUV.csv',
 'DAL.csv',
 'MESA.csv',
 'MEH.csv',
 'RHWI.csv']

In [24]:
# Ticker symbol = file name without its extension ('NWA.csv' -> 'NWA').
# os.path.splitext removes whatever extension is present instead of assuming
# it is exactly four characters long.
tickers = [os.path.splitext(name)[0] for name in files]
tickers

['NWA',
 'UAL',
 'SKYW',
 'SAVE',
 'LCC',
 'XJT',
 'VA',
 'JBLU',
 'HA',
 'ALGT',
 'AAI',
 'ANS',
 'FRNTQ',
 'GLUX',
 'ALK',
 'AAL',
 'LUV',
 'DAL',
 'MESA',
 'MEH',
 'RHWI']

In [45]:
# Load the first listed file. The first CSV column is relabelled 'Date',
# parsed to timestamps and used as the index; the second column keeps the
# header found in the file.
sample1 = pd.read_csv('stock_prices/'+files[0])
sample1.columns = ['Date', sample1.columns[1]]
sample1['Date'] = pd.to_datetime(sample1['Date'])
sample1 = sample1.set_index('Date')
sample1.head()

Unnamed: 0_level_0,NWA US Equity
Date,Unnamed: 1_level_1
2007-05-18,24.625
2007-05-21,24.8
2007-05-22,24.6
2007-05-23,24.7
2007-05-24,24.88


In [46]:
# Load the second listed file the same way: first column becomes the parsed
# 'Date' index, second column keeps the header found in the file.
sample2 = pd.read_csv('stock_prices/'+files[1])
sample2.columns = ['Date', sample2.columns[1]]
sample2['Date'] = pd.to_datetime(sample2['Date'])
sample2 = sample2.set_index('Date')
sample2.head()

Unnamed: 0_level_0,UAL US Equity
Date,Unnamed: 1_level_1
2006-01-25,43.5
2006-01-26,43.15
2006-01-27,43.0
2006-01-30,43.0
2006-01-31,41.98


In [47]:
# Row-wise (axis=0) concatenation with the union of columns: the two frames
# are stacked one after the other, so each row is filled for only one of the
# two price columns and NaN for the other.
# NOTE(review): this does not align the series by date; the combined frame
# built further down uses axis=1 for that.
pd.concat([sample1, sample2], axis=0, join='outer')

Unnamed: 0_level_0,NWA US Equity,UAL US Equity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-05-18,24.625,
2007-05-21,24.800,
2007-05-22,24.600,
2007-05-23,24.700,
2007-05-24,24.880,
...,...,...
2021-07-26,,49.46
2021-07-27,,48.65
2021-07-28,,49.14
2021-07-29,,48.49


In [52]:
# Number of rows (one per date) in the first sample.
sample1.shape[0]

367

In [53]:
# Number of rows (one per date) in the second sample.
sample2.shape[0]

3906

# Extrapolate to All Stocks

Now we will combine the price series for all the stocks into a single DataFrame, aligned by date.

In [60]:
def _load_price_frame(file_name):
    """Read one CSV from 'stock_prices/' into a single-column, date-indexed frame.

    The first CSV column is renamed to 'Date', parsed with pd.to_datetime and
    used as the index; the second column keeps the header found in the file.
    Assigning the two-element column list raises ValueError if the file does
    not have exactly two columns.
    """
    frame = pd.read_csv('stock_prices/'+file_name)
    frame.columns = ['Date', frame.columns[1]]
    frame = frame.set_index('Date')
    frame.index = pd.to_datetime(frame.index)
    return frame


# Map ticker (file name minus extension) -> price frame. Loading through a
# helper with its own local variable keeps this cell from overwriting the
# `sample1` frame inspected earlier, so those cells still give the same
# answer when they are re-run after this one.
price_dict = {os.path.splitext(file)[0]: _load_price_frame(file) for file in files}

In [70]:
# Place the per-ticker frames side by side, aligned on their date index. The
# dict keys (tickers) become the outer level of the column MultiIndex and the
# original CSV headers the inner level.
prices = pd.concat(list(price_dict.values()), axis=1, keys=list(price_dict.keys()))
prices

Unnamed: 0_level_0,NWA,UAL,SKYW,SAVE,LCC,XJT,VA,JBLU,HA,ALGT,...,ANS,FRNTQ,GLUX,ALK,AAL,LUV,DAL,MESA,MEH,RHWI
Unnamed: 0_level_1,NWA US Equity,UAL US Equity,SKYW US Equity,SAVE US Equity,LCC US Equity,XJT US Equity,VA US Equity,JBLU US Equity,HA US Equity,ALGT US Equity,...,ANS US Equity,FRNTQ US Equity,GLUX US Equity,ALK US Equity,AAL US Equity,LUV US Equity,DAL US Equity,MESA US Equity,MEH US Equity,RHWI US Equity
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2001-01-02,,,27.5000,,,,,,2.1250,,...,3.8125,19.2083,0.9375,7.3906,,22.3067,,,14.9375,58559.98829
2001-01-03,,,28.8125,,,,,,2.1250,,...,3.8125,20.3333,1.0625,7.9844,,21.9067,,,15.3125,68159.98637
2001-01-04,,,27.4375,,,,,,2.1875,,...,3.8125,20.5625,1.0000,8.5000,,23.1667,,,17.1875,73919.98522
2001-01-05,,,27.7500,,,,,,2.1875,,...,3.7500,21.1667,,8.4531,,23.2667,,,18.3750,68159.98637
2001-01-08,,,26.8750,,,,,,2.1875,,...,3.7500,20.9583,0.9375,8.4688,,22.6667,,,18.1250,71999.98560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-26,,49.46,40.6800,28.94,,,,16.07,20.8400,194.10,...,,,,60.5600,22.08,51.7800,41.67,9.37,,
2021-07-27,,48.65,40.1400,28.03,,,,14.96,20.3600,192.35,...,,,,59.3200,21.46,51.1500,40.96,9.12,,
2021-07-28,,49.14,40.8500,28.51,,,,15.31,20.3100,191.91,...,,,0.0100,59.6600,21.63,51.7600,41.30,9.19,,
2021-07-29,,48.49,41.5700,28.34,,,,15.15,20.2500,196.23,...,,,0.0040,58.8500,21.17,51.5500,41.23,9.09,,


In [71]:
# Keep only the ticker level of the (ticker, CSV header) column pairs.
# get_level_values(0) also works on already-flat columns, where it returns
# them unchanged, so re-running this cell is safe; indexing each label with
# [0] would, on a second run, cut every ticker down to its first letter.
prices.columns = prices.columns.get_level_values(0)

In [72]:
# Display the combined frame now that its columns are plain ticker labels
# (pandas elides the middle rows and columns of a large frame with '...').
prices

Unnamed: 0_level_0,NWA,UAL,SKYW,SAVE,LCC,XJT,VA,JBLU,HA,ALGT,...,ANS,FRNTQ,GLUX,ALK,AAL,LUV,DAL,MESA,MEH,RHWI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-01-02,,,27.5000,,,,,,2.1250,,...,3.8125,19.2083,0.9375,7.3906,,22.3067,,,14.9375,58559.98829
2001-01-03,,,28.8125,,,,,,2.1250,,...,3.8125,20.3333,1.0625,7.9844,,21.9067,,,15.3125,68159.98637
2001-01-04,,,27.4375,,,,,,2.1875,,...,3.8125,20.5625,1.0000,8.5000,,23.1667,,,17.1875,73919.98522
2001-01-05,,,27.7500,,,,,,2.1875,,...,3.7500,21.1667,,8.4531,,23.2667,,,18.3750,68159.98637
2001-01-08,,,26.8750,,,,,,2.1875,,...,3.7500,20.9583,0.9375,8.4688,,22.6667,,,18.1250,71999.98560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-26,,49.46,40.6800,28.94,,,,16.07,20.8400,194.10,...,,,,60.5600,22.08,51.7800,41.67,9.37,,
2021-07-27,,48.65,40.1400,28.03,,,,14.96,20.3600,192.35,...,,,,59.3200,21.46,51.1500,40.96,9.12,,
2021-07-28,,49.14,40.8500,28.51,,,,15.31,20.3100,191.91,...,,,0.0100,59.6600,21.63,51.7600,41.30,9.19,,
2021-07-29,,48.49,41.5700,28.34,,,,15.15,20.2500,196.23,...,,,0.0040,58.8500,21.17,51.5500,41.23,9.09,,


# Inspection

Let's make sure everything makes sense!

In [74]:
# For every date (row), how many stocks have a non-missing price.
counts = prices.notna().sum(axis=1)

In [75]:
# Smallest and largest number of stocks with a price on any single date.
fewest, most = min(counts), max(counts)
print(fewest, most)

8 17
