## Load Financial Data

In [3]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
# Render every float in displayed frames with four decimal places.
pd.set_option("display.float_format", "{:.4f}".format)

In [11]:
# Read spreadsheet columns A through E and use the parsed "Date" column
# as the index.
SP500 = pd.read_excel(
    "SP500.xls",
    usecols="A:E",
    index_col="Date",
    parse_dates=["Date"],
)
SP500.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1970-12-31,92.27,92.79,91.36,92.15
1971-01-04,92.15,92.19,90.64,91.15
1971-01-05,91.15,92.28,90.69,91.8
1971-01-06,91.8,93.0,91.5,92.35
1971-01-07,92.35,93.26,91.75,92.38


In [20]:
# The CSV carries two header rows (parsed as two column levels) and dates
# in its first column; keep only the "Close" group of columns.
stocks = pd.read_csv(
    "stocks.csv",
    header=[0, 1],
    index_col=[0],
    parse_dates=[0],
)["Close"]
stocks.head()

Unnamed: 0_level_0,AAPL,BA,DIS,IBM,KO,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,7.5261,54.13,32.25,130.9,28.5,30.48
2010-01-04,7.6432,56.18,32.07,132.45,28.52,30.95
2010-01-05,7.6564,58.02,31.99,130.85,28.175,30.96
2010-01-06,7.5346,59.78,31.82,130.0,28.165,30.77
2010-01-07,7.5207,62.2,31.83,129.55,28.095,30.45


## Merge Time Series

In [24]:
# Build two single-column frames covering different, partially overlapping
# date windows: AAPL for 2010-2014 and BA for 2012-2016.
appl = stocks["AAPL"].loc["2010-01-01":"2014-12-31"].to_frame()
ba = stocks["BA"].loc["2012-01-01":"2016-12-31"].to_frame()

In [27]:
# Attach the BA series to the AAPL frame. The values are aligned on the
# date index, so AAPL dates that are absent from `ba` show up as NaN.
appl = appl.assign(BA=ba["BA"])
appl

Unnamed: 0_level_0,AAPL,BA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-04,7.6432,
2010-01-05,7.6564,
2010-01-06,7.5346,
2010-01-07,7.5207,
2010-01-08,7.5707,
...,...,...
2014-12-24,28.0025,131.2400
2014-12-26,28.4975,131.6300
2014-12-29,28.4775,132.2900
2014-12-30,28.1300,131.8300


In [28]:
# Conform `ba` to the dates of `appl`: rows are kept where the dates match
# and filled with NaN where `ba` has no entry. The result is only displayed,
# `ba` itself is not modified.
ba.reindex(index=appl.index)

Unnamed: 0_level_0,BA
Date,Unnamed: 1_level_1
2010-01-04,
2010-01-05,
2010-01-06,
2010-01-07,
2010-01-08,
...,...
2014-12-24,131.2400
2014-12-26,131.6300
2014-12-29,132.2900
2014-12-30,131.8300


# Datetime Methods

In [29]:
# Work on an independent (deep) copy so the columns added below do not
# end up in `stocks`.
close = stocks.copy(deep=True)

In [33]:
# Derive two calendar attributes from the datetime index and store them
# as extra columns: the weekday name and the quarter number.
close = close.assign(
    Day=close.index.day_name(),
    Quarter=close.index.quarter,
)
close

Unnamed: 0_level_0,AAPL,BA,DIS,IBM,KO,MSFT,Day,Quarter
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2009-12-31,7.5261,54.1300,32.2500,130.9000,28.5000,30.4800,Thursday,4
2010-01-04,7.6432,56.1800,32.0700,132.4500,28.5200,30.9500,Monday,1
2010-01-05,7.6564,58.0200,31.9900,130.8500,28.1750,30.9600,Tuesday,1
2010-01-06,7.5346,59.7800,31.8200,130.0000,28.1650,30.7700,Wednesday,1
2010-01-07,7.5207,62.2000,31.8300,129.5500,28.0950,30.4500,Thursday,1
...,...,...,...,...,...,...,...,...
2019-01-30,41.3125,387.7200,110.1300,134.3800,47.8600,106.3800,Wednesday,1
2019-01-31,41.6100,385.6200,111.5200,134.4200,48.1300,104.4300,Thursday,1
2019-02-01,41.6300,387.4300,111.3000,134.1000,48.7000,102.7800,Friday,1
2019-02-04,42.8125,397.0000,111.8000,135.1900,49.2500,105.7400,Monday,1


# Fill Missing Values

Weekend values are not available because trading takes place only from Monday to Friday. We need to fill those missing values appropriately.

In [37]:
# Expand the index to every calendar day in the span; days that have no
# row in `close` are inserted as all-NaN rows.
all_days = pd.date_range("2009-12-31", "2019-02-06", freq="D")
close = close.reindex(index=all_days)
close.head(10)

Unnamed: 0,AAPL,BA,DIS,IBM,KO,MSFT,Day,Quarter
2009-12-31,7.5261,54.13,32.25,130.9,28.5,30.48,Thursday,4.0
2010-01-01,,,,,,,,
2010-01-02,,,,,,,,
2010-01-03,,,,,,,,
2010-01-04,7.6432,56.18,32.07,132.45,28.52,30.95,Monday,1.0
2010-01-05,7.6564,58.02,31.99,130.85,28.175,30.96,Tuesday,1.0
2010-01-06,7.5346,59.78,31.82,130.0,28.165,30.77,Wednesday,1.0
2010-01-07,7.5207,62.2,31.83,129.55,28.095,30.45,Thursday,1.0
2010-01-08,7.5707,61.6,31.88,130.85,27.575,30.66,Friday,1.0
2010-01-09,,,,,,,,


In [40]:
# Recompute Day and Quarter from the new daily index so the rows inserted
# by the reindex get proper values instead of NaN.
# Bracket assignment is used on purpose: attribute-style assignment
# (`close.Day = ...`) only updates a column that already exists and would
# otherwise set a plain Python attribute on the frame instead of a column.
close["Day"] = close.index.day_name()
close["Quarter"] = close.index.quarter
close.head()

Unnamed: 0,AAPL,BA,DIS,IBM,KO,MSFT,Day,Quarter
2009-12-31,7.5261,54.13,32.25,130.9,28.5,30.48,Thursday,4
2010-01-01,,,,,,,Friday,1
2010-01-02,,,,,,,Saturday,1
2010-01-03,,,,,,,Sunday,1
2010-01-04,7.6432,56.18,32.07,132.45,28.52,30.95,Monday,1


In [42]:
# Back-fill along the rows: each remaining NaN takes the next available
# value below it. The result is only displayed; `close` is not reassigned.
# NOTE(review): back-filling copies a later price onto earlier dates -
# confirm this is intended rather than a forward fill.
close.bfill(axis="index")

Unnamed: 0,AAPL,BA,DIS,IBM,KO,MSFT,Day,Quarter
2009-12-31,7.5261,54.1300,32.2500,130.9000,28.5000,30.4800,Thursday,4
2010-01-01,7.6432,56.1800,32.0700,132.4500,28.5200,30.9500,Friday,1
2010-01-02,7.6432,56.1800,32.0700,132.4500,28.5200,30.9500,Saturday,1
2010-01-03,7.6432,56.1800,32.0700,132.4500,28.5200,30.9500,Sunday,1
2010-01-04,7.6432,56.1800,32.0700,132.4500,28.5200,30.9500,Monday,1
...,...,...,...,...,...,...,...,...
2019-02-02,42.8125,397.0000,111.8000,135.1900,49.2500,105.7400,Saturday,1
2019-02-03,42.8125,397.0000,111.8000,135.1900,49.2500,105.7400,Sunday,1
2019-02-04,42.8125,397.0000,111.8000,135.1900,49.2500,105.7400,Monday,1
2019-02-05,43.5450,410.1800,112.6600,135.5500,49.2600,107.2200,Tuesday,1


# Time Zone Compatibility

In [45]:
# Load the GE price file with its "date" column parsed and used as the index.
ge = pd.read_csv(
    "GE_prices.csv",
    index_col=["date"],
    parse_dates=["date"],
)
ge.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-29 10:00:00,10.5,10.58,10.43,10.44,4840329.0
2019-07-29 10:30:00,10.4404,10.4847,10.3997,10.4104,3860280.0
2019-07-29 11:00:00,10.42,10.5,10.41,10.5,1912153.0
2019-07-29 11:30:00,10.4931,10.53,10.48,10.485,1916453.0
2019-07-29 12:00:00,10.49,10.5,10.44,10.45,2467638.0


In [51]:
# Show the index's current time zone (None means the timestamps are naive).
print(ge.index.tz)
# Localize the datetime index to New York time. Only do this while the
# index is still naive: tz_localize raises TypeError on an index that is
# already tz-aware, so the guard keeps this cell safe to re-run.
if ge.index.tz is None:
    ge = ge.tz_localize("America/New_York")
ge.head()

None


Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-29 10:00:00-04:00,10.5,10.58,10.43,10.44,4840329.0
2019-07-29 10:30:00-04:00,10.4404,10.4847,10.3997,10.4104,3860280.0
2019-07-29 11:00:00-04:00,10.42,10.5,10.41,10.5,1912153.0
2019-07-29 11:30:00-04:00,10.4931,10.53,10.48,10.485,1916453.0
2019-07-29 12:00:00-04:00,10.49,10.5,10.44,10.45,2467638.0


In [52]:
# Print the time zone now attached to the index of `ge`.
print(ge.index.tz)

America/New_York


In [55]:
# Express the tz-aware timestamps in Los Angeles time. The result is stored
# under a new name, so `ge` keeps its own time zone.
ge_la = ge.tz_convert(tz="America/Los_Angeles")
ge_la.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-29 07:00:00-07:00,10.5,10.58,10.43,10.44,4840329.0
2019-07-29 07:30:00-07:00,10.4404,10.4847,10.3997,10.4104,3860280.0
2019-07-29 08:00:00-07:00,10.42,10.5,10.41,10.5,1912153.0
2019-07-29 08:30:00-07:00,10.4931,10.53,10.48,10.485,1916453.0
2019-07-29 09:00:00-07:00,10.49,10.5,10.44,10.45,2467638.0
