# Basic Portfolio Analysis

#### Notebook: Initial Descriptive Analysis


In [18]:
# standard python libraries
import json
import datetime
import pandas as pd
import numpy as np
import glob
import os

# library for getting historical stock price data
import fastquant as fq


# Part 1: Collecting the Data

### INSERT DESCRIPTION

In [19]:
stocks_df = pd.DataFrame()

for file in glob.glob('data/20230315/stocks/*'):
    stock_df = pd.read_csv(file, index_col=[0])
    stock_name = os.path.basename(file)[:-4]
    stock_df.rename({'Price':stock_name}, axis=1, inplace=True)
    stock_df = stock_df[[stock_name]].copy()
    if stocks_df.empty:
        stocks_df = stock_df.copy()
    else:
        stocks_df = stocks_df.join(stock_df)

stocks_df.index = pd.to_datetime(stocks_df.index)
stocks_df.sort_index()

Unnamed: 0_level_0,AREIT,MER,CNVRG,SECB,DNL,MBT,ICT,BPI,CREIT,FPH,RCR,SMC2I,GLO,CNPF,TEL,AEV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-08-14,24.10,278.6,,98.02,4.97,35.95,108.0,65.50,,59.15,,78.3,2136.00,16.16,1350.00,50.25
2020-08-17,25.95,274.8,,96.28,4.85,35.55,107.5,65.15,,58.80,,,2118.00,15.96,1348.00,50.20
2020-08-18,25.80,280.0,,97.02,5.00,36.10,110.0,65.70,,58.70,,,2130.00,16.20,1360.00,51.60
2020-08-19,25.95,271.0,,97.52,5.10,36.00,108.1,62.50,,60.00,,78.3,2130.00,16.36,1380.00,49.60
2020-08-20,25.90,270.0,,96.03,5.05,34.70,108.0,63.50,,60.65,,78.3,2112.00,16.20,1368.00,49.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-09,34.95,314.0,15.72,101.00,7.86,60.10,212.4,108.00,2.45,61.95,5.65,74.8,1800.00,26.00,1280.00,51.45
2023-03-10,34.80,320.0,15.50,99.00,7.79,59.70,213.0,109.00,2.42,62.20,5.60,74.8,1820.00,25.45,1316.00,50.30
2023-03-13,34.40,317.8,15.04,94.10,7.69,59.40,207.4,105.10,2.42,61.95,5.58,74.8,1800.00,25.60,1278.00,52.50
2023-03-14,34.95,303.0,14.70,93.00,7.50,57.00,199.9,103.80,2.42,61.85,5.58,,1777.00,25.00,1278.00,50.00


In [20]:
stocks_df.head()

Unnamed: 0_level_0,AREIT,MER,CNVRG,SECB,DNL,MBT,ICT,BPI,CREIT,FPH,RCR,SMC2I,GLO,CNPF,TEL,AEV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2023-03-15,34.45,315.2,15.0,93.05,7.77,57.0,197.7,104.3,2.45,61.85,5.59,74.8,1758.0,24.5,1310.0,49.35
2023-03-14,34.95,303.0,14.7,93.0,7.5,57.0,199.9,103.8,2.42,61.85,5.58,,1777.0,25.0,1278.0,50.0
2023-03-13,34.4,317.8,15.04,94.1,7.69,59.4,207.4,105.1,2.42,61.95,5.58,74.8,1800.0,25.6,1278.0,52.5
2023-03-10,34.8,320.0,15.5,99.0,7.79,59.7,213.0,109.0,2.42,62.2,5.6,74.8,1820.0,25.45,1316.0,50.3
2023-03-09,34.95,314.0,15.72,101.0,7.86,60.1,212.4,108.0,2.45,61.95,5.65,74.8,1800.0,26.0,1280.0,51.45


In [21]:
stocks_df.GLO = stocks_df.GLO.str.replace(',','').astype(float)
stocks_df.TEL = stocks_df.TEL.str.replace(',','').astype(float)

In [22]:
stocks_df.isnull().sum()
stocks_df[['CNVRG','RCR','SMC2I']].fillna(0, inplace=True)
stocks_df['FPH'].ffill(inplace=True)

In [29]:
infl_df = pd.read_excel('data/bsp/prices2018.xls',sheet_name='Monthly',header=[6],usecols=range(1,4))
infl_df['Data'] = pd.to_datetime(inft)


# Part 2: Data Preprocessing

In [30]:
infl_df

Unnamed: 0,Year,Month,All Items
0,1957,Jan,0.838754
1,1957,Feb,0.838754
2,1957,Mar,0.838754
3,1957,Apr,0.838754
4,1957,May,0.838754
...,...,...,...
2385,2022,Oct,118.5
2386,2022,Nov,119.7
2387,2022,Dec,120.1
2388,2023,Jan,122.256287


## 2.1 Converting prices to returns

#### Many models are approaches used for time series modelling require the time series to be stationary. Stationarity assumes that the statistics of a process do not change over times. Using that assumption, we can build models that aim to forecast the future value of the process.

#### However, asset prices are usually non-stationary. Their statistic not only change over time, but we can also observe some trends (general patterns over time) or seasonality (patterns repeating over fixed time intervals). By transforming the prices into returns, we attempt to make the time series stationary. 

#### Another benefits of using returns, as opposed to prices, is normalization. It means that we can easily compare various return series, which would not be that simple with raw staock prices.

##### There are two types of returns.
<ol>
<li>Simple Returns
<li>Log Returns
</ol>
 

In [14]:
stocks_simple_rtn_df = stocks_df.pct_change().fillna(0,)
stocks_log_rtn_df = np.log(stocks_df/stocks_df.shift(1)).fillna(0)

In [15]:
stocks_simple_rtn_df

Unnamed: 0_level_0,AREIT,MER,CNVRG,SECB,DNL,MBT,ICT,BPI,CREIT,FPH,RCR,SMC2I,GLO,CNPF,TEL,AEV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2023-03-15,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2023-03-14,0.014514,-0.038706,-0.020000,-0.000537,-0.034749,0.000000,0.011128,-0.004794,-0.012245,0.000000,-0.001789,0.000000,0.010808,0.020408,-0.024427,0.013171
2023-03-13,-0.015737,0.048845,0.023129,0.011828,0.025333,0.042105,0.037519,0.012524,0.000000,0.001617,0.000000,0.000000,0.012943,0.024000,0.000000,0.050000
2023-03-10,0.011628,0.006923,0.030585,0.052072,0.013004,0.005051,0.027001,0.037108,0.000000,0.004036,0.003584,0.000000,0.011111,-0.005859,0.029734,-0.041905
2023-03-09,0.004310,-0.018750,0.014194,0.020202,0.008986,0.006700,-0.002817,-0.009174,0.012397,-0.004019,0.008929,0.000000,-0.010989,0.021611,-0.027356,0.022863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-20,0.007782,0.000000,0.000000,-0.004664,0.028513,0.019090,0.004651,-0.007812,0.000000,-0.000824,0.000000,-0.002548,0.005714,-0.010989,-0.022857,0.023638
2020-08-19,0.001931,0.003704,0.000000,0.015516,0.009901,0.037464,0.000926,-0.015748,0.000000,-0.010717,0.000000,0.000000,0.008523,0.009877,0.008772,-0.004016
2020-08-18,-0.005780,0.033210,0.000000,-0.005127,-0.019608,0.002778,0.017576,0.051200,0.000000,-0.021667,0.000000,0.000000,0.000000,-0.009780,-0.014493,0.040323
2020-08-17,0.005814,-0.018571,0.000000,-0.007627,-0.030000,-0.015235,-0.022727,-0.008371,0.000000,0.001704,0.000000,0.000000,-0.005634,-0.014815,-0.008824,-0.027132


In [None]:
pip install nasdaq-data-link