In [1]:
import numpy as np
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as optimization
import pandas_datareader as pdr

In [2]:
import datetime as dt

In [3]:
class PasteMissingRows:
    
    def __init__(self, ticker, start_date, end_date):
        
        self.df = None
        self.dates_check = None
        self.df_result = None
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

          
    def Download_df(self):
        
        # Download data from yahoo
        stock_data = pdr.get_data_yahoo([self.ticker], start = self.start_date, end = self.end_date)['Adj Close']
        
        df = pd.DataFrame(stock_data)
        # Make copy and reset index
        df.reset_index(inplace=True)
        
        return df
            
        
    def Create_df_with_all_dates(self):
        
        df = self.Download_df()

        # Range of dates
        dates_check = pd.DataFrame({"dates":pd.date_range(start = self.start_date, end = self.end_date)})

        return dates_check   
    
    # Function to insert row in the dataframe
    def Insert_value(self,row_number, df, row_value):

        # Slice above rows
        df1 = df[0:row_number]

        # Store below rows
        df2 = df[row_number:]

        # Insert the row in the upper half dataframe
        df1.loc[row_number] = row_value

        # Concat the two dataframes
        df_result = pd.concat([df1, df2])

        # Reassign the index labels
        df_result.index = [*range(df_result.shape[0])]
        
        return df_result

    # Function that detect mising rows in df and add previous values where missing
    def Insert_row_to_df(self):
        
        dates_check = self.Create_df_with_all_dates()
        df = self.Download_df()
        
        if len(df) < len(dates_check):
        
            for i in range(1, len(dates_check)):

                # if diff between next days is > 1
                if (df['Date'].loc[i] - df['Date'].loc[i-1]).days > 1:

                    # first [0]+1 = 1
                    row_number = i
                    row_value = [dates_check['dates'].iloc[i-1], df[self.ticker].iloc[i-1]]

                    # Let's call the function and insert the row
                    df = self.Insert_value(row_number, df, row_value)
                    df = df.sort_values(by="Date")

                else:
                    df = df.sort_values(by="Date")
                    
            df.drop_duplicates(subset=None, keep='first', inplace=True)
                    
        else:
            pass
            df = df[:-1]
  
    
        return df

In [4]:
# I choese period and stocks based on calculations from "ChoeseStockAndPerio"
# In final notebook I will compare all functions together
start = '2014-09-17'
end = '2022-04-20'

start = pd.Timestamp(start)
end = pd.Timestamp(end)

tickers = ['BTC-USD', 'CSPX.L', 'IEF', 'FILL', 'PALL']

In [5]:
def createPrepDataFrame(ticker):    
    
    data = pd.DataFrame()

    # loop by tickers
    for ticker in tickers:

        # Use function to insert missing rows for each ticker
        dt = PasteMissingRows(ticker=ticker, start_date=start, end_date=end).Insert_row_to_df()

        # Set index for proper concating
        dt.set_index('Date', inplace=True)

        # Add column named as ticker with values from function
        data[ticker] = dt
    
    return data

In [6]:
df = createPrepDataFrame(tickers)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [7]:
df.to_csv('DATA/PrepData.csv', index=True)

In [14]:
dt = PasteMissingRows(ticker=tickers[0], start_date=start, end_date=end).Insert_row_to_df()

In [17]:
dt[tickers[0]]

0       181.830002
1       182.869995
2       183.274994
4       183.274994
5       183.274994
           ...    
2769    453.880005
2770    453.880005
2771    453.880005
2772    455.679993
2773    457.929993
Name: CSPX.L, Length: 2773, dtype: float64

In [23]:
x[tickers[0]] = dt[tickers[0]]

In [27]:
x

Unnamed: 0,Date,CSPX.L,BTC-USD,IEF,dates
0,2014-09-17,181.830002,,,2014-09-17
1,2014-09-18,182.869995,,,2014-09-18
2,2014-09-19,183.274994,,,2014-09-19
4,2014-09-20,183.274994,,,2014-09-20
5,2014-09-21,183.274994,,,2014-09-21
...,...,...,...,...,...
2769,2022-04-16,453.880005,,,2022-04-16
2770,2022-04-17,453.880005,,,2022-04-17
2771,2022-04-18,453.880005,,,2022-04-18
2772,2022-04-19,455.679993,,,2022-04-19


In [56]:
data = pd.DataFrame({"dates":pd.date_range(start = start, end = end)})
for ticker in tickers:
    df = PasteMissingRows(ticker=ticker, start_date=start, end_date=end).Insert_row_to_df()
    df_ticker = df[ticker]
    data.append(df_ticker)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


Index has no missing dates


In [58]:
df_ticker

0        90.402336
1        90.340668
2        90.684189
4        90.684189
5        90.684189
           ...    
2769    103.440002
2770    103.440002
2771    103.139999
2772    102.459999
2773    103.260002
Name: IEF, Length: 2773, dtype: float64