In [1]:
import numpy as np

In [2]:
import datetime

# Inclusive bounds of the minute grid and the spacing between grid points.
start = datetime.datetime(2020, 1, 1, 0, 1)
end = datetime.datetime(2022, 12, 31, 23, 59)
delta = datetime.timedelta(minutes=1)

# One formatted timestamp per step from `start` up to and including `end`;
# the range is empty when `end` lies before `start`.
dates_list = [
    (start + delta * step).strftime('%Y-%m-%d %H:%M:%S')
    for step in range((end - start) // delta + 1)
]

# Advance `start` past the last generated entry, which is where a
# step-by-step walk over the same range would leave it.
start += delta * len(dates_list)

In [None]:
import pandas as pd

def compute_match_percentage(csv_file, dates_list):
    """Return the percentage of expected timestamps that occur in a CSV file.

    The file is read with pandas and must contain a ``datetime`` column that
    ``pd.to_datetime`` can parse.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to inspect.
    dates_list : iterable
        Expected timestamps, given as datetime objects or parseable strings.

    Returns
    -------
    float
        Share (0-100) of the distinct expected timestamps found in the file.
        ``0.0`` when ``dates_list`` contains no timestamps.
    """
    df = pd.read_csv(csv_file)

    # Parse both sides to datetimes so the comparison does not depend on how
    # the expected values were formatted, and de-duplicate so repeated rows
    # in the file cannot push the result above 100.
    observed = pd.DatetimeIndex(pd.to_datetime(df['datetime'])).dropna().unique()
    expected = pd.DatetimeIndex(pd.to_datetime(list(dates_list))).dropna().unique()

    if len(expected) == 0:
        # Nothing to match against; avoid dividing by zero.
        return 0.0

    matched_dates = int(expected.isin(observed).sum())
    return matched_dates / len(expected) * 100


In [None]:
# Compare the AAPL file against the minute grid held in `dates_list`.
percentage = compute_match_percentage("intraday/AAPL_US_data.csv", dates_list)

In [None]:
# Show the match percentage stored in `percentage`.
print(percentage)

In [3]:
import datetime
import pandas as pd

def generate_baseline_dates(start_date, end_date, open_hour=8, close_hour=22):
    """List every minute between two datetimes that falls in the trading window.

    A minute is kept when it lies on Monday-Friday and its hour satisfies
    ``open_hour <= hour < close_hour``. Public holidays are NOT excluded, so
    the result is an upper bound on the minutes a market was actually open.

    Parameters
    ----------
    start_date, end_date : datetime.datetime
        Inclusive bounds of the period; the walk starts at ``start_date`` and
        advances in one-minute steps.
    open_hour : int, default 8
        First hour of the day (inclusive) that counts as trading time.
    close_hour : int, default 22
        Hour of the day (exclusive) at which trading time ends.

    Returns
    -------
    list of datetime.datetime
        The kept minutes in ascending order; empty when ``start_date`` is
        after ``end_date``.
    """
    interval = datetime.timedelta(minutes=1)
    dates = []
    current_date = start_date

    while current_date <= end_date:
        # weekday() numbers Monday as 0, so values below 5 are Mon-Fri.
        is_weekday = current_date.weekday() < 5
        if is_weekday and open_hour <= current_date.hour < close_hour:
            dates.append(current_date)
        current_date += interval

    return dates

def calculate_completeness(csv_file=None, df=None, start_date=None, end_date=None):
    """Print and return how much of the baseline minute grid a data set covers.

    The baseline is produced by ``generate_baseline_dates`` for the period
    ``start_date``..``end_date``. The data set must have a ``datetime`` column
    that ``pd.to_datetime`` can parse.

    Parameters
    ----------
    csv_file : str or path-like, optional
        CSV file to read. Takes precedence over ``df`` when both are given.
    df : pandas.DataFrame, optional
        Already-loaded data; it is not modified.
    start_date : datetime.datetime, optional
        Start of the baseline period. Defaults to 2020-01-01 08:00.
    end_date : datetime.datetime, optional
        End of the baseline period. Defaults to 2022-12-31 22:00.

    Returns
    -------
    float
        Percentage of baseline minutes present in the data, rounded to two
        decimals. The same value is printed.

    Raises
    ------
    ValueError
        If neither ``csv_file`` nor ``df`` is provided, or if the baseline
        period contains no minutes.
    """
    # Resolve the input first so an invalid call fails before the
    # minute-by-minute baseline is generated.
    if csv_file:
        df = pd.read_csv(csv_file)
    elif df is None:
        raise ValueError("Either a CSV file or a DataFrame must be provided.")

    if start_date is None:
        start_date = datetime.datetime(2020, 1, 1, 8, 0, 0)
    if end_date is None:
        end_date = datetime.datetime(2022, 12, 31, 22, 0, 0)

    baseline_dates = generate_baseline_dates(start_date, end_date)
    if not baseline_dates:
        raise ValueError("The baseline period contains no minutes to compare against.")

    # Parse into a separate object: assigning the parsed values back to
    # df["datetime"] would silently alter a DataFrame passed in by the caller.
    observed_dates = set(pd.to_datetime(df["datetime"]))
    matching_dates = observed_dates.intersection(baseline_dates)
    percentage_complete = round(len(matching_dates) / len(baseline_dates) * 100, 2)

    print(f"The following data set {csv_file} is {percentage_complete}")
    return percentage_complete


In [4]:
# NOTE(review): this call is repeated in the cells below, once per ticker CSV
# under intraday/; a single loop over the file names would produce the same reports.
calculate_completeness("intraday/AAPL_US_data.csv")

The following data set intraday/AAPL_US_data.csv is 81.26


In [5]:
calculate_completeness("intraday/ABNB_US_data.csv")

The following data set intraday/ABNB_US_data.csv is 36.32


In [6]:
calculate_completeness("intraday/ACB_US_data.csv")

The following data set intraday/ACB_US_data.csv is 56.41


In [7]:
calculate_completeness("intraday/AMC_US_data.csv")

The following data set intraday/AMC_US_data.csv is 72.17


In [8]:
calculate_completeness("intraday/AMZN_US_data.csv")

The following data set intraday/AMZN_US_data.csv is 58.29


In [9]:
calculate_completeness("intraday/APHA_US_data.csv")

The following data set intraday/APHA_US_data.csv is 24.88


In [10]:
calculate_completeness("intraday/ARKK_US_data.csv")

The following data set intraday/ARKK_US_data.csv is 55.37


In [11]:
calculate_completeness("intraday/BABA_US_data.csv")

The following data set intraday/BABA_US_data.csv is 71.12


In [12]:
calculate_completeness("intraday/BB_US_data.csv")

The following data set intraday/BB_US_data.csv is 53.07


In [13]:
calculate_completeness("intraday/BILI_US_data.csv")

The following data set intraday/BILI_US_data.csv is 57.0


In [14]:
calculate_completeness("intraday/BITO_US_data.csv")

The following data set intraday/BITO_US_data.csv is 23.02


In [15]:
calculate_completeness("intraday/BNTX_US_data.csv")

The following data set intraday/BNTX_US_data.csv is 49.01


In [16]:
calculate_completeness("intraday/BYND_US_data.csv")

The following data set intraday/BYND_US_data.csv is 51.67


In [17]:
calculate_completeness("intraday/CGC_US_data.csv")

The following data set intraday/CGC_US_data.csv is 53.29


In [18]:
calculate_completeness("intraday/COIN_US_data.csv")

The following data set intraday/COIN_US_data.csv is 34.99


In [19]:
calculate_completeness("intraday/CRON_US_data.csv")

The following data set intraday/CRON_US_data.csv is 47.37


In [20]:
calculate_completeness("intraday/DKNG_US_data.csv")

The following data set intraday/DKNG_US_data.csv is 55.12


In [21]:
calculate_completeness("intraday/GDX_US_data.csv")

The following data set intraday/GDX_US_data.csv is 58.6


In [22]:
calculate_completeness("intraday/GDXJ_US_data.csv")

The following data set intraday/GDXJ_US_data.csv is 52.46


In [23]:
calculate_completeness("intraday/GLD_US_data.csv")

The following data set intraday/GLD_US_data.csv is 59.86


In [24]:
calculate_completeness("intraday/GME_US_data.csv")

The following data set intraday/GME_US_data.csv is 54.32


In [25]:
calculate_completeness("intraday/GOOGL_US_data.csv")

The following data set intraday/GOOGL_US_data.csv is 49.08


In [26]:
calculate_completeness("intraday/HOOD_US_data.csv")

The following data set intraday/HOOD_US_data.csv is 28.78


In [27]:
calculate_completeness("intraday/MRNA_US_data.csv")

The following data set intraday/MRNA_US_data.csv is 59.13


In [28]:
calculate_completeness("intraday/MSFT_US_data.csv")

The following data set intraday/MSFT_US_data.csv is 67.1


In [29]:
calculate_completeness("intraday/MSTR_US_data.csv")

The following data set intraday/MSTR_US_data.csv is 31.79


In [30]:
calculate_completeness("intraday/NFLX_US_data.csv")

The following data set intraday/NFLX_US_data.csv is 53.53


In [31]:
calculate_completeness("intraday/NIO_US_data.csv")

The following data set intraday/NIO_US_data.csv is 82.78


In [32]:
calculate_completeness("intraday/NOK_US_data.csv")

The following data set intraday/NOK_US_data.csv is 61.03


In [33]:
calculate_completeness("intraday/NVDA_US_data.csv")

The following data set intraday/NVDA_US_data.csv is 67.85


In [34]:
calculate_completeness("intraday/PENN_US_data.csv")

The following data set intraday/PENN_US_data.csv is 50.37


In [35]:
calculate_completeness("intraday/PFE_US_data.csv")

The following data set intraday/PFE_US_data.csv is 59.54


In [36]:
calculate_completeness("intraday/PYPL_US_data.csv")

The following data set intraday/PYPL_US_data.csv is 55.29


In [37]:
calculate_completeness("intraday/SLV_US_data.csv")

The following data set intraday/SLV_US_data.csv is 62.19


In [38]:
calculate_completeness("intraday/SPY_US_data.csv")

The following data set intraday/SPY_US_data.csv is 84.86


In [39]:
calculate_completeness("intraday/SQ_US_data.csv")

The following data set intraday/SQ_US_data.csv is 58.62


In [40]:
calculate_completeness("intraday/TLRY_US_data.csv")

The following data set intraday/TLRY_US_data.csv is 64.18


In [41]:
calculate_completeness("intraday/TSLA_US_data.csv")

The following data set intraday/TSLA_US_data.csv is 76.94


In [42]:
calculate_completeness("intraday/TWTR_US_data.csv")

The following data set intraday/TWTR_US_data.csv is 55.68


In [43]:
calculate_completeness("intraday/UBER_US_data.csv")

The following data set intraday/UBER_US_data.csv is 58.77


In [44]:
calculate_completeness("intraday/USO_US_data.csv")

The following data set intraday/USO_US_data.csv is 57.86


In [45]:
calculate_completeness("intraday/ZM_US_data.csv")

The following data set intraday/ZM_US_data.csv is 53.02
