In [78]:
import pandas as pd

import numpy as np

def generate_random_timeseries_data(proportion_to_delete=0.2):

    # Create a date range for the year 2020,  + 20 business days 

    date_range = pd.date_range(start='2020-01-01', end='2021-01-20', freq='B')  # 'B' for business day frequency


    # Generate random data for each business day

    random_data = np.random.rand(len(date_range))
    

    # Create a DataFrame with the date and the generated random data

    timeseries_data = pd.DataFrame({'Date': date_range, 'Value': random_data})
    

    # Randomly delete some observations based on the given proportion

    num_observations_to_delete = int(proportion_to_delete * len(timeseries_data))

    indices_to_delete = np.random.choice(timeseries_data.index, num_observations_to_delete, replace=False)

    timeseries_data.drop(indices_to_delete, inplace=True)

    # Find the missing indices
    missing_indices = timeseries_data[timeseries_data['Value'].isnull()].index

    # Replace the missing values with the nearest data
    for idx in missing_indices:
        distances = np.abs(timeseries_data.index - idx)
        nearest_idx = distances.argmin()
        nearest_value = timeseries_data.loc[nearest_idx, 'Value']
        timeseries_data.loc[idx, 'Value'] = nearest_value

    return timeseries_data

#storing the timeseries_data in input_data
input_data = generate_random_timeseries_data()
print(input_data)

          Date     Value
0   2020-01-01  0.886238
1   2020-01-02  0.155864
2   2020-01-03  0.834128
3   2020-01-06  0.155463
4   2020-01-07  0.156131
..         ...       ...
270 2021-01-13  0.296578
271 2021-01-14  0.272251
272 2021-01-15  0.009493
273 2021-01-18  0.687170
275 2021-01-20  0.993973

[221 rows x 2 columns]


In [79]:
def find_nearest_date(target_date, date_list):
   
    nearest_date = min(date_list, key=lambda x: abs((x - target_date).days))
    return nearest_date


def calculate_10_business_day_returns(timeseries_data):
    # Filter data for the period [2020-01-01, 2020-12-31]
    start_date = pd.to_datetime('2020-01-01')
    end_date = pd.to_datetime('2020-12-31')
    filtered_data = timeseries_data[(timeseries_data['Date'] >= start_date) & (timeseries_data['Date'] <= end_date)]

    # Calculate the 10-business-days returns
    returns_series = filtered_data['Value'].pct_change(periods=10)

    # Find the missing indices
    missing_indices = returns_series[returns_series.isnull()].index

    # Replace the missing values with the nearest non-missing data
    for idx in missing_indices:
        target_date = filtered_data.loc[idx, 'Date']
        nearest_date = find_nearest_candidate(target_date, filtered_data['Date'])
        nearest_value = filtered_data[filtered_data['Date'] == nearest_date]['Value'].iloc[0]
        returns_series.loc[idx] = nearest_value

    # Combine 'Date' and 'Returns Series' into a new DataFrame
    returns_df = pd.DataFrame({'Date': filtered_data['Date'].iloc[10:], '10-Day-Returns': returns_series.iloc[10:]})

    return returns_df


def main():
    # Generating the timeseries_data
    input_data = generate_random_timeseries_data(proportion_to_delete=0.2)

    # Call the calculate_10_business_day_returns function
    returns_series = calculate_10_business_day_returns(input_data)
    
    print(returns_series)
    
    
main()

          Date  10-Day-Returns
13  2020-01-20       -0.733025
14  2020-01-21        2.499334
15  2020-01-22        0.096764
16  2020-01-23        2.261061
17  2020-01-24       -0.085241
..         ...             ...
255 2020-12-23       -0.156625
258 2020-12-28       -0.891606
259 2020-12-29        0.216521
260 2020-12-30       -0.387266
261 2020-12-31       -0.816859

[202 rows x 2 columns]
