In [18]:
import pandas as pd
import numpy as np

## Loading Datasets

In [24]:
# Dataset variants; each name is the suffix of a "River-Data-<name>.xlsx" workbook.
labels = [
    'Lagged',
    'MA',
    'WMA',
    'MA-Lagged',
    'WMA-Lagged',
]

def load_datasets():
    """Read one workbook per entry of the module-level ``labels`` list.

    Each workbook is read from ``River-Data-<label>.xlsx`` (relative to the
    current working directory) and its ``"Unnamed: 0"`` column is removed
    (presumably a saved row index -- confirm against the workbooks).

    Returns:
        dict mapping each label to its DataFrame, in the order of ``labels``.

    Raises:
        KeyError: if a workbook has no ``"Unnamed: 0"`` column.
    """
    return {
        name: pd.read_excel(f"River-Data-{name}.xlsx").drop(columns=["Unnamed: 0"])
        for name in labels
    }

# Read every workbook once up front; `data` maps each label to its DataFrame.
data = load_datasets()

In [25]:
# Report (rows, columns) for every loaded dataset; the row counts differ between variants.
for name in labels:
    frame = data[name]
    print(name, frame.shape)

Lagged (1448, 26)
MA (1445, 42)
WMA (1451, 42)
MA-Lagged (1444, 42)
WMA-Lagged (1450, 42)


## Building Feature Sets

In [135]:
def _print_column_menu(columns):
    """Print the 1-based numbered column names, two per line."""
    for i in range(0, len(columns), 2):
        line = f"{i+1}) {columns[i]}"
        if i + 1 < len(columns):
            # Pad the left entry to 50 characters so the right-hand entries line up.
            line = f"{line:<50}{i+2}) {columns[i+1]}"
        print(line)


def _parse_column_selection(spec, n_columns):
    """Convert a 1-based selection string such as "1-3, 7" to 0-based positions."""
    positions = []
    for token in spec.split(","):
        token = token.strip()
        if not token:
            continue  # tolerate stray or trailing commas
        first, dash, last = token.partition("-")
        start = int(first)
        stop = int(last) if dash else start
        if start > stop:
            raise ValueError(f"Descending range {token!r} in column selection")
        # Reject 0 and too-large numbers explicitly: Python indexing would
        # otherwise wrap 0 round to the last column or silently clamp a slice.
        if start < 1 or stop > n_columns:
            raise IndexError(f"Column selection {token!r} is outside 1-{n_columns}")
        positions.extend(range(start - 1, stop))
    if not positions:
        raise ValueError("No columns selected")
    return positions


def build_feature_set(*datasets, selection=None):
    """Merge several datasets column-wise and return a chosen subset of columns.

    Every dataset is cut down to its last ``min_rows`` rows, where ``min_rows``
    is the length of the shortest dataset, and re-indexed from 0 so that rows
    pair up by position. The first two columns are taken from the first
    dataset only; the columns from the third onwards of every dataset are then
    appended in argument order.

    NOTE(review): pairing rows by position assumes all datasets end on the
    same observation; nothing here checks that -- confirm against the data.

    Args:
        *datasets: one or more DataFrames whose first two columns are shared
            (in the notebook: the date and the target series).
        selection: optional comma-separated list of 1-based column numbers
            and inclusive ranges, e.g. ``"1-3, 7"``. When omitted, the
            numbered column menu is printed and the selection is read with
            ``input()``. Passing it makes the cell reproducible under
            "Restart & Run All".

    Returns:
        A new DataFrame with the selected columns in the requested order.
        Columns are picked by position, so duplicate column names across
        datasets still yield exactly the columns that were numbered.

    Raises:
        AssertionError: if no dataset is given.
        ValueError: if the selection is malformed, descending or empty.
        IndexError: if a column number is outside ``1..number of columns``.
    """
    assert len(datasets) > 0, "No data sets entered"
    min_rows = min(len(ds) for ds in datasets)

    # Slice by position rather than by index label so the tail alignment is
    # correct for any index, not only a default 0..n-1 RangeIndex.
    aligned = [
        ds.iloc[len(ds) - min_rows:].reset_index(drop=True) for ds in datasets
    ]

    merged_df = pd.concat(
        [aligned[0].iloc[:, :2]] + [ds.iloc[:, 2:] for ds in aligned],
        axis=1,
    )
    merged_cols = list(merged_df.columns)

    if selection is None:
        _print_column_menu(merged_cols)
        selection = input("\nSelect columns: ")

    positions = _parse_column_selection(selection, len(merged_cols))
    return merged_df.iloc[:, positions]
        
    
# Argument order matters: columns are appended in this order, which fixes
# the numbers shown in the selection menu.
fs = build_feature_set(
    data['WMA-Lagged'],
    data['WMA'],
    data['Lagged'],
    data['MA'],
    data['MA-Lagged'],
)

1) Date                                           2) Skelton MDF (Cumecs)
3) Crakehill MDF (WMA3) (t-1)                     4) Skip Bridge MDF (WMA3) (t-1)
5) Westwick MDF (WMA3) (t-1)                      6) Skelton MDF (WMA3) (t-1)
7) Crakehill MDF (WMA4) (t-1)                     8) Skip Bridge MDF (WMA4) (t-1)
9) Westwick MDF (WMA4) (t-1)                      10) Skelton MDF (WMA4) (t-1)
11) Crakehill MDF (WMA5) (t-1)                    12) Skip Bridge MDF (WMA5) (t-1)
13) Westwick MDF (WMA5) (t-1)                     14) Skelton MDF (WMA5) (t-1)
15) Crakehill MDF (WMA6) (t-1)                    16) Skip Bridge MDF (WMA6) (t-1)
17) Westwick MDF (WMA6) (t-1)                     18) Skelton MDF (WMA6) (t-1)
19) Crakehill MDF (WMA7) (t-1)                    20) Skip Bridge MDF (WMA7) (t-1)
21) Westwick MDF (WMA7) (t-1)                     22) Skelton MDF (WMA7) (t-1)
23) Arkengarthdale DRT (WMA3) (t-1)               24) East Cowton DRT (WMA3) (t-1)
25) Malham Tarn DRT (WMA3) (t-1)    

In [136]:
# Bare last expression: Jupyter renders the selected feature set as a table.
fs

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2)
0,1993-01-08,56.66,22.256850,8.248055,28.248591,63.880787,0.0,0.0,1.6,0.8
1,1993-01-09,78.10,22.127922,7.755102,28.920420,60.256235,5.6,4.0,17.6,36.0
2,1993-01-10,125.70,28.827045,7.954440,44.617368,69.195577,1.6,0.0,1.6,2.4
3,1993-01-11,195.90,39.924360,10.617821,69.308296,97.475406,14.4,0.8,55.2,104.8
4,1993-01-12,125.40,52.718427,18.093061,69.405696,146.711744,20.8,2.4,76.0,136.8
...,...,...,...,...,...,...,...,...,...,...
1439,1996-12-27,29.52,14.392193,9.506391,15.440107,43.046081,0.0,3.2,0.8,1.6
1440,1996-12-28,28.67,12.646096,7.613695,13.499053,36.283041,0.0,0.8,0.0,0.0
1441,1996-12-29,29.31,11.873048,6.549848,12.455027,32.476520,1.6,14.4,8.8,3.2
1442,1996-12-30,34.28,11.986524,5.939424,12.118013,30.893260,11.2,11.2,4.8,4.8
