# Experimental Designs Example
`experimental_designs.py` is a module for creating statistical experimental designs. Each design is specified as a nested dictionary and output as a list of dictionaries, a format equivalent to a flat table in which the column names are factor names, each row is one realization of a test, and the values are test values or factor levels.

## Imports

In [1]:
import sys
import os
import pandas as pd
import experimental_designs

ModuleNotFoundError: No module named 'pandas'

## 2x2 Factorial Example
A very simple design has two factors with two levels each. Here we use - and + as factor levels and -1 and 1 as the corresponding factor values.

In [27]:
# Two factors, each with a low ("-") and a high ("+") level whose values are -1 and 1.
design = {
    factor: {"-": -1, "+": 1}
    for factor in ("Factor_1", "Factor_2")
}

In [28]:
test_conditions = experimental_designs.fully_factorial(design)

In [29]:
test_conditions

[{'Factor_1': 1, 'Factor_2': -1},
 {'Factor_1': -1, 'Factor_2': 1},
 {'Factor_1': 1, 'Factor_2': 1},
 {'Factor_1': -1, 'Factor_2': -1}]

In [30]:
# Converting test_conditions to a pandas DataFrame makes it easy to format and manipulate.
test_conditions_df = pd.DataFrame(test_conditions)

In [31]:
test_conditions_df

Unnamed: 0,Factor_1,Factor_2
0,1,-1
1,-1,1
2,1,1
3,-1,-1


### Optional parameters

In [7]:
# Several optional parameters control whether test_conditions is randomized,
# whether the factor levels (keys) or the factor values are returned,
# and which random seed is used to make a shuffle reproducible.
# This call is not randomized.
test_conditions_options = experimental_designs.fully_factorial(design_dictionary= design, 
                                          randomized= False, 
                                          run_values= "values",
                                          random_seed= 42)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2
0,-1,-1
1,-1,1
2,1,-1
3,1,1


In [8]:
# run_values="keys" returns the factor levels ("-"/"+") instead of the factor values
test_conditions_options = experimental_designs.fully_factorial(design_dictionary= design, 
                                          randomized= False, 
                                          run_values= "keys",
                                          random_seed= 42)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2
0,-,-
1,-,+
2,+,-
3,+,+


In [9]:
# this run order is randomized, using a different random seed (41)
test_conditions_options = experimental_designs.fully_factorial(design_dictionary= design, 
                                          randomized= True, 
                                          run_values= "values",
                                          random_seed= 41)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2
0,1,-1
1,-1,-1
2,-1,1
3,1,1


In [10]:
# re-running with the same random seed (41) gives the same shuffled test_conditions
test_conditions_options = experimental_designs.fully_factorial(design_dictionary= design, 
                                          randomized= True, 
                                          run_values= "values",
                                          random_seed= 41)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2
0,1,-1
1,-1,-1
2,-1,1
3,1,1


In [11]:
# a different random seed (42) gives a different, but still reproducible, shuffle
test_conditions_options = experimental_designs.fully_factorial(design_dictionary= design, 
                                          randomized= True, 
                                          run_values= "values",
                                          random_seed= 42)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2
0,1,-1
1,-1,1
2,1,1
3,-1,-1


## Different experimental designs

### Fully factorial with a default

In [12]:
# Three two-level factors named F1..F3, plus a default state entry for each factor.
n_factors = 3
factor_names = [f"F{i}" for i in range(1, n_factors + 1)]
design = {name: {"-": -1, "+": 1} for name in factor_names}
default = {name: {0: "Default"} for name in factor_names}

In [13]:
# Fully factorial design of the three factors, combined with the default state.
# In the non-randomized output below, rows of "Default" values appear between the design runs.
default_design = experimental_designs.fully_factorial_default(design_dictionary=design,default_state=default,
                                                              randomized= False,run_values="values")
dd_df = pd.DataFrame(default_design)
dd_df

Unnamed: 0,F1,F2,F3
0,Default,Default,Default
1,-1,-1,-1
2,-1,-1,1
3,Default,Default,Default
4,-1,1,-1
5,-1,1,1
6,Default,Default,Default
7,1,-1,-1
8,1,-1,1
9,Default,Default,Default


### Split plot

In [14]:
def _two_level_factors(prefix, count):
    """Return `count` factors named prefix1..prefixN, each with levels "-" -> -1 and "+" -> 1."""
    return {f"{prefix}{i}": {"-": -1, "+": 1} for i in range(1, count + 1)}


# Whole plot: 2 factors x 2 levels; split plot: 3 factors x 2 levels.
n_wp_factors, n_sp_factors = 2, 3
wp = _two_level_factors("WPF", n_wp_factors)
sp = _two_level_factors("SPF", n_sp_factors)

In [15]:
# Combine the whole plot (wp) and split plot (sp) factors into one split plot design;
# run_values="keys" reports the factor levels ("-"/"+") rather than their values.
split_plot_design = experimental_designs.fully_factorial_split_plot(wp,sp,randomized= False,run_values="keys")
sp_df = pd.DataFrame(split_plot_design)
sp_df

Unnamed: 0,WPF1,WPF2,SPF1,SPF2,SPF3
0,-,-,-,-,-
1,-,-,-,-,+
2,-,-,-,+,-
3,-,-,-,+,+
4,-,-,+,-,-
5,-,-,+,-,+
6,-,-,+,+,-
7,-,-,+,+,+
8,-,+,-,-,-
9,-,+,-,-,+


### Split plot with a default
The default is expected to be specified for the whole plot only; it is run with the same split plot as the main design.

In [16]:
# Whole plot: 2 factors x 2 levels; split plot: 3 factors x 2 levels.
# The default state is defined for the whole plot factors only.
n_wp_factors = 2
n_sp_factors = 3
wp_names = [f"WPF{i}" for i in range(1, n_wp_factors + 1)]
wp = {name: {"-": -1, "+": 1} for name in wp_names}
sp = {f"SPF{i}": {"-": -1, "+": 1} for i in range(1, n_sp_factors + 1)}
default_state = {name: {0: "default"} for name in wp_names}

In [17]:
# Same split plot design, with default_state supplied for the whole plot factors.
# In the non-randomized output below, the default whole plot (key 0) is listed first,
# paired with every split plot combination.
split_plot_design_default = experimental_designs.fully_factorial_split_plot_default(wp,
                                                                                   sp,
                                                                                   default_state,
                                                                                   randomized= False,
                                                                                   run_values="keys")
sp_d_df = pd.DataFrame(split_plot_design_default)
sp_d_df

Unnamed: 0,WPF1,WPF2,SPF1,SPF2,SPF3
0,0,0,-,-,-
1,0,0,-,-,+
2,0,0,-,+,-
3,0,0,-,+,+
4,0,0,+,-,-
5,0,0,+,-,+
6,0,0,+,+,-
7,0,0,+,+,+
8,-,-,-,-,-
9,-,-,-,-,+


## 2x3 example with a conditional factor 
A common design has 3 factors with 2 levels each, with the third factor confounded with the first two such that Factor_3 = Factor_1 * Factor_2. Here I have done it with the pandas DataFrame; however, you can also do it directly on the list of dictionaries like this:

```python
# Add the confounded third factor to every run, updating each dictionary in place.
for test_condition in test_conditions:
    test_condition["Factor_3"] = test_condition["Factor_1"] * test_condition["Factor_2"]
```

In [18]:
# Base 2x2 design: two factors, each with levels "-" and "+" valued -1 and 1.
design = {
    "Factor_1": {"-": -1, "+": 1},
    "Factor_2": {"-": -1, "+": 1},
}

In [19]:
# Generate the reproducibly shuffled 2x2 runs, then add the confounded factor
# Factor_3 = Factor_1 * Factor_2 as a vectorized column product
# (a row-wise apply with a lambda is not needed for an element-wise product).
test_conditions_options = experimental_designs.fully_factorial(design_dictionary=design,
                                                               randomized=True,
                                                               run_values="values",
                                                               random_seed=42)
test_conditions_options_df = pd.DataFrame(test_conditions_options)
test_conditions_options_df["Factor_3"] = (
    test_conditions_options_df["Factor_1"] * test_conditions_options_df["Factor_2"]
)
test_conditions_options_df

Unnamed: 0,Factor_1,Factor_2,Factor_3
0,1,-1,-1
1,-1,1,-1
2,1,1,1
3,-1,-1,1


## AMT design as an example (4 factors: 4 levels x 4 levels x 7 levels x 11 levels)
The experiment also has the conditions ('Modulation Type' = "PCM/FM","Data Rate" = 20) and ('Modulation Type'= "ARTM-CPM","Data Rate" = 1) excluded. The experiment outlined in the report is actually a split plot design.

In [32]:
# Four factors; each maps an integer level index (starting at 0) to the value used in the test.
amt_main = {
    "Modulation Type": dict(enumerate(["PCM/FM", "SOQPSK", "SOQPSK-FEC", "ARTM-CPM"])),
    "Data Rate": dict(enumerate([1, 5, 10, 20])),
    "ABE Type": dict(enumerate([
        "None", "AWGN 20", "AWGN 18", "AWGN 16.5",
        "Multiple UE 1", "Multiple UE 2", "Single UE 1",
    ])),
    "AMT Signal Level": dict(enumerate(range(11))),
}
# Combinations of factor values to be excluded from the design.
exclusions = [
    {"Modulation Type": "PCM/FM", "Data Rate": 20},
    {"Modulation Type": "ARTM-CPM", "Data Rate": 1},
]

In [33]:
# Build the full (non-randomized) factorial, pass it through filter_rows with the
# exclusions list (the excluded combinations described above), and convert the
# remaining runs to a DataFrame for display.
test_conditions= experimental_designs.fully_factorial(amt_main,randomized = False)
test_conditions= experimental_designs.filter_rows(test_conditions,exclusions)
test_conditions_df= pd.DataFrame(test_conditions) 
test_conditions_df

Unnamed: 0,Modulation Type,Data Rate,ABE Type,AMT Signal Level
0,PCM/FM,1,,0
1,PCM/FM,1,,1
2,PCM/FM,1,,2
3,PCM/FM,1,,3
4,PCM/FM,1,,4
...,...,...,...,...
1073,ARTM-CPM,20,Single UE 1,6
1074,ARTM-CPM,20,Single UE 1,7
1075,ARTM-CPM,20,Single UE 1,8
1076,ARTM-CPM,20,Single UE 1,9


## Closed loop power control study as an example
The closed loop power control study was an interleaved whole plot / split plot design. It has two experiments conducted at the same time with different whole and split plot conditions. 

In [34]:
# Modelling experiment: split plot and whole plot factors (level index starting at 1 -> value).
clpc_modelling_sp = {
    "Additional Loss (dB)": dict(enumerate([5, 10, 15, 20, 25, 30, 35, 40], start=1)),
    "Crosstalk": dict(enumerate(["HIGH", "LOW"], start=1)),
    "Offered Load": dict(enumerate([.1, .2, .4], start=1)),
}
clpc_modelling_wp = {
    "UL Scheduling": dict(enumerate(["Channel Aware", "Interference Aware"], start=1)),
    "P0": dict(enumerate([-80, -85, -90, -95, -100, -105], start=1)),
    "alpha": dict(enumerate([.8, 1.0], start=1)),
}

# Monitoring experiment: a single whole plot condition (level index -1) and its own split plot factors.
clpc_monitoring_wp = {
    "UL Scheduling": {-1: "Channel Unaware"},
    "P0": {-1: -85},
    "alpha": {-1: .8},
}

clpc_monitoring_sp = {
    "Additional Loss (dB)": dict(
        enumerate([0, 5, 10, 15, 20, 25, 30, 35, 37.5, 40, 42.5, 45], start=1)
    ),
    "Crosstalk": dict(enumerate(["HIGH", "LOW"], start=1)),
    "Offered Load": dict(enumerate([.1, .2, .4], start=1)),
}

In [35]:
# Build the interleaved design: the modelling whole/split plots are the main design and
# the monitoring whole/split plots are passed as the *_interleaved arguments.
# NOTE(review): interleave_modulo presumably controls how often an interleaved whole plot
# is inserted — confirm against experimental_designs.
clpc_design = experimental_designs.fully_factorial_split_plot_interleaved(whole_plot_design_dictionary = clpc_modelling_wp,
                                                                          split_plot_design_dictionary = clpc_modelling_sp,
                                                                          whole_plot_design_dictionary_interleaved = clpc_monitoring_wp,
                                                                          split_plot_design_dictionary_interleaved = clpc_monitoring_sp,
                                                                          interleave_modulo = 4,
                                                                          randomized = True,
                                                                          run_values = 'values')

In [36]:
clpc_design_df = pd.DataFrame(clpc_design)

In [37]:
clpc_design_df

Unnamed: 0,UL Scheduling,P0,alpha,Additional Loss (dB),Crosstalk,Offered Load
0,Channel Unaware,-85,0.8,42.5,HIGH,0.1
1,Channel Unaware,-85,0.8,20.0,HIGH,0.1
2,Channel Unaware,-85,0.8,45.0,LOW,0.1
3,Channel Unaware,-85,0.8,45.0,LOW,0.4
4,Channel Unaware,-85,0.8,40.0,HIGH,0.1
...,...,...,...,...,...,...
1579,Interference Aware,-100,0.8,15.0,HIGH,0.4
1580,Interference Aware,-100,0.8,30.0,HIGH,0.4
1581,Interference Aware,-100,0.8,40.0,LOW,0.2
1582,Interference Aware,-100,0.8,20.0,HIGH,0.1


In [38]:
clpc_design_df[0:2]

Unnamed: 0,UL Scheduling,P0,alpha,Additional Loss (dB),Crosstalk,Offered Load
0,Channel Unaware,-85,0.8,42.5,HIGH,0.1
1,Channel Unaware,-85,0.8,20.0,HIGH,0.1
