In [41]:
import pandas as pd
import numpy as np

from scipy.stats import rankdata
from scipy.stats import ks_2samp

In [55]:
path_reinforce_class = "../data/results/REINFORCE_class.csv"
path_reinforce_method = "../data/results/REINFORCE_method.csv"
path_sfs_class = "../data/results/SFS_class.csv"
path_sfs_method = "../data/results/SFS_method.csv"

In [56]:
df_reinforce_class = pd.read_csv(path_reinforce_class)
df_reinforce_method = pd.read_csv(path_reinforce_method)
df_sfs_class =pd.read_csv(path_sfs_class)
df_sfs_method = pd.read_csv(path_sfs_method)

df_reinforce_class

Unnamed: 0,number_of_features,min_error,max_error,avg_error
0,0,1.0,1.0,1.0
1,1,0.2571245,0.3630284,0.2681062
2,2,0.09754496,0.1328978,0.1256821
3,3,0.04319395,0.06600571,0.05463213
4,4,0.01850169,0.01850169,0.01850169
5,5,0.007790031,0.01058666,0.008076199
6,6,0.003192803,0.003677137,0.00358027
7,7,0.0006633146,0.0006633146,0.0006633146
8,8,0.0002659712,0.0002659712,0.0002659712
9,9,9.46305e-05,0.0001016169,9.742507e-05


### Class

In [77]:
# Combine data
reinforce = list(df_reinforce_class['avg_error'][1:])
sfs = list(df_sfs_class['error'][1:])
combined = sfs + reinforce

# Rank the data
ranks = rankdata(combined)

# Split the ranks back into groups
reinforce_ranks = ranks[:len(sfs)]
sfs_ranks = ranks[len(sfs):]

print("SFS Ranks:", sfs_ranks)
print("REINFORCE Ranks:", reinforce_ranks)

SFS Ranks: [42.  40.  38.  35.5 34.  32.  29.5 27.5 26.  23.  22.  20.  18.  15.5
 14.  12.  10.   8.   6.   4.   1.5]
REINFORCE Ranks: [41.  39.  37.  35.5 33.  31.  29.5 27.5 25.  24.  21.  19.  17.  15.5
 13.  11.   9.   7.   5.   3.   1.5]


In [78]:
R_SFS = np.sum(sfs_ranks)
R_REINFORCE = np.sum(reinforce_ranks)

print(f"Sum of SFS Ranks: {R_SFS}")
print(f"Sum of REINFORCE Ranks: {R_REINFORCE}")

Sum of SFS Ranks: 458.5
Sum of REINFORCE Ranks: 444.5


In [79]:
n1 = len(sfs_ranks)
n2 = len(reinforce_ranks)

U_SFS = n1 * n2 + (n1 * (n1 + 1)) / 2 - R_SFS
U_REINFORCE = n1 * n2 - U_SFS

print(f"U statistic for SFS: {U_SFS}")
print(f"U statistic for REINFORCE: {U_REINFORCE}")


U statistic for SFS: 213.5
U statistic for REINFORCE: 227.5


In [80]:
from scipy.stats import mannwhitneyu

# Perform the Mann-Whitney U test
stat, p_value = mannwhitneyu(sfs_ranks, reinforce_ranks, alternative='two-sided')

print(f"Mann-Whitney U statistic: {stat}")
print(f"P-value: {p_value}")


Mann-Whitney U statistic: 227.5
P-value: 0.8700889948173781


In [81]:
ALPHA = 0.05

### 4. Report the Results
   Once you have the \(U\)-statistics and \(p\)-value:
   - Summarize the results in a table.
   - Mention whether the null hypothesis was rejected or not.

For example:

| Method        | Sum of Ranks (\(R\)) | \(U\)-Statistic | \(p\)-Value | Conclusion                        |
|---------------|-----------------------|-----------------|-------------|----------------------------------|
| SFS           | [Calculated Value]   | [Calculated Value] | [Calculated Value] | No significant difference if \(p \geq 0.05\) |
| REINFORCE     | [Calculated Value]   | [Calculated Value] | [Calculated Value] | Same conclusion as above |

Let me know if you encounter any issues or need further clarification!

### Method

In [73]:
# Combine data
reinforce = list(df_reinforce_method['avg_error'][1:])
sfs = list(df_sfs_method['error'][1:])
combined = sfs + reinforce

# Rank the data
ranks = rankdata(combined)

# Split the ranks back into groups
reinforce_ranks = ranks[:len(sfs)]
sfs_ranks = ranks[len(sfs):]

print("SFS Ranks:", sfs_ranks)
print("REINFORCE Ranks:", reinforce_ranks)

SFS Ranks: [41.5 40.  38.  36.  35.  33.  31.  29.  27.  25.  24.  23.  21.  19.
 17.  12.  10.   9.   6.   4.   1.5]
REINFORCE Ranks: [41.5 39.  37.  34.  32.  30.  28.  26.  22.  20.  18.  16.  15.  14.
 13.  11.   8.   7.   5.   3.   1.5]


In [74]:
R_SFS = np.sum(sfs_ranks)
R_REINFORCE = np.sum(reinforce_ranks)

print(f"Sum of SFS Ranks: {R_SFS}")
print(f"Sum of REINFORCE Ranks: {R_REINFORCE}")

Sum of SFS Ranks: 482.0
Sum of REINFORCE Ranks: 421.0


In [75]:
n1 = len(sfs_ranks)
n2 = len(reinforce_ranks)

U_SFS = n1 * n2 + (n1 * (n1 + 1)) / 2 - R_SFS
U_REINFORCE = n1 * n2 - U_SFS

print(f"U statistic for SFS: {U_SFS}")
print(f"U statistic for REINFORCE: {U_REINFORCE}")


U statistic for SFS: 190.0
U statistic for REINFORCE: 251.0


In [76]:
from scipy.stats import mannwhitneyu

# Perform the Mann-Whitney U test
stat, p_value = mannwhitneyu(sfs_ranks, reinforce_ranks, alternative='two-sided')

print(f"Mann-Whitney U statistic: {stat}")
print(f"P-value: {p_value}")


Mann-Whitney U statistic: 251.0
P-value: 0.4504089831168169
