In [39]:
import pandas as pd
import numpy as np

In [71]:
df_original = pd.read_csv("cd32a_00000_log_train_run_PPO_4_edges_action_clipping.csv", sep=";")

In [72]:
df_original.reward.unique()

array([-10, 100,  -1])

In [73]:
#count how many times each value in reward is in df:
df_original.reward.value_counts()


 100    1490834
-1        65282
-10       16781
Name: reward, dtype: int64

In [74]:
# 1_572_897
number_steps = 1490834 + 65282 + 16781
print(number_steps)

length_df = len(df_original)
print(length_df)
# Halleluja, counts match!

1572897
1572897


In [80]:
n_100 = df_original.reward.value_counts()[100]
n_minus1 = df_original.reward.value_counts()[-1]
n_minus10 = df_original.reward.value_counts()[-10]

print(round(n_100/number_steps*100, 2))
print(round(n_minus1/number_steps*100, 2))
print(round(n_minus10/number_steps*100, 2))

94.78
4.15
1.07


### Result 1:
* within approx. 1,600 steps  
    * about 95% of steps lead to a solved problem
    * about 1% lead to a loop in the board
    * about 4% lead to a wrong value as a result, reached the maximum episode length or any other error

In [43]:
# Inspecting each group of reward values:
# reward = 100

# Succesfully solved problems:
df_100 = df_original[df_original['reward']==100]
df_solved = df_100['problem_id'].value_counts() # count how many times each index is in df_100
df_solved = df_solved.rename_axis('problem_id').reset_index(name='count_100') # rename columns
df = pd.merge(df_original, df_solved, on='problem_id', how='left') # merge with original df
df.head()

Unnamed: 0,problem_id,reward,config,count_100
0,20349,-10,[9 6 30 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 ...,32
1,12731,-10,[6 7 12 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,40
2,20533,-10,[5 7 56 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,23
3,41315,-10,[6 9 24 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,33
4,22524,-10,[6 5 8 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0...,30


In [45]:
# reward = -1 (error, no solution, max. steps reached, etc.)

df_minus1 = df_original[df_original['reward']==-1]
df_error = df_minus1['problem_id'].value_counts()
df_error = df_error.rename_axis('problem_id').reset_index(name='count_minus1')
df = pd.merge(df, df_error, on='problem_id', how='left')
df.head()

Unnamed: 0,problem_id,reward,config,count_100,count_minus1
0,20349,-10,[9 6 30 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 ...,32,2.0
1,12731,-10,[6 7 12 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,40,1.0
2,20533,-10,[5 7 56 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,23,
3,41315,-10,[6 9 24 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,33,2.0
4,22524,-10,[6 5 8 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0...,30,


In [47]:
# reward = -10 (loop)

df_minus10 = df_original[df_original['reward']==-10]
df_loop = df_minus10['problem_id'].value_counts()
df_loop = df_loop.rename_axis('problem_id').reset_index(name='count_minus10')
df = pd.merge(df, df_loop, on='problem_id', how='left')
df.head()

Unnamed: 0,problem_id,reward,config,count_100,count_minus1,count_minus10
0,20349,-10,[9 6 30 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 ...,32,2.0,1.0
1,12731,-10,[6 7 12 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,40,1.0,2.0
2,20533,-10,[5 7 56 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,23,,2.0
3,41315,-10,[6 9 24 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,33,2.0,1.0
4,22524,-10,[6 5 8 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0...,30,,2.0


In [84]:
# check if any problem was never solved
df['unsolved'] = np.where(df['count_100'].isnull(), 1, 0)
df.unsolved.describe()

In [89]:
# check how many times a problem was not solved
df['count_error'] = df['count_minus1'] + df['count_minus10']
df.count_error.describe()

count    1.572897e+06
mean     1.953636e+00
std      1.534249e+00
min      0.000000e+00
25%      1.000000e+00
50%      2.000000e+00
75%      3.000000e+00
max      1.000000e+01
Name: count_error, dtype: float64

In [48]:
# replace NaN with 0
df = df.fillna(0)

In [82]:
# change type in column count_minus10 from float to int
df['count_minus10'] = df['count_minus10'].astype(int)
df['count_minus1'] = df['count_minus1'].astype(int)
df.head()

Unnamed: 0,problem_id,reward,config,count_100,count_minus1,count_minus10,count_sum,ratio
26125,42467,-1,[7 9 43 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 ...,9,6,0,15,0.428571
26123,42467,-1,[7 9 43 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 ...,9,6,0,15,0.428571
1434202,42467,100,[7 9 43 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 ...,9,6,0,15,0.428571
558405,10149,100,[5 9 36 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,6,4,0,10,0.428571
263762,42467,-1,[7 9 43 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 ...,9,6,0,15,0.428571


In [83]:
print('min 100:', df.count_100.min())
print('max 100:', df.count_100.max())


min 100: 3
max 100: 79


In [51]:
df.count_minus1.describe()

count    1.572897e+06
mean     1.551748e+00
std      1.361691e+00
min      0.000000e+00
25%      1.000000e+00
50%      1.000000e+00
75%      2.000000e+00
max      1.000000e+01
Name: count_minus1, dtype: float64

In [52]:
df.count_minus10.describe()

count    1.572897e+06
mean     4.018877e-01
std      6.469249e-01
min      0.000000e+00
25%      0.000000e+00
50%      0.000000e+00
75%      1.000000e+00
max      5.000000e+00
Name: count_minus10, dtype: float64

### Result 2:
* solved problems:
    * each problem (successfully) solved at least 3 times, max. 79 tines
* loops:
    * some problems never lead to a loop
    * max. 5 times a problem lead to a loop
* other errors:
    * max. 10 times for unsolved problems
* overall errors:
    * no problem was unsolved more than 10 times

In [53]:
df['count_sum'] = df['count_100'] + df['count_minus1'] + df['count_minus10']

In [54]:
df['ratio'] = df['count_100'] / (df['count_sum'] + df['count_minus1'] + df['count_minus10'])

In [62]:
df_erfolgreich_immer = df[(df['count_minus1']== 0) & (df['count_minus10']== 0)] # create df with only problems that were solved without error
df_erfolgreich_immer.head()

Unnamed: 0,problem_id,reward,config,count_100,count_minus1,count_minus10,count_sum,ratio
1499701,3724,100,[7 7 28 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,15,0,0,15,1.0
309041,10464,100,[6 9 36 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,43,0,0,43,1.0
302046,33313,100,[6 7 35 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,30,0,0,30,1.0
25128,4707,100,[7 9 36 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 ...,42,0,0,42,1.0
1542131,4245,100,[7 9 42 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 ...,36,0,0,36,1.0


In [57]:
array_erfolgreiche_ids = df_erfolgreich_immer['problem_id'].unique()
anzahl_immer_gelöst = len(array_erfolgreiche_ids)
anzahl_unique_ids = len(df['problem_id'].unique())
anteil_erfolgreiche_ids = anzahl_immer_gelöst / anzahl_unique_ids
print(f"Anzahl der immer gelösten Probleme: {anzahl_immer_gelöst}")
print(f"Anteil der immer gelösten Probleme: {round(anteil_erfolgreiche_ids*100, 2)}%")

Anzahl der immer gelösten Probleme: 8651
Anteil der immer gelösten Probleme: 19.22%


### Result 3:
* Anzahl der immer gelösten Probleme: 8651
* Anteil der immer gelösten Probleme: 19.22%


In [58]:
df.head()

Unnamed: 0,problem_id,reward,config,count_100,count_minus1,count_minus10,count_sum,ratio
0,20349,-10,[9 6 30 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 ...,32,2,1,35,0.842105
1,12731,-10,[6 7 12 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,40,1,2,43,0.869565
2,20533,-10,[5 7 56 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,23,0,2,25,0.851852
3,41315,-10,[6 9 24 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 ...,33,2,1,36,0.846154
4,22524,-10,[6 5 8 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0...,30,0,2,32,0.882353


In [59]:
df.count_sum.describe()

count    1.572897e+06
mean     3.790875e+01
std      1.026549e+01
min      4.000000e+00
25%      3.100000e+01
50%      3.700000e+01
75%      4.400000e+01
max      8.300000e+01
Name: count_sum, dtype: float64

In [61]:
# sort df by count_sum
df = df.sort_values(by=['ratio'], ascending=True)
df.ratio.describe()

count    1.572897e+06
mean     9.035994e-01
std      7.155704e-02
min      4.285714e-01
25%      8.604651e-01
50%      9.111111e-01
75%      9.523810e-01
max      1.000000e+00
Name: ratio, dtype: float64

### Result 4:
* Each problem was solved at least 42% of the time during this training run
* half of the problems were solved 91%-100% of the time
* Obviously, these numbers can be pushed with longer training.. Nevertheless,  we see that all problems were solved several times, suggesting that we can argue for the effect of the training (/learning) of our agent