In [1]:
import pandas as pd
import numpy as np
import random
import os
import sys
sys.path.append('../..')
from modules.many_features import utils, constants, env
from modules.many_features.random_agent import RandomAgent
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [3]:
#df = pd.read_csv('../../data/anemia_synth_dataset_some_nans_unspecified_more_feats.csv')
df = pd.read_csv('../../data/more_features/more_feats_new_labels_0.1.csv')
df = df.fillna(-1)
df.head()

Unnamed: 0,hemoglobin,ferritin,ret_count,segmented_neutrophils,tibc,mcv,serum_iron,rbc,age,gender,indirect_bilirubin,transferrin,creatinine,cholestrol,copper,ethanol,folate,glucose,label
0,11.183192,187.573466,4.951674,1.661383,316.593436,95.006424,181.242992,6.758433,60.586525,1,2.976104,282.291951,-1.0,28.040619,96.721542,49.530542,23.186628,-1.0,Hemolytic anemia
1,14.387445,-1.0,-1.0,-1.0,-1.0,-1.0,125.249617,6.350652,86.964793,1,2.968983,158.844879,-1.0,98.357508,112.758764,62.464566,7.16892,-1.0,No anemia
2,12.749357,5.012158,3.5028,6.179371,498.418768,76.759285,159.834784,5.018156,67.065069,1,2.705719,442.293823,1.022939,56.850479,75.739552,72.072041,20.600875,44.872138,Iron deficiency anemia
3,11.50887,197.180945,1.200125,0.0,457.033309,102.900301,131.177927,6.656823,66.403181,1,1.869338,223.896404,-1.0,111.220307,66.999185,18.353272,14.132423,-1.0,Unspecified anemia
4,9.456656,427.952052,-1.0,0.660252,-1.0,104.543774,-1.0,-1.0,-1.0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,Vitamin B12/Folate deficiency anemia


In [4]:
class_dict = constants.CLASS_DICT
df['label'] = df['label'].replace(class_dict)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=SEED)
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((56000, 18), (14000, 18), (56000,), (14000,))

#### The Random Agent

In [6]:
%%time
rand_agent = RandomAgent(X_test[0:], y_test[0:])
test_df = rand_agent.test()
test_df.head()

Testing done.....
Wall time: 1min 44s


Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
0,2.0,0.0,0.0,-1.0,0.0,"[ret_count, Vitamin B12/Folate deficiency anemia]",6.0,1.0
1,1.0,1.0,0.0,-1.0,0.0,[No anemia],2.0,0.0
2,2.0,2.0,0.0,-1.0,0.0,"[segmented_neutrophils, Iron deficiency anemia]",2.0,4.0
3,1.0,3.0,0.0,-1.0,0.0,[Anemia of chronic disease],5.0,3.0
4,1.0,4.0,0.0,-1.0,0.0,[Anemia of chronic disease],5.0,3.0


In [7]:
%%time
rand_agent.test_sample(2)

Wall time: 0 ns


['segmented_neutrophils', 'Inconclusive diagnosis']

In [8]:
from datetime import datetime
start = datetime.now()
rand_agent.test_sample(2)
end = datetime.now()
duration = end-start
duration

datetime.timedelta(0)

In [9]:
len(X_test), len(test_df)

(14000, 14000)

In [10]:
test_df.to_csv('../../test_dfs/many_features/random_test_df_0.1.csv', index=False)

In [11]:
success_rate, success_df = utils.success_rate(test_df)
success_rate

11.95

In [12]:
avg_length, avg_return = utils.get_avg_length_reward(test_df)
avg_length, avg_return

(1.6277857142857144, -0.8301428571428572)

In [13]:
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

(0.1195, 0.11850572076556337, 0.49691026010673406)

In [14]:
test_df.y_pred.unique()

array([1., 0., 4., 3., 2., 6., 7., 5.])