# Import & Set Variables

In [90]:
import pandas as pd
import numpy as np

fname = "cow.csv"

# Inclusive
LOWER_BOUND = 0
UPPER_BOUND = 1

NORMAL_TEMPERATURE = 37.8
NORMAL_PULSE = (30, 40)
NORMAL_RESPIRATORY_RATE = (8, 10)
NORMAL_NASOGASTRIC_REFLUX_PH = (3, 4)
NORMAL_PACKED_CELL_VOLUME = (30, 50)
NORMAL_TOTAL_PROTEIN = (6, 7.5)
NORMAL_ABDOMO_APPEARANCE = "clear"

# Load Data

In [91]:
df = pd.read_csv(fname)

In [92]:
df

Unnamed: 0,surgery,age,hospital_number,temperature,pulse,respiratory_rate,temp_of_extremities,peripheral_pulse,mucous_membrane,capillary_refill_time,...,nasogastric_reflux_ph,rectal_exam_feces,abdomen,packed_cell_volume,total_protein,abdomo_appearance,abdomo_protein,outcome Class,surgical_lesion,lesion
0,no,adult,530101,38.5,66.0,28.0,cool,reduced,,more_3_sec,...,,decreased,distend_large,45.0,8.4,,,died,no,11300
1,yes,adult,534817,39.2,88.0,20.0,,,pale_cyanotic,less_3_sec,...,,absent,other,50.0,85.0,cloudy,2.0,euthanized,no,2208
2,no,adult,530334,38.3,40.0,24.0,normal,normal,pale_pink,less_3_sec,...,,normal,normal,33.0,6.7,,,lived,no,0
3,yes,young,5290409,39.1,164.0,84.0,cold,normal,dark_cyanotic,more_3_sec,...,5.0,decreased,,48.0,7.2,serosanguious,5.3,died,yes,2208
4,no,adult,530255,37.3,104.0,35.0,,,dark_cyanotic,more_3_sec,...,,,,74.0,7.4,,,died,no,4300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,yes,adult,533886,,120.0,70.0,cold,,pale_cyanotic,more_3_sec,...,,,distend_large,55.0,65.0,,,euthanized,no,3205
295,no,adult,527702,37.2,72.0,24.0,cool,increased,pale_cyanotic,more_3_sec,...,,absent,distend_small,44.0,,serosanguious,3.3,euthanized,yes,2208
296,yes,adult,529386,37.5,72.0,30.0,cold,reduced,pale_cyanotic,less_3_sec,...,,decreased,distend_large,60.0,6.8,,,died,yes,3205
297,yes,adult,530612,36.5,100.0,24.0,cool,reduced,pale_pink,less_3_sec,...,,absent,distend_small,50.0,6.0,serosanguious,3.4,lived,yes,2208


# Preprocess Data

In [93]:
df['surgery'] = df['surgery'].map({"yes" : 1, "no" : 2})
df['age'] = df['age'].map({"adult" : 1, "young" : 2})
df = df.drop(columns=['hospital_number'])
df['temperature'] = df['temperature'].replace(np.nan, NORMAL_TEMPERATURE)
df['pulse'] = df['pulse'].replace(np.nan, np.mean(NORMAL_PULSE))
df['respiratory_rate'] = df['respiratory_rate'].replace(np.nan, np.mean(NORMAL_RESPIRATORY_RATE))
df['temp_of_extremities'] = df['temp_of_extremities'].map({"normal" : 1, "warm" : 2, "cool": 3, "cold": 4})
df['peripheral_pulse'] = df['peripheral_pulse'].map({"normal" : 1, "increased" : 2, "reduced": 3, "absent": 4})
df['mucous_membrane'] = df['mucous_membrane'].map({"normal_pink": 1 , "bright_pink": 2, "pale_pink": 3, "pale_cyanotic": 4, "bright_red": 5, "dark_cyanotic": 6})
df['capillary_refill_time'] = df['capillary_refill_time'].map({"less_3_sec":1, "3": 2, "more_3_sec": 2})
df['pain'] = df['pain'].map({"alert": 1, "depressed": 2, "mild_pain": 3, "severe_pain": 4, "extreme_pain": 5})
df['peristalsis'] = df['peristalsis'].map({"hypermotile": 1, "normal": 2, "hypomotile": 3, "absent": 4})
df['abdominal_distention'] = df['abdominal_distention'].map({"none": 1, "slight": 2, "moderate": 3, "severe": 4})
df['nasogastric_tube'] = df['nasogastric_tube'].map({"none": 1, "slight": 2, "significant": 3})
df['nasogastric_reflux'] = df['nasogastric_reflux'].map({"none": 1, "less_1_liter": 2, "more_1_liter": 3})
df['nasogastric_reflux_ph'] = df['nasogastric_reflux_ph'].replace(np.nan, np.mean(NORMAL_NASOGASTRIC_REFLUX_PH))
df['rectal_exam_feces'] = df['rectal_exam_feces'].map({"normal": 1, "increased": 2, "decreased": 3, "absent": 4})
df['abdomen'] = df['abdomen'].map({"normal": 1, "other": 2, "firm": 3, "distend_small": 4, "distend_large": 5})
df['packed_cell_volume'] = df['packed_cell_volume'].replace(np.nan, np.mean(NORMAL_PACKED_CELL_VOLUME))
df['total_protein'] = df['total_protein'].replace(np.nan, np.mean(NORMAL_TOTAL_PROTEIN))
df['abdomo_appearance'] = df['abdomo_appearance'].map({"clear": 1, "cloudy": 2, "serosanguinous": 3})
df = df.drop(columns=["abdomo_protein"])
df['surgical_lesion'] = df['surgical_lesion'].map({"yes": 1, "no": 2})
df['outcome Class'] = df['outcome Class'].map({"lived": 1, "died": 2, "euthanized": 3})
df = df.drop(columns=["lesion"])

df = df.dropna()
df = df.reset_index(drop=True)

Y = df['outcome Class'].to_numpy()
df = df.drop(columns="outcome Class", axis=1)

X = df.to_numpy()

In [94]:
X

array([[  1. ,   1. ,  38.1,  66. ,  12. ,   3. ,   3. ,   5. ,   1. ,
          3. ,   3. ,   1. ,   2. ,   1. ,   3. ,   2. ,   5. ,  44. ,
          6. ,   2. ,   1. ],
       [  1. ,   1. ,  38.2,  76. ,  28. ,   3. ,   1. ,   1. ,   1. ,
          3. ,   4. ,   1. ,   2. ,   2. ,   3.5,   4. ,   4. ,  46. ,
         81. ,   1. ,   1. ],
       [  1. ,   1. ,  37.6,  64. ,  21. ,   1. ,   1. ,   2. ,   1. ,
          2. ,   3. ,   1. ,   1. ,   1. ,   3.5,   2. ,   5. ,  40. ,
          7. ,   1. ,   1. ],
       [  1. ,   1. ,  39.9,  72. ,  60. ,   1. ,   1. ,   5. ,   2. ,
          5. ,   4. ,   4. ,   3. ,   1. ,   3.5,   4. ,   4. ,  46. ,
          6.1,   2. ,   1. ],
       [  1. ,   1. ,  38.3,  72. ,  30. ,   4. ,   3. ,   3. ,   2. ,
          3. ,   3. ,   3. ,   2. ,   1. ,   3.5,   3. ,   5. ,  43. ,
          7. ,   2. ,   1. ],
       [  2. ,   1. ,  38.6,  52. ,   9. ,   1. ,   1. ,   1. ,   1. ,
          3. ,   3. ,   2. ,   1. ,   1. ,   3.5,   1. ,   3. ,  32. 

In [95]:
Y

array([1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 2, 2,
       1, 1, 1, 1, 1, 1])

In [96]:
X.shape, Y.shape

((28, 21), (28,))