In [1]:
import numpy as np
import pandas as pd

In [2]:
# Source data file, relative to the notebook's working directory.
filename = "datasets/coluna_vertebral/column_3C.dat"
# Read only the first six columns (indices 0-5) as floats; the seventh
# column is text and is loaded on its own in a later cell.
indata_features = np.loadtxt(fname=filename, usecols=tuple(range(6)))

In [3]:
# Show the loaded feature array; NumPy abbreviates the middle rows in its repr.
indata_features

array([[ 63.03,  22.55,  39.61,  40.48,  98.67,  -0.25],
       [ 39.06,  10.06,  25.02,  29.  , 114.41,   4.56],
       [ 68.83,  22.22,  50.09,  46.61, 105.99,  -3.53],
       ...,
       [ 61.45,  22.69,  46.17,  38.75, 125.67,  -2.71],
       [ 45.25,   8.69,  41.58,  36.56, 118.55,   0.21],
       [ 33.84,   5.07,  36.64,  28.77, 123.95,  -0.2 ]])

In [4]:
# Column names for the six numeric features, in the same order as the
# columns read into `indata_features`.
features = [
    "pelvic incidence",
    "pelvic tilt",
    "lumbar lordosis angle",
    "sacral slope",
    "pelvic radius",
    "grade of spondylolisthesis",
]
# Wrap the raw array in a labelled DataFrame and preview the first rows.
df = pd.DataFrame(indata_features, columns=features)
df.head()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis
0,63.03,22.55,39.61,40.48,98.67,-0.25
1,39.06,10.06,25.02,29.0,114.41,4.56
2,68.83,22.22,50.09,46.61,105.99,-3.53
3,69.3,24.65,44.31,44.64,101.87,11.21
4,49.71,9.65,28.32,40.06,108.17,7.92


In [5]:
# Read the seventh column (index 6) of the same file as strings: the class label.
indata_class = np.loadtxt(fname=filename, dtype='str', usecols=6)

In [6]:
# Single-column frame holding the class label of every row.
col_class = ["labels"]
df_class = pd.DataFrame(indata_class, columns=col_class)
df_class.head()

Unnamed: 0,labels
0,DH
1,DH
2,DH
3,DH
4,DH


In [7]:
# Place the label column next to the six feature columns (rows are aligned on
# the shared index of the two frames).
data_all = pd.concat(objs=[df, df_class], axis="columns")

In [8]:
# Preview the first rows of the combined frame (six features plus `labels`).
data_all.head()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,labels
0,63.03,22.55,39.61,40.48,98.67,-0.25,DH
1,39.06,10.06,25.02,29.0,114.41,4.56,DH
2,68.83,22.22,50.09,46.61,105.99,-3.53,DH
3,69.3,24.65,44.31,44.64,101.87,11.21,DH
4,49.71,9.65,28.32,40.06,108.17,7.92,DH


In [9]:
# Summarise the column dtypes and non-null counts of the combined frame.
data_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 7 columns):
pelvic incidence              310 non-null float64
pelvic tilt                   310 non-null float64
lumbar lordosis angle         310 non-null float64
sacral slope                  310 non-null float64
pelvic radius                 310 non-null float64
grade of spondylolisthesis    310 non-null float64
labels                        310 non-null object
dtypes: float64(6), object(1)
memory usage: 17.0+ KB


In [10]:
# List the distinct class labels present in the data.
data_all["labels"].unique()

array(['DH', 'SL', 'NO'], dtype=object)

In [11]:
# Count how many rows belong to each class to see the class balance.
data_all["labels"].value_counts()

SL    150
NO    100
DH     60
Name: labels, dtype: int64

In [12]:
# One-hot encode the text class label into three 0/1 indicator columns and
# then drop the original `labels` column:
#   d1 == 1 for 'DH', d2 == 1 for 'SL', d3 == 1 for 'NO' (0 otherwise).
#
# NOTE: this cell rebinds `data_all` to a frame without `labels`, so running
# it a second time without first re-running the cell that builds `data_all`
# raises a KeyError.
label_columns = (('DH', 'd1'), ('SL', 'd2'), ('NO', 'd3'))
labels = data_all['labels']

# Fail with a clear message on a label outside the three known classes,
# rather than leaving that row without any indicator set.
unexpected_labels = set(labels.unique()) - {label for label, _ in label_columns}
if unexpected_labels:
    raise ValueError(
        "unexpected class labels: {}".format(sorted(unexpected_labels, key=str))
    )

# Vectorised comparison: one boolean Series per class, stored as int64 0/1.
# Unlike a positional `labels[lin]` look-up, this does not depend on the frame
# having a default 0..n-1 index.
for label, column in label_columns:
    data_all[column] = (labels == label).astype('int64')

# Keep the indicator values available as plain Python lists under the same
# names the loop-based version exposed.
d1 = data_all['d1'].tolist()
d2 = data_all['d2'].tolist()
d3 = data_all['d3'].tolist()

data_all = data_all.drop('labels', axis=1)

In [13]:
# Preview the frame after encoding: `labels` is replaced by the d1/d2/d3 indicator columns.
data_all.head()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,d1,d2,d3
0,63.03,22.55,39.61,40.48,98.67,-0.25,1,0,0
1,39.06,10.06,25.02,29.0,114.41,4.56,1,0,0
2,68.83,22.22,50.09,46.61,105.99,-3.53,1,0,0
3,69.3,24.65,44.31,44.64,101.87,11.21,1,0,0
4,49.71,9.65,28.32,40.06,108.17,7.92,1,0,0


In [14]:
# Write the encoded frame as comma-separated text, without the row index.
data_all.to_csv(
    'datasets/coluna_vertebral/df_coluna',
    index=False,
    sep=',',
)