In [1]:
# Dependencies and data load
# Link to the associated documentation:
# http://scikit-learn.org/stable/modules/feature_selection.html#l1-feature-selection
# Section 1.13.4.1 (L1-based feature selection)
# Regularized regression model called GLMNET
# (see "The Elements of Statistical Learning")
# Good for sparse data and correlated variables


import pandas as pd
from numpy import genfromtxt
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel


# Read the raw CSV into a DataFrame and display its (rows, columns) shape.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
csv_path = '/Users/stevensun/Prometheus/0_data/prometheus.csv'
my_data = pd.read_csv(csv_path)
my_data.shape

(2377, 127)

In [2]:
# Keep only complete cases: any row containing at least one missing value
# is discarded. The result rebinds `my_data`, so the unfiltered frame is no
# longer available after this cell runs.
my_data = my_data.dropna(axis=0, how="any")
my_data.shape

(690, 127)

In [3]:
# Split the cleaned frame into a feature matrix X and a target vector y.
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0; `.iloc`
# performs the same purely positional selection that `.ix` fell back to here.

X = my_data.iloc[:, 0:125]  # columns at positions 0..124 (slice end is exclusive)
# NOTE(review): the column at position 125 ends up in neither X nor y —
# confirm that skipping it is intentional.
y = my_data.iloc[:, 126]    # column at position 126, the last of the 127 columns

In [4]:
# L1-penalised linear SVC used as a sparse feature selector.
# C controls sparsity: the smaller C is, the fewer features are selected.
lsvc = LinearSVC(penalty="l1", C=0.05, dual=False)
lsvc.fit(X, y)

# Wrap the already-fitted estimator (prefit=True) and reduce X to the
# columns the selector keeps.
model = SelectFromModel(estimator=lsvc, prefit=True)
X_new = model.transform(X)
X_new.shape

(690, 32)

In [5]:
# Selected features: keep only the columns of X retained by the selector and
# show their names.
# `.ix` no longer exists in pandas >= 1.0; the selector's support mask is
# passed to `.loc` as a column mask instead. X is already a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed to read `.columns`.

features_selected = X.loc[:, model.get_support()]
features_selected.columns

Index(['SEX', 'RC_WHT', 'RC_HISPLAT', 'PTRT_NONE_YN', 'PTRT_ARTHRO_YN',
       'PTRT_INTRA_INJECT_YN', 'GM_BACK_PAIN_YN', 'GM_END_METABLIC_YN',
       'GM_GENIT_DIS_YN', 'GM_HEENT_YN', 'GM_RESP_DIS_YN', 'GM_OTHER_YN',
       'AP Alignment Angle', 'Distal Femoral Angle', 'Proximal Tibial Angle',
       'Femoral Component Flexion', 'Posterior Tibial Slope', 'DEVIMP_PSRP',
       'DEVIMP_CRFB', 'DEVIMP_CRRP', 'SRG_APPROACH_MEDIAL_PARAPATELLAR',
       'SRG_APPROACH_TRIVECTOR', 'PAT_MANAGE_EVERTED', 'PAT_MANAGE_SUBLUXED',
       'RESECTION_GAPBALANCE', 'ANTI_CEMENT', 'ST_NONE_YN', 'ST_SYN_YN',
       'ST_DEEP_MCL_YN', 'ST_POST_MED_CAP_YN', 'preop PKIP modify',
       'preop KS_PAIN_STAIRS'],
      dtype='object')

In [6]:
# Make the fitted coefficients human readable: label each coefficient column
# with the matching feature name, then keep only the features that have at
# least one nonzero coefficient.
coefficients = pd.DataFrame(lsvc.coef_, columns=X.columns)
has_nonzero_coef = (coefficients != 0).any(axis=0)
features_selected = coefficients.loc[:, has_nonzero_coef]
# NOTE(review): max() reports the largest *signed* value per feature, so a
# feature whose nonzero coefficients are all negative is displayed as 0.0
# (see PTRT_ARTHRO_YN in the output). Consider abs().max() if magnitude is
# what should be shown.
features_selected.max()

SEX                                 0.143515
RC_WHT                              0.179088
RC_HISPLAT                          0.030903
PTRT_NONE_YN                        0.035813
PTRT_ARTHRO_YN                      0.000000
PTRT_INTRA_INJECT_YN                0.201918
GM_BACK_PAIN_YN                     0.042756
GM_END_METABLIC_YN                  0.169647
GM_GENIT_DIS_YN                     0.082588
GM_HEENT_YN                         0.036072
GM_RESP_DIS_YN                      0.035664
GM_OTHER_YN                         0.094574
AP Alignment Angle                  0.009793
Distal Femoral Angle                0.002201
Proximal Tibial Angle               0.011031
Femoral Component Flexion           0.002225
Posterior Tibial Slope              0.006258
DEVIMP_PSRP                         0.041094
DEVIMP_CRFB                         0.259391
DEVIMP_CRRP                         0.154067
SRG_APPROACH_MEDIAL_PARAPATELLAR    0.000000
SRG_APPROACH_TRIVECTOR              0.556966
PAT_MANAGE