## This notebook shows how to select the important features for the organics and plasticizers.

In [2]:
import sys
sys.path.append('../modules')
import likelihood_predictor
from likelihood_predictor import PlastPredictor
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.stats import zscore
import pickle
from sklearn.feature_selection import SelectFromModel

#### Importing data

In [3]:
pl_full = pd.read_pickle('../database/plasticizer_data_v10_polarity.pkl')
pl_pol = pd.concat([pl_full[pl_full.columns[1:195]], pl_full['Polarity']], axis=1)
all_cols = pl_pol.columns.to_numpy()
pl_data = pl_pol[all_cols].to_numpy()
lin_data = pd.read_pickle('../database/linolein_test.pkl')
lin_data['Polarity'] = 0.048856
lin_data = lin_data[all_cols].to_numpy()
org_full = pd.read_pickle('../database/org_polarity_v2.pkl')

In [4]:
psim1 = open("pubs_similarity.txt", 'r')
psim11 = [line.rstrip('\n') for line in psim1]
psim2 = open("pubs_othersim.txt", 'r')
psim22 = [line.rstrip('\n') for line in psim2]

In [5]:
org_full
org_full['Dsim'] = psim11
org_full['Nasim'] = psim22
org_full = org_full.sort_values(by ='Dsim')

#### We are narrowing down our organics based on the similarity with DEHP phthalate and Sodium polynaphthalenesulfonate. Out of ~66k organics, we just selected 5k organics based on its similarity with plasticizers.

In [6]:
org_full = org_full[:5000]
org_data = org_full[all_cols].to_numpy()

This is where your quest for selecting the features begins. Reminder: We've 195 features in our db right now.

In [7]:
reg_param = 10
pp = PlastPredictor(reg_param)
pp_model = pp.fit_model(pl_data, org_data)


In [8]:
org_acc = pp.predict(org_data, type='binary', class_id='neg')
pl_acc = pp.predict(pl_data, type='binary', class_id='pos')

In [9]:
cc=pp.clf.coef_

I've printed the nonzero coeffs, org and pl accuracy after each run so that we know how they are changing after every step.

In [10]:
np.count_nonzero(cc)

104

**compar** places the True/False values for the features where coefficients are nonzero/zero respectively.

In [11]:
compar = cc[0] != 0
compar = compar.tolist()

In [162]:
pl_temp = pl_data.tolist()
org_temp = org_data.tolist()

Modifying the pl_list and org_list as per the nonzero coefficients and the respective features and these will be used for the next run.

In [163]:
pl_list = []

for i in range(0,210):
    count = 0
    p1 = []
    for j in compar:
        #print(j)
        if j:
#             print(type(j))
            p1.append(pl_temp[i][count])
        else:
            p1.append(False)
            
        count += 1

    pl_list.append(p1)
    

In [164]:
org_list = []

for i in range(0, 5000):
    count = 0
    o1 = []
    for j in compar:
        if j:
            o1.append(org_temp[i][count]) 
        else:
            o1.append(False)
           
        count += 1
            
    org_list.append(o1)

now we've both new orgs and pl list and we're gonna repeat the process.

In [165]:
pl_num = np.asarray(pl_list)
org_num = np.asarray(org_list)

reg_param = 10
pp2 = PlastPredictor(reg_param)
pp2_model = pp2.fit_model(pl_num, org_num)
org_acc2 = pp2.predict(org_num, type='binary', class_id='neg')
pl_acc2 = pp2.predict(pl_num, type='binary', class_id='pos')

In [166]:
dd = pp2.clf.coef_

compar2 = dd[0] != 0
compar2 = compar2.tolist()

In [167]:
np.count_nonzero(dd)

71

In [168]:
pl_list2 = []

for i in range(0, 210):
    count = 0
    p2 = []
    for j in compar2:
        if j:
            p2.append(pl_list[i][count])
        else:
            p2.append(False)
            
        count += 1
            
    pl_list2.append(p2)

In [169]:
org_list2 = []

for i in range(0, 5000):
    count = 0
    o2 = []
    for j in compar2:
        if j:
            o2.append(org_list[i][count]) 
        else:
            o2.append(False)
           
        count += 1
            
    org_list2.append(o2)

In [193]:
pl_num2 = np.asarray(pl_list2)
org_num2 = np.asarray(org_list2)

reg_param = 10
pp3 = PlastPredictor(reg_param)
pp3_model = pp3.fit_model(pl_num2, org_num2)
org_acc3 = pp3.predict(org_num2, type='binary', class_id='neg')
pl_acc3 = pp3.predict(pl_num2, type='binary', class_id='pos')

In [194]:
ee = pp3.clf.coef_

compar3 = ee[0] != 0
compar3 = compar3.tolist()

In [195]:
np.count_nonzero(ee)

59

#### yeah and this process keeps happening for eternity. Now you can skip at the end of the notebook, where I saved the final pl_list and org_list to an excel file and use that file to get the relevant features which are discussed in the other notebook.

In [196]:
pl_list3 = []

for i in range(0, 210):
    count = 0
    p3 = []
    for j in compar3:
        if j:
            p3.append(pl_list2[i][count])
        else:
            p3.append(False)
            
        count += 1
            
    pl_list3.append(p3)

In [197]:
org_list3 = []

for i in range(0, 5000):
    count = 0
    o3 = []
    for j in compar3:
        if j:
            o3.append(org_list2[i][count]) 
        else:
            o3.append(False)
           
        count += 1
            
    org_list3.append(o3)

In [198]:
pl_num3 = np.asarray(pl_list3)
org_num3 = np.asarray(org_list3)

reg_param = 10
pp4 = PlastPredictor(reg_param)
pp4_model = pp4.fit_model(pl_num3, org_num3)
org_acc4 = pp4.predict(org_num3, type='binary', class_id='neg')
pl_acc4 = pp4.predict(pl_num3, type='binary', class_id='pos')

In [199]:
ff = pp4.clf.coef_

compar4 = ff[0] != 0
compar4 = compar4.tolist()

In [200]:
np.count_nonzero(ff)

56

In [201]:
pl_list4 = []

for i in range(0, 210):
    count = 0
    p4 = []
    for j in compar4:
        if j:
            p4.append(pl_list3[i][count])
        else:
            p4.append(False)
            
        count += 1
            
    pl_list4.append(p4)

In [202]:
org_list4 = []

for i in range(0, 5000):
    count = 0
    o4 = []
    for j in compar4:
        if j:
            o4.append(org_list3[i][count]) 
        else:
            o4.append(False)
           
        count += 1
            
    org_list4.append(o4)

In [203]:
np.count_nonzero(ff), org_acc4, pl_acc4

(56, 0.9664, 0.9238095238095239)

In [225]:
pl_num4 = np.asarray(pl_list4)
org_num4 = np.asarray(org_list4)

reg_param = 10
pp5 = PlastPredictor(reg_param)
pp5_model = pp5.fit_model(pl_num4, org_num4)
org_acc5 = pp5.predict(org_num4, type='binary', class_id='neg')
pl_acc5 = pp5.predict(pl_num4, type='binary', class_id='pos')

In [226]:
gg = pp5.clf.coef_

compar5 = gg[0] != 0
compar5 = compar5.tolist()

In [227]:
np.count_nonzero(gg), org_acc5, pl_acc5

(51, 0.954, 0.9238095238095239)

In [228]:
pl_list5 = []

for i in range(0, 210):
    count = 0
    p5 = []
    for j in compar5:
        if j:
            p5.append(pl_list4[i][count])
        else:
            p5.append(False)
            
        count += 1
            
    pl_list5.append(p5)

In [229]:
org_list5 = []

for i in range(0, 5000):
    count = 0
    o5 = []
    for j in compar5:
        if j:
            o5.append(org_list4[i][count]) 
        else:
            o5.append(False)
           
        count += 1
            
    org_list5.append(o5)

In [239]:
pl_num5 = np.asarray(pl_list5)
org_num5 = np.asarray(org_list5)

reg_param = 10
pp6 = PlastPredictor(reg_param)
pp6_model = pp6.fit_model(pl_num5, org_num5)
org_acc6 = pp6.predict(org_num5, type='binary', class_id='neg')
pl_acc6 = pp6.predict(pl_num5, type='binary', class_id='pos')

In [240]:
hh = pp6.clf.coef_

compar6 = hh[0] != 0
compar6 = compar6.tolist()

In [241]:
np.count_nonzero(hh), org_acc6, pl_acc6

(48, 0.9682, 0.9380952380952381)

In [242]:
pl_list6 = []

for i in range(0, 210):
    count = 0
    p6 = []
    for j in compar6:
        if j:
            p6.append(pl_list5[i][count])
        else:
            p6.append(False)
            
        count += 1
            
    pl_list6.append(p6)

In [243]:
org_list6 = []

for i in range(0, 5000):
    count = 0
    o6 = []
    for j in compar6:
        if j:
            o6.append(org_list5[i][count]) 
        else:
            o6.append(False)
           
        count += 1
            
    org_list6.append(o6)

In [262]:
pl_num6 = np.asarray(pl_list6)
org_num6 = np.asarray(org_list6)

reg_param = 10
pp7 = PlastPredictor(reg_param)
pp7_model = pp7.fit_model(pl_num6, org_num6)
org_acc7 = pp7.predict(org_num6, type='binary', class_id='neg')
pl_acc7 = pp7.predict(pl_num6, type='binary', class_id='pos')

In [263]:
ii = pp7.clf.coef_

compar7 = ii[0] != 0
compar7 = compar7.tolist()

In [264]:
np.count_nonzero(ii), org_acc7, pl_acc7

(45, 0.9766, 0.9285714285714286)

In [265]:
pl_list7 = []

for i in range(0, 210):
    count = 0
    p7 = []
    for j in compar7:
        if j:
            p7.append(pl_list6[i][count])
        else:
            p7.append(False)
            
        count += 1
            
    pl_list7.append(p7)

In [266]:
org_list7 = []

for i in range(0, 5000):
    count = 0
    o7 = []
    for j in compar7:
        if j:
            o7.append(org_list6[i][count]) 
        else:
            o7.append(False)
           
        count += 1
            
    org_list7.append(o7)

In [270]:
pl_num7 = np.asarray(pl_list7)
org_num7 = np.asarray(org_list7)

reg_param = 10
pp8 = PlastPredictor(reg_param)
pp8_model = pp8.fit_model(pl_num7, org_num7)
org_acc8 = pp8.predict(org_num7, type='binary', class_id='neg')
pl_acc8 = pp8.predict(pl_num7, type='binary', class_id='pos')

In [271]:
jj = pp8.clf.coef_

compar8 = jj[0] != 0
compar8 = compar8.tolist()

In [272]:
np.count_nonzero(jj), org_acc8, pl_acc8

(44, 0.9734, 0.9285714285714286)

In [273]:
pl_list8 = []

for i in range(0, 210):
    count = 0
    p8 = []
    for j in compar8:
        if j:
            p8.append(pl_list7[i][count])
        else:
            p8.append(False)
            
        count += 1
            
    pl_list8.append(p8)

In [274]:
org_list8 = []

for i in range(0, 5000):
    count = 0
    o8 = []
    for j in compar8:
        if j:
            o8.append(org_list7[i][count]) 
        else:
            o8.append(False)
           
        count += 1
            
    org_list8.append(o8)

In [275]:
pl_num8 = np.asarray(pl_list8)
org_num8 = np.asarray(org_list8)

reg_param = 10
pp9 = PlastPredictor(reg_param)
pp9_model = pp9.fit_model(pl_num8, org_num8)
org_acc9 = pp9.predict(org_num8, type='binary', class_id='neg')
pl_acc9 = pp9.predict(pl_num8, type='binary', class_id='pos')

In [276]:
kk = pp9.clf.coef_

compar9 = kk[0] != 0
compar9 = compar9.tolist()

In [277]:
np.count_nonzero(kk), org_acc9, pl_acc9

(43, 0.9692, 0.9571428571428572)

In [278]:
pl_list9 = []

for i in range(0, 210):
    count = 0
    p9 = []
    for j in compar9:
        if j:
            p9.append(pl_list8[i][count])
        else:
            p9.append(False)
            
        count += 1
            
    pl_list9.append(p9)

In [279]:
org_list9 = []

for i in range(0, 5000):
    count = 0
    o9 = []
    for j in compar9:
        if j:
            o9.append(org_list8[i][count]) 
        else:
            o9.append(False)
           
        count += 1
            
    org_list9.append(o9)

In [280]:
pl_num9 = np.asarray(pl_list9)
org_num9 = np.asarray(org_list9)

reg_param = 10
pp10 = PlastPredictor(reg_param)
pp10_model = pp10.fit_model(pl_num9, org_num9)
org_acc10 = pp10.predict(org_num9, type='binary', class_id='neg')
pl_acc10 = pp10.predict(pl_num9, type='binary', class_id='pos')

In [281]:
ll = pp10.clf.coef_

compar10 = ll[0] != 0
compar10 = compar10.tolist()

In [282]:
np.count_nonzero(ll), org_acc10, pl_acc10

(42, 0.9616, 0.9238095238095239)

In [283]:
pl_list10 = []

for i in range(0, 210):
    count = 0
    p10 = []
    for j in compar10:
        if j:
            p10.append(pl_list9[i][count])
        else:
            p10.append(False)
            
        count += 1
            
    pl_list10.append(p10)

In [284]:
org_list10 = []

for i in range(0, 5000):
    count = 0
    o10 = []
    for j in compar10:
        if j:
            o10.append(org_list9[i][count]) 
        else:
            o10.append(False)
           
        count += 1
            
    org_list10.append(o10)

In [285]:
pl_num10 = np.asarray(pl_list10)
org_num10 = np.asarray(org_list10)

reg_param = 10
pp11 = PlastPredictor(reg_param)
pp11_model = pp11.fit_model(pl_num10, org_num10)
org_acc11 = pp11.predict(org_num10, type='binary', class_id='neg')
pl_acc11 = pp11.predict(pl_num10, type='binary', class_id='pos')

In [286]:
mm = pp11.clf.coef_

compar11 = mm[0] != 0
compar11 = compar11.tolist()

In [287]:
np.count_nonzero(mm), org_acc11, pl_acc11

(40, 0.9632, 0.9428571428571428)

In [288]:
pl_list11 = []

for i in range(0, 210):
    count = 0
    p11 = []
    for j in compar11:
        if j:
            p11.append(pl_list10[i][count])
        else:
            p11.append(False)
            
        count += 1
            
    pl_list11.append(p11)

In [289]:
org_list11 = []

for i in range(0, 5000):
    count = 0
    o11 = []
    for j in compar11:
        if j:
            o11.append(org_list10[i][count]) 
        else:
            o11.append(False)
           
        count += 1
            
    org_list11.append(o11)

In [290]:
pl_num11 = np.asarray(pl_list11)
org_num11 = np.asarray(org_list11)

reg_param = 10
pp12 = PlastPredictor(reg_param)
pp12_model = pp12.fit_model(pl_num11, org_num11)
org_acc12 = pp12.predict(org_num11, type='binary', class_id='neg')
pl_acc12 = pp12.predict(pl_num11, type='binary', class_id='pos')

In [291]:
nn = pp12.clf.coef_

compar12 = mm[0] != 0
compar12 = compar12.tolist()

In [292]:
np.count_nonzero(nn), org_acc12, pl_acc12

(40, 0.9594, 0.9238095238095239)

In [293]:
pl_list12 = []

for i in range(0, 210):
    count = 0
    p12 = []
    for j in compar12:
        if j:
            p12.append(pl_list11[i][count])
        else:
            p12.append(False)
            
        count += 1
            
    pl_list12.append(p12)

In [294]:
org_list12 = []

for i in range(0, 5000):
    count = 0
    o12 = []
    for j in compar12:
        if j:
            o12.append(org_list11[i][count]) 
        else:
            o12.append(False)
           
        count += 1
            
    org_list12.append(o12)

In [304]:
pl_num12 = np.asarray(pl_list12)
org_num12 = np.asarray(org_list12)

reg_param = 10
pp13 = PlastPredictor(reg_param)
pp13_model = pp13.fit_model(pl_num12, org_num12)
org_acc13 = pp13.predict(org_num12, type='binary', class_id='neg')
pl_acc13 = pp13.predict(pl_num12, type='binary', class_id='pos')

In [305]:
oo = pp13.clf.coef_

compar13 = oo[0] != 0
compar13 = compar13.tolist()

In [306]:
np.count_nonzero(oo), org_acc13, pl_acc13

(39, 0.9676, 0.9428571428571428)

In [307]:
pl_list13 = []

for i in range(0, 210):
    count = 0
    p13 = []
    for j in compar13:
        if j:
            p13.append(pl_list12[i][count])
        else:
            p13.append(False)
            
        count += 1
            
    pl_list13.append(p13)

In [308]:
org_list13 = []

for i in range(0, 5000):
    count = 0
    o13 = []
    for j in compar13:
        if j:
            o13.append(org_list12[i][count]) 
        else:
            o13.append(False)
           
        count += 1
            
    org_list13.append(o13)

In [315]:
pl_num13 = np.asarray(pl_list13)
org_num13 = np.asarray(org_list13)

reg_param = 10
pp14 = PlastPredictor(reg_param)
pp14_model = pp14.fit_model(pl_num13, org_num13)
org_acc14 = pp14.predict(org_num13, type='binary', class_id='neg')
pl_acc14 = pp14.predict(pl_num13, type='binary', class_id='pos')

In [316]:
qq = pp14.clf.coef_

compar14 = qq[0] != 0
compar14 = compar14.tolist()

In [317]:
np.count_nonzero(qq), org_acc14, pl_acc14

(38, 0.9602, 0.9428571428571428)

In [318]:
pl_list14 = []

for i in range(0, 210):
    count = 0
    p14 = []
    for j in compar14:
        if j:
            p14.append(pl_list13[i][count])
        else:
            p14.append(False)
            
        count += 1
            
    pl_list14.append(p14)
    


In [319]:
org_list14 = []

for i in range(0, 5000):
    count = 0
    o14 = []
    for j in compar14:
        if j:
            o14.append(org_list13[i][count]) 
        else:
            o14.append(False)
           
        count += 1
            
    org_list14.append(o14)

In [324]:
pl_num14 = np.asarray(pl_list14)
org_num14 = np.asarray(org_list14)

reg_param = 10
pp15 = PlastPredictor(reg_param)
pp15_model = pp15.fit_model(pl_num14, org_num14)
org_acc15 = pp15.predict(org_num14, type='binary', class_id='neg')
pl_acc15 = pp15.predict(pl_num14, type='binary', class_id='pos')

rr = pp15.clf.coef_

compar15 = rr[0] != 0
compar15 = compar15.tolist()

In [325]:
np.count_nonzero(rr), org_acc15, pl_acc15

(36, 0.9654, 0.9428571428571428)

In [326]:
pl_list15 = []

for i in range(0, 210):
    count = 0
    p15 = []
    for j in compar15:
        if j:
            p15.append(pl_list14[i][count])
        else:
            p15.append(False)
            
        count += 1
            
    pl_list15.append(p15)
    


In [327]:
org_list15 = []

for i in range(0, 5000):
    count = 0
    o15 = []
    for j in compar15:
        if j:
            o15.append(org_list14[i][count]) 
        else:
            o15.append(False)
           
        count += 1
            
    org_list15.append(o15)

In [336]:
pl_num15 = np.asarray(pl_list15)
org_num15 = np.asarray(org_list15)

reg_param = 10
pp16 = PlastPredictor(reg_param)
pp16_model = pp16.fit_model(pl_num15, org_num15)
org_acc16 = pp16.predict(org_num15, type='binary', class_id='neg')
pl_acc16 = pp16.predict(pl_num15, type='binary', class_id='pos')

ss = pp16.clf.coef_

compar16 = ss[0] != 0
compar16 = compar16.tolist()

In [337]:
np.count_nonzero(ss), org_acc16, pl_acc16

(35, 0.9724, 0.919047619047619)

In [338]:
pl_list16 = []

for i in range(0, 210):
    count = 0
    p16 = []
    for j in compar16:
        if j:
            p16.append(pl_list15[i][count])
        else:
            p16.append(False)
            
        count += 1
            
    pl_list16.append(p16)
    
org_list16 = []

for i in range(0, 5000):
    count = 0
    o16 = []
    for j in compar16:
        if j:
            o16.append(org_list15[i][count]) 
        else:
            o16.append(False)
           
        count += 1
            
    org_list16.append(o16)

In [339]:
pl_num16 = np.asarray(pl_list16)
org_num16 = np.asarray(org_list16)

reg_param = 10
pp17 = PlastPredictor(reg_param)
pp17_model = pp17.fit_model(pl_num16, org_num16)
org_acc17 = pp17.predict(org_num16, type='binary', class_id='neg')
pl_acc17 = pp17.predict(pl_num16, type='binary', class_id='pos')

tt = pp17.clf.coef_

compar17 = tt[0] != 0
compar17 = compar17.tolist()

In [340]:
np.count_nonzero(tt)

34

In [341]:
pl_list17 = []

for i in range(0, 210):
    count = 0
    p17 = []
    for j in compar17:
        if j:
            p17.append(pl_list16[i][count])
        else:
            p17.append(False)
            
        count += 1
            
    pl_list17.append(p17)

org_list17 = []

for i in range(0, 5000):
    count = 0
    o17 = []
    for j in compar17:
        if j:
            o17.append(org_list16[i][count]) 
        else:
            o17.append(False)
           
        count += 1
            
    org_list17.append(o17)

In [342]:
pl_num17 = np.asarray(pl_list17)
org_num17 = np.asarray(org_list17)

reg_param = 10
pp18 = PlastPredictor(reg_param)
pp18_model = pp18.fit_model(pl_num17, org_num17)
org_acc18 = pp18.predict(org_num17, type='binary', class_id='neg')
pl_acc18 = pp18.predict(pl_num17, type='binary', class_id='pos')

uu = pp18.clf.coef_

compar18 = uu[0] != 0
compar18 = compar18.tolist()

In [343]:
np.count_nonzero(uu)

33

In [344]:
pl_list18 = []

for i in range(0, 210):
    count = 0
    p18 = []
    for j in compar18:
        if j:
            p18.append(pl_list17[i][count])
        else:
            p18.append(False)
            
        count += 1
            
    pl_list18.append(p18)

org_list18 = []

for i in range(0, 5000):
    count = 0
    o18 = []
    for j in compar18:
        if j:
            o18.append(org_list17[i][count]) 
        else:
            o18.append(False)
           
        count += 1
            
    org_list18.append(o18)

In [347]:
pl_num18 = np.asarray(pl_list18)
org_num18 = np.asarray(org_list18)

reg_param = 10
pp19 = PlastPredictor(reg_param)
pp19_model = pp19.fit_model(pl_num18, org_num18)
org_acc19 = pp19.predict(org_num18, type='binary', class_id='neg')
pl_acc19 = pp19.predict(pl_num18, type='binary', class_id='pos')

vv = pp19.clf.coef_

compar19 = vv[0] != 0
compar19 = compar19.tolist()

In [348]:
np.count_nonzero(vv)

32

In [349]:
pl_list19 = []

for i in range(0, 210):
    count = 0
    p19 = []
    for j in compar19:
        if j:
            p19.append(pl_list18[i][count])
        else:
            p19.append(False)
            
        count += 1
            
    pl_list19.append(p19)

org_list19 = []

for i in range(0, 5000):
    count = 0
    o19 = []
    for j in compar19:
        if j:
            o19.append(org_list18[i][count]) 
        else:
            o19.append(False)
           
        count += 1
            
    org_list19.append(o19)

In [350]:
pl_num19 = np.asarray(pl_list19)
org_num19 = np.asarray(org_list19)

reg_param = 10
pp20 = PlastPredictor(reg_param)
pp20_model = pp20.fit_model(pl_num19, org_num19)
org_acc20 = pp20.predict(org_num19, type='binary', class_id='neg')
pl_acc20 = pp20.predict(pl_num19, type='binary', class_id='pos')

ww = pp20.clf.coef_

compar20 = ww[0] != 0
compar20 = compar20.tolist()

In [351]:
np.count_nonzero(ww)

31

In [352]:
pl_list20 = []

for i in range(0, 210):
    count = 0
    p20 = []
    for j in compar20:
        if j:
            p20.append(pl_list19[i][count])
        else:
            p20.append(False)
            
        count += 1
            
    pl_list20.append(p20)

org_list20 = []

for i in range(0, 5000):
    count = 0
    o20 = []
    for j in compar20:
        if j:
            o20.append(org_list19[i][count]) 
        else:
            o20.append(False)
           
        count += 1
            
    org_list20.append(o20)

In [363]:
pl_num20 = np.asarray(pl_list20)
org_num20 = np.asarray(org_list20)

reg_param = 10
pp21 = PlastPredictor(reg_param)
pp21_model = pp21.fit_model(pl_num20, org_num20)
org_acc21 = pp21.predict(org_num20, type='binary', class_id='neg')
pl_acc21 = pp21.predict(pl_num20, type='binary', class_id='pos')

xx = pp21.clf.coef_

compar21 = xx[0] != 0
compar21 = compar21.tolist()

In [364]:
np.count_nonzero(xx)

30

In [365]:
pl_list21 = []

for i in range(0, 210):
    count = 0
    p21 = []
    for j in compar21:
        if j:
            p21.append(pl_list20[i][count])
        else:
            p21.append(False)
            
        count += 1
            
    pl_list21.append(p21)

org_list21 = []

for i in range(0, 5000):
    count = 0
    o21 = []
    for j in compar21:
        if j:
            o21.append(org_list20[i][count]) 
        else:
            o21.append(False)
           
        count += 1
            
    org_list21.append(o21)

In [368]:
pl_num21 = np.asarray(pl_list21)
org_num21 = np.asarray(org_list21)

reg_param = 10
pp22 = PlastPredictor(reg_param)
pp22_model = pp22.fit_model(pl_num21, org_num21)
org_acc22 = pp22.predict(org_num21, type='binary', class_id='neg')
pl_acc22 = pp22.predict(pl_num21, type='binary', class_id='pos')

yy = pp22.clf.coef_

compar22 = yy[0] != 0
compar22 = compar22.tolist()

In [369]:
np.count_nonzero(yy)

29

In [370]:
pl_list22 = []

for i in range(0, 210):
    count = 0
    p22 = []
    for j in compar22:
        if j:
            p22.append(pl_list21[i][count])
        else:
            p22.append(False)
            
        count += 1
            
    pl_list22.append(p22)

org_list22 = []

for i in range(0, 5000):
    count = 0
    o22 = []
    for j in compar22:
        if j:
            o22.append(org_list21[i][count]) 
        else:
            o22.append(False)
           
        count += 1
            
    org_list22.append(o22)

In [377]:
pl_num22 = np.asarray(pl_list22)
org_num22 = np.asarray(org_list22)

reg_param = 10
pp23 = PlastPredictor(reg_param)
pp23_model = pp23.fit_model(pl_num22, org_num22)
org_acc23 = pp23.predict(org_num22, type='binary', class_id='neg')
pl_acc23 = pp23.predict(pl_num22, type='binary', class_id='pos')

zz = pp23.clf.coef_

compar23 = zz[0] != 0
compar23 = compar23.tolist()

In [378]:
np.count_nonzero(zz)

27

In [379]:
pl_list23 = []

for i in range(0, 210):
    count = 0
    p23 = []
    for j in compar23:
        if j:
            p23.append(pl_list22[i][count])
        else:
            p23.append(False)
            
        count += 1
            
    pl_list23.append(p23)

org_list23 = []

for i in range(0, 5000):
    count = 0
    o23 = []
    for j in compar23:
        if j:
            o23.append(org_list22[i][count]) 
        else:
            o23.append(False)
           
        count += 1
            
    org_list23.append(o23)

In [398]:
pl_num23 = np.asarray(pl_list23)
org_num23 = np.asarray(org_list23)

reg_param = 10
pp24 = PlastPredictor(reg_param)
pp24_model = pp24.fit_model(pl_num23, org_num23)
org_acc24 = pp24.predict(org_num23, type='binary', class_id='neg')
pl_acc24 = pp24.predict(pl_num23, type='binary', class_id='pos')

z1 = pp24.clf.coef_

compar24 = z1[0] != 0
compar24 = compar24.tolist()

In [399]:
np.count_nonzero(z1)

26

In [400]:
pl_list24 = []

for i in range(0, 210):
    count = 0
    p24 = []
    for j in compar24:
        if j:
            p24.append(pl_list23[i][count])
        else:
            p24.append(False)
            
        count += 1
            
    pl_list24.append(p24)

org_list24 = []

for i in range(0, 5000):
    count = 0
    o24 = []
    for j in compar24:
        if j:
            o24.append(org_list23[i][count]) 
        else:
            o24.append(False)
           
        count += 1
            
    org_list24.append(o24)

In [403]:
pl_num24 = np.asarray(pl_list24)
org_num24 = np.asarray(org_list24)

reg_param = 10
pp25 = PlastPredictor(reg_param)
pp25_model = pp25.fit_model(pl_num24, org_num24)
org_acc25 = pp25.predict(org_num24, type='binary', class_id='neg')
pl_acc25 = pp25.predict(pl_num24, type='binary', class_id='pos')

z2 = pp25.clf.coef_

compar25 = z2[0] != 0
compar25 = compar25.tolist()

In [404]:
np.count_nonzero(z2)

25

In [405]:
pl_list25 = []

for i in range(0, 210):
    count = 0
    p25 = []
    for j in compar25:
        if j:
            p25.append(pl_list24[i][count])
        else:
            p25.append(False)
            
        count += 1
            
    pl_list25.append(p25)

org_list25 = []

for i in range(0, 5000):
    count = 0
    o25 = []
    for j in compar25:
        if j:
            o25.append(org_list24[i][count]) 
        else:
            o25.append(False)
           
        count += 1
            
    org_list25.append(o25)

In [412]:
pl_num25 = np.asarray(pl_list25)
org_num25 = np.asarray(org_list25)

reg_param = 10
pp26 = PlastPredictor(reg_param)
pp26_model = pp26.fit_model(pl_num25, org_num25)
org_acc26 = pp26.predict(org_num25, type='binary', class_id='neg')
pl_acc26 = pp26.predict(pl_num25, type='binary', class_id='pos')

z3 = pp26.clf.coef_

compar26 = z3[0] != 0
compar26 = compar26.tolist()

In [413]:
np.count_nonzero(z3)

24

In [414]:
pl_list26 = []

for i in range(0, 210):
    count = 0
    p26 = []
    for j in compar26:
        if j:
            p26.append(pl_list25[i][count])
        else:
            p26.append(False)
            
        count += 1
            
    pl_list26.append(p26)

org_list26 = []

for i in range(0, 5000):
    count = 0
    o26 = []
    for j in compar26:
        if j:
            o26.append(org_list25[i][count]) 
        else:
            o26.append(False)
           
        count += 1
            
    org_list26.append(o26)

In [415]:
pl_num26 = np.asarray(pl_list26)
org_num26 = np.asarray(org_list26)

reg_param = 10
pp27 = PlastPredictor(reg_param)
pp27_model = pp27.fit_model(pl_num26, org_num26)
org_acc27 = pp27.predict(org_num26, type='binary', class_id='neg')
pl_acc27 = pp27.predict(pl_num26, type='binary', class_id='pos')

z4 = pp27.clf.coef_

compar27 = z4[0] != 0
compar27 = compar27.tolist()

In [416]:
np.count_nonzero(z4)

23

In [417]:
pl_list27 = []

for i in range(0, 210):
    count = 0
    p27 = []
    for j in compar27:
        if j:
            p27.append(pl_list26[i][count])
        else:
            p27.append(False)
            
        count += 1
            
    pl_list27.append(p27)

org_list27 = []

for i in range(0, 5000):
    count = 0
    o27 = []
    for j in compar27:
        if j:
            o27.append(org_list26[i][count]) 
        else:
            o27.append(False)
           
        count += 1
            
    org_list27.append(o27)

In [420]:
pl_num27 = np.asarray(pl_list27)
org_num27 = np.asarray(org_list27)

reg_param = 10
pp28 = PlastPredictor(reg_param)
pp28_model = pp28.fit_model(pl_num27, org_num27)
org_acc28 = pp28.predict(org_num27, type='binary', class_id='neg')
pl_acc28 = pp28.predict(pl_num27, type='binary', class_id='pos')

z5 = pp28.clf.coef_

compar28 = z5[0] != 0
compar28 = compar28.tolist()

In [421]:
np.count_nonzero(z5)

22

In [422]:
pl_list28 = []

for i in range(0, 210):
    count = 0
    p28 = []
    for j in compar28:
        if j:
            p28.append(pl_list27[i][count])
        else:
            p28.append(False)
            
        count += 1
            
    pl_list28.append(p28)

org_list28 = []

for i in range(0, 5000):
    count = 0
    o28 = []
    for j in compar28:
        if j:
            o28.append(org_list27[i][count]) 
        else:
            o28.append(False)
           
        count += 1
            
    org_list28.append(o28)

In [451]:
pl_num28 = np.asarray(pl_list28)
org_num28 = np.asarray(org_list28)

reg_param = 10
pp29 = PlastPredictor(reg_param)
pp29_model = pp29.fit_model(pl_num28, org_num28)
org_acc29 = pp29.predict(org_num28, type='binary', class_id='neg')
pl_acc29 = pp29.predict(pl_num28, type='binary', class_id='pos')

z6 = pp29.clf.coef_

compar29 = z6[0] != 0
compar29 = compar29.tolist()

In [452]:
np.count_nonzero(z6), pl_acc29, org_acc29

(22, 0.919047619047619, 0.9634)

In [453]:
pl_list29 = []

for i in range(0, 210):
    count = 0
    p29 = []
    for j in compar29:
        if j:
            p29.append(pl_list28[i][count])
        else:
            p29.append(False)
            
        count += 1
            
    pl_list29.append(p29)

org_list29 = []

for i in range(0, 5000):
    count = 0
    o29 = []
    for j in compar29:
        if j:
            o29.append(org_list28[i][count]) 
        else:
            o29.append(False)
           
        count += 1
            
    org_list29.append(o29)

In [466]:
pl_num29 = np.asarray(pl_list29)
org_num29 = np.asarray(org_list29)

reg_param = 10
pp30 = PlastPredictor(reg_param)
pp30_model = pp30.fit_model(pl_num29, org_num29)
org_acc30 = pp30.predict(org_num29, type='binary', class_id='neg')
pl_acc30 = pp30.predict(pl_num29, type='binary', class_id='pos')

z7 = pp30.clf.coef_

compar30 = z7[0] != 0
compar30 = compar30.tolist()

In [467]:
np.count_nonzero(z7)

22

In [468]:
pl_list30 = []

for i in range(0, 210):
    count = 0
    p30 = []
    for j in compar30:
        if j:
            p30.append(pl_list29[i][count])
        else:
            p30.append(False)
            
        count += 1
            
    pl_list30.append(p30)

org_list30 = []

for i in range(0, 5000):
    count = 0
    o30 = []
    for j in compar30:
        if j:
            o30.append(org_list29[i][count]) 
        else:
            o30.append(False)
           
        count += 1
            
    org_list30.append(o30)

In [479]:
pl_num30 = np.asarray(pl_list30)
org_num30 = np.asarray(org_list30)

reg_param = 10
pp31 = PlastPredictor(reg_param)
pp31_model = pp31.fit_model(pl_num30, org_num30)
org_acc31 = pp31.predict(org_num30, type='binary', class_id='neg')
pl_acc31 = pp31.predict(pl_num30, type='binary', class_id='pos')

z8 = pp31.clf.coef_

compar31 = z8[0] != 0
compar31 = compar31.tolist()

In [480]:
np.count_nonzero(z8), org_acc31, pl_acc31

(22, 0.965, 0.9333333333333333)

In [481]:
pl_list31 = []

for i in range(0, 210):
    count = 0
    p31 = []
    for j in compar31:
        if j:
            p31.append(pl_list30[i][count])
        else:
            p31.append(False)
            
        count += 1
            
    pl_list31.append(p31)

org_list31 = []

for i in range(0, 5000):
    count = 0
    o31 = []
    for j in compar31:
        if j:
            o31.append(org_list30[i][count]) 
        else:
            o31.append(False)
           
        count += 1
            
    org_list31.append(o31)

In [486]:
pl_num31 = np.asarray(pl_list31)
org_num31 = np.asarray(org_list31)

reg_param = 10
pp32 = PlastPredictor(reg_param)
pp32_model = pp32.fit_model(pl_num31, org_num31)
org_acc32 = pp32.predict(org_num31, type='binary', class_id='neg')
pl_acc32 = pp32.predict(pl_num31, type='binary', class_id='pos')

z9 = pp32.clf.coef_

compar32 = z9[0] != 0
compar32 = compar32.tolist()

In [487]:
np.count_nonzero(z9)

22

In [488]:
pl_list32 = []

for i in range(0, 210):
    count = 0
    p32 = []
    for j in compar32:
        if j:
            p32.append(pl_list31[i][count])
        else:
            p32.append(False)
            
        count += 1
            
    pl_list32.append(p32)

org_list32 = []

for i in range(0, 5000):
    count = 0
    o32 = []
    for j in compar32:
        if j:
            o32.append(org_list31[i][count]) 
        else:
            o32.append(False)
           
        count += 1
            
    org_list32.append(o32)

In [491]:
pl_num32 = np.asarray(pl_list32)
org_num32 = np.asarray(org_list32)

reg_param = 10
pp33 = PlastPredictor(reg_param)
pp33_model = pp33.fit_model(pl_num32, org_num32)
org_acc33 = pp33.predict(org_num32, type='binary', class_id='neg')
pl_acc33 = pp33.predict(pl_num32, type='binary', class_id='pos')

z10 = pp33.clf.coef_

compar33 = z10[0] != 0
compar33 = compar33.tolist()

In [492]:
np.count_nonzero(z10)

21

In [493]:
pl_list33 = []

for i in range(0, 210):
    count = 0
    p33 = []
    for j in compar33:
        if j:
            p33.append(pl_list32[i][count])
        else:
            p33.append(False)
            
        count += 1
            
    pl_list33.append(p33)

org_list33 = []

for i in range(0, 5000):
    count = 0
    o33 = []
    for j in compar33:
        if j:
            o33.append(org_list32[i][count]) 
        else:
            o33.append(False)
           
        count += 1
            
    org_list33.append(o33)

In [502]:
pl_num33 = np.asarray(pl_list33)
org_num33 = np.asarray(org_list33)

reg_param = 10
pp34 = PlastPredictor(reg_param)
pp34_model = pp34.fit_model(pl_num33, org_num33)
org_acc34 = pp34.predict(org_num33, type='binary', class_id='neg')
pl_acc34 = pp34.predict(pl_num33, type='binary', class_id='pos')

z11 = pp34.clf.coef_

compar34 = z11[0] != 0
compar34 = compar34.tolist()

In [503]:
np.count_nonzero(z11)

21

In [504]:
pl_list34 = []

for i in range(0, 210):
    count = 0
    p34 = []
    for j in compar34:
        if j:
            p34.append(pl_list33[i][count])
        else:
            p34.append(False)
            
        count += 1
            
    pl_list34.append(p34)

org_list34 = []

for i in range(0, 5000):
    count = 0
    o34 = []
    for j in compar34:
        if j:
            o34.append(org_list33[i][count]) 
        else:
            o34.append(False)
           
        count += 1
            
    org_list34.append(o34)

In [509]:
pl_num34 = np.asarray(pl_list34)
org_num34 = np.asarray(org_list34)

reg_param = 10
pp35 = PlastPredictor(reg_param)
pp35_model = pp35.fit_model(pl_num34, org_num34)
org_acc35 = pp35.predict(org_num34, type='binary', class_id='neg')
pl_acc35 = pp35.predict(pl_num34, type='binary', class_id='pos')

z12 = pp35.clf.coef_

compar35 = z12[0] != 0
compar35 = compar35.tolist()

In [510]:
np.count_nonzero(z12)

21

In [511]:
pl_list35 = []

for i in range(0, 210):
    count = 0
    p35 = []
    for j in compar35:
        if j:
            p35.append(pl_list34[i][count])
        else:
            p35.append(False)
            
        count += 1
            
    pl_list35.append(p35)

org_list35 = []

for i in range(0, 5000):
    count = 0
    o35 = []
    for j in compar35:
        if j:
            o35.append(org_list34[i][count]) 
        else:
            o35.append(False)
           
        count += 1
            
    org_list35.append(o35)

In [518]:
pl_num35 = np.asarray(pl_list35)
org_num35 = np.asarray(org_list35)

reg_param = 10
pp36 = PlastPredictor(reg_param)
pp36_model = pp36.fit_model(pl_num35, org_num35)
org_acc36 = pp36.predict(org_num35, type='binary', class_id='neg')
pl_acc36 = pp36.predict(pl_num35, type='binary', class_id='pos')

z13 = pp36.clf.coef_

compar36 = z13[0] != 0
compar36 = compar36.tolist()

In [519]:
np.count_nonzero(z13)

21

In [520]:
len(pl_list35[0])

195

In [521]:
pl_list36 = []

for i in range(0, 210):
    count = 0
    p36 = []
    for j in compar36:
        if j:
            p36.append(pl_list35[i][count])
        else:
            p36.append(False)
            
        count += 1
            
    pl_list36.append(p36)

org_list36 = []

for i in range(0, 5000):
    count = 0
    o36 = []
    for j in compar36:
        if j:
            o36.append(org_list35[i][count]) 
        else:
            o36.append(False)
           
        count += 1
            
    org_list36.append(o36)

In [522]:
pl_num36 = np.asarray(pl_list36)
org_num36 = np.asarray(org_list36)

reg_param = 10
pp37 = PlastPredictor(reg_param)
pp37_model = pp37.fit_model(pl_num36, org_num36)
org_acc37 = pp37.predict(org_num36, type='binary', class_id='neg')
pl_acc37 = pp37.predict(pl_num36, type='binary', class_id='pos')

z14 = pp37.clf.coef_

compar37 = z14[0] != 0
compar37 = compar37.tolist()

In [523]:
np.count_nonzero(z14)

21

In [524]:
pl_list37 = []

for i in range(0, 210):
    count = 0
    p37 = []
    for j in compar37:
        if j:
            p37.append(pl_list36[i][count])
        else:
            p37.append(False)
            
        count += 1
            
    pl_list37.append(p37)

org_list37 = []

for i in range(0, 5000):
    count = 0
    o37 = []
    for j in compar37:
        if j:
            o37.append(org_list36[i][count]) 
        else:
            o37.append(False)
           
        count += 1
            
    org_list37.append(o37)

In [527]:
pl_num37 = np.asarray(pl_list37)
org_num37 = np.asarray(org_list37)

reg_param = 10
pp38 = PlastPredictor(reg_param)
pp38_model = pp38.fit_model(pl_num37, org_num37)
org_acc38 = pp38.predict(org_num37, type='binary', class_id='neg')
pl_acc38 = pp38.predict(pl_num37, type='binary', class_id='pos')

z15 = pp38.clf.coef_

compar38 = z15[0] != 0
compar38 = compar38.tolist()

In [528]:
np.count_nonzero(z15)

21

In [529]:
pl_list37[0]

[11.863074963596551,
 -0.0444314061878166,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 3.555746631440125,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 11.938610575903699,
 False,
 False,
 False,
 False,
 13.213763929025836,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 25.683286491704038,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 21.0,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 0.0,
 False,
 False,
 False,
 False,
 False,
 

In [530]:
df1=pd.DataFrame(data=pl_list37[0])

In [538]:
df1.to_excel('feature.xls')

In [533]:
pl_list38 = []

for i in range(0, 210):
    count = 0
    p38 = []
    for j in compar38:
        if j:
            p38.append(pl_list37[i][count])
        else:
            p38.append(False)
            
        count += 1
            
    pl_list38.append(p38)

org_list38 = []

for i in range(0, 5000):
    count = 0
    o38 = []
    for j in compar38:
        if j:
            o38.append(org_list37[i][count]) 
        else:
            o38.append(False)
           
        count += 1
            
    org_list38.append(o38)

In [536]:
pl_num38 = np.asarray(pl_list38)
org_num38 = np.asarray(org_list38)

reg_param = 10
pp39 = PlastPredictor(reg_param)
pp39_model = pp39.fit_model(pl_num38, org_num38)
org_acc39 = pp39.predict(org_num38, type='binary', class_id='neg')
pl_acc39 = pp39.predict(pl_num38, type='binary', class_id='pos')

z16 = pp39.clf.coef_

compar39 = z16[0] != 0
compar39 = compar39.tolist()

In [537]:
np.count_nonzero(z16)

21

In [540]:
pl_list38[0] == pl_list37

False

In [541]:
df1['pl_list38'] = pl_list38[0]

In [546]:
df1['org_list37'] = org_list37[0]

this is where we save the stuff to an excel file.

In [565]:
df1.to_excel('feature.xls')