In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import f1_score, precision_recall_curve, roc_curve, roc_auc_score
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

Matplotlib is building the font cache; this may take a moment.


In [2]:
df=pd.read_csv('grant_data_imb.csv')

In [3]:
df.sample()

Unnamed: 0,Grant.Status,Sponsor.Code,Grant.Category.Code,Contract.Value.Band...see.note.A,RFCD.Code.1,RFCD.Percentage.1,RFCD.Code.2,RFCD.Percentage.2,RFCD.Code.3,RFCD.Percentage.3,...,Dept.No..1,Faculty.No..1,With.PHD.1,No..of.Years.in.Uni.at.Time.of.Grant.1,Number.of.Successful.Grant.1,Number.of.Unsuccessful.Grant.1,A..1,A.1,B.1,C.1
2733,0,62B,10B,B,320299.0,80.0,321006.0,20.0,0.0,0.0,...,1258.0,34.0,Yes,>=0 to 5,0.0,5.0,2.0,1.0,1.0,0.0


In [4]:
pd.set_option('display.max_columns',None)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4113 entries, 0 to 4112
Data columns (total 39 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Grant.Status                            4113 non-null   int64  
 1   Sponsor.Code                            3856 non-null   object 
 2   Grant.Category.Code                     3856 non-null   object 
 3   Contract.Value.Band...see.note.A        1953 non-null   object 
 4   RFCD.Code.1                             3853 non-null   float64
 5   RFCD.Percentage.1                       3853 non-null   float64
 6   RFCD.Code.2                             3853 non-null   float64
 7   RFCD.Percentage.2                       3853 non-null   float64
 8   RFCD.Code.3                             3853 non-null   float64
 9   RFCD.Percentage.3                       3853 non-null   float64
 10  RFCD.Code.4                             3853 non-null   floa

In [5]:
target = df['Grant.Status']
features = df.drop(['Grant.Status'], axis=1)

In [6]:
target.value_counts()

Grant.Status
0    3259
1     854
Name: count, dtype: int64

In [7]:
def upsample(features, target, repaet=10):
    features_zeros=features[target==0]
    features_ones=features[target==1]
    target_zeros=target[target==0]
    target_ones=target[target==1]
    if len(target_ones)>len(target_zeros):
        repaet=round(len(target_ones)/len(target_zeros))
        features_upsampled=pd.concat([features_ones]+ [features_zeros]*repaet)
        target_upsampled=pd.concat([target_ones]+[target_zeros]*repaet)
    else:
        repaet=round(len(target_zeros)/len(target_ones))
        features_upsampled=pd.concat([features_zeros]+ [features_ones]*repaet)
        target_upsampled=pd.concat([target_zeros]+[target_ones]*repaet)
        
    features_upsampled, target_upsampled=shuffle(features_upsampled, target_upsampled, random_state=23)
    return features_upsampled, target_upsampled


In [8]:
features_train_upsampled, target_train_upsampled=upsample(features, target)

In [9]:
target_train_upsampled.value_counts()

Grant.Status
1    3416
0    3259
Name: count, dtype: int64

In [21]:
# for column in features.columns:
#     if np.issubdtype(features[column].dtype, np.float64):
#         newcolumn=f'(column)_0'
#         features[newcolumn]=features[column].fillna(0)
#         features[column].fillna(features[column].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  features[column].fillna(features[column].mean(), inplace=True)


In [22]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4113 entries, 0 to 4112
Data columns (total 39 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Sponsor.Code                            4113 non-null   object 
 1   Grant.Category.Code                     4113 non-null   object 
 2   Contract.Value.Band...see.note.A        4113 non-null   object 
 3   RFCD.Code.1                             4113 non-null   float64
 4   RFCD.Percentage.1                       4113 non-null   float64
 5   RFCD.Code.2                             4113 non-null   float64
 6   RFCD.Percentage.2                       4113 non-null   float64
 7   RFCD.Code.3                             4113 non-null   float64
 8   RFCD.Percentage.3                       4113 non-null   float64
 9   RFCD.Code.4                             4113 non-null   float64
 10  RFCD.Percentage.4                       4113 non-null   floa

In [23]:
for col in features.columns:
    if (features[col].dtype=='object'):
        print(col)
        print(features[col].value_counts())
        print('--------------')

Sponsor.Code
Sponsor.Code
4D               1006
2B                915
21A               375
not indicated     257
24D               114
                 ... 
199C                1
313A                1
373A                1
298B                1
225A                1
Name: count, Length: 227, dtype: int64
--------------
Grant.Category.Code
Grant.Category.Code
10A              2050
30B               707
50A               375
not indicated     257
10B               211
20C               180
30C               147
30D                93
20A                49
30G                35
30E                 5
30A                 2
40C                 1
30F                 1
Name: count, dtype: int64
--------------
Contract.Value.Band...see.note.A
Contract.Value.Band...see.note.A
not indicated    2160
A                 961
B                 305
C                 159
D                 151
G                 135
E                  98
F                  75
H                  33
J                  18
I  

In [24]:
features['With.PHD.1'].fillna('No', inplace=True)
for col in features.columns:
    if (features[col].dtype=='object'):
        features[col].fillna('not indicated', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  features['With.PHD.1'].fillna('No', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  features[col].fillna('not indicated', inplace=True)


In [25]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4113 entries, 0 to 4112
Data columns (total 39 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Sponsor.Code                            4113 non-null   object 
 1   Grant.Category.Code                     4113 non-null   object 
 2   Contract.Value.Band...see.note.A        4113 non-null   object 
 3   RFCD.Code.1                             4113 non-null   float64
 4   RFCD.Percentage.1                       4113 non-null   float64
 5   RFCD.Code.2                             4113 non-null   float64
 6   RFCD.Percentage.2                       4113 non-null   float64
 7   RFCD.Code.3                             4113 non-null   float64
 8   RFCD.Percentage.3                       4113 non-null   float64
 9   RFCD.Code.4                             4113 non-null   float64
 10  RFCD.Percentage.4                       4113 non-null   floa

In [26]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4113 entries, 0 to 4112
Data columns (total 39 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Sponsor.Code                            4113 non-null   object 
 1   Grant.Category.Code                     4113 non-null   object 
 2   Contract.Value.Band...see.note.A        4113 non-null   object 
 3   RFCD.Code.1                             4113 non-null   float64
 4   RFCD.Percentage.1                       4113 non-null   float64
 5   RFCD.Code.2                             4113 non-null   float64
 6   RFCD.Percentage.2                       4113 non-null   float64
 7   RFCD.Code.3                             4113 non-null   float64
 8   RFCD.Percentage.3                       4113 non-null   float64
 9   RFCD.Code.4                             4113 non-null   float64
 10  RFCD.Percentage.4                       4113 non-null   floa

In [27]:
features_one=pd.get_dummies(features, drop_first=True)
features_one.sample()

Unnamed: 0,RFCD.Code.1,RFCD.Percentage.1,RFCD.Code.2,RFCD.Percentage.2,RFCD.Code.3,RFCD.Percentage.3,RFCD.Code.4,RFCD.Percentage.4,RFCD.Code.5,RFCD.Percentage.5,SEO.Code.1,SEO.Percentage.1,SEO.Code.2,SEO.Percentage.2,SEO.Code.3,SEO.Percentage.3,SEO.Code.4,SEO.Percentage.4,SEO.Code.5,SEO.Percentage.5,Person.ID.1,Year.of.Birth.1,Dept.No..1,Faculty.No..1,Number.of.Successful.Grant.1,Number.of.Unsuccessful.Grant.1,A..1,A.1,B.1,C.1,(column)_0,Sponsor.Code_101A,Sponsor.Code_103C,Sponsor.Code_105A,Sponsor.Code_107C,Sponsor.Code_111C,Sponsor.Code_112D,Sponsor.Code_113A,Sponsor.Code_11C,Sponsor.Code_120D,Sponsor.Code_126B,Sponsor.Code_128D,Sponsor.Code_12D,Sponsor.Code_130B,Sponsor.Code_132D,Sponsor.Code_133A,Sponsor.Code_135C,Sponsor.Code_136D,Sponsor.Code_137A,Sponsor.Code_138B,Sponsor.Code_139C,Sponsor.Code_13A,Sponsor.Code_141A,Sponsor.Code_143C,Sponsor.Code_144D,Sponsor.Code_145A,Sponsor.Code_146B,Sponsor.Code_147C,Sponsor.Code_148D,Sponsor.Code_149A,Sponsor.Code_14B,Sponsor.Code_150B,Sponsor.Code_151C,Sponsor.Code_153A,Sponsor.Code_154B,Sponsor.Code_156D,Sponsor.Code_157A,Sponsor.Code_158B,Sponsor.Code_159C,Sponsor.Code_15C,Sponsor.Code_160D,Sponsor.Code_161A,Sponsor.Code_163C,Sponsor.Code_164D,Sponsor.Code_165A,Sponsor.Code_166B,Sponsor.Code_168D,Sponsor.Code_169A,Sponsor.Code_16D,Sponsor.Code_170B,Sponsor.Code_172D,Sponsor.Code_173A,Sponsor.Code_174B,Sponsor.Code_176D,Sponsor.Code_177A,Sponsor.Code_178B,Sponsor.Code_179C,Sponsor.Code_180D,Sponsor.Code_183C,Sponsor.Code_184D,Sponsor.Code_187C,Sponsor.Code_188D,Sponsor.Code_18B,Sponsor.Code_191C,Sponsor.Code_193A,Sponsor.Code_194B,Sponsor.Code_195C,Sponsor.Code_196D,Sponsor.Code_197A,Sponsor.Code_198B,Sponsor.Code_199C,Sponsor.Code_1A,Sponsor.Code_200D,Sponsor.Code_201A,Sponsor.Code_202B,Sponsor.Code_203C,Sponsor.Code_204D,Sponsor.Code_205A,Sponsor.Code_206B,Sponsor.Code_208D,Sponsor.Code_20D,Sponsor.Code_210B,Sponsor.Code_212D,Sponsor.Code_214B,Sponsor.Code_215C,Sponsor.Code_216D,Sponsor.Code_219C,Sponsor.Code_21A,Sponsor.Code_221A,Sponsor.Code_222B,Sponsor.Code_223C,Sponsor.Code_225A,Sponsor.Code_226B,Sponsor.Code_227C,Sponsor.Code_228D,Sponsor.Code_229A,Sponsor.Code_230B,Sponsor.Code_232D,Sponsor.Code_234B,Sponsor.Code_235C,Sponsor.Code_238B,Sponsor.Code_23C,Sponsor.Code_241A,Sponsor.Code_242B,Sponsor.Code_244D,Sponsor.Code_245A,Sponsor.Code_247C,Sponsor.Code_24D,Sponsor.Code_250B,Sponsor.Code_252D,Sponsor.Code_255C,Sponsor.Code_256D,Sponsor.Code_257A,Sponsor.Code_258B,Sponsor.Code_259C,Sponsor.Code_260D,Sponsor.Code_262B,Sponsor.Code_265A,Sponsor.Code_266B,Sponsor.Code_267C,Sponsor.Code_269A,Sponsor.Code_26B,Sponsor.Code_270B,Sponsor.Code_273A,Sponsor.Code_274B,Sponsor.Code_277A,Sponsor.Code_279C,Sponsor.Code_27C,Sponsor.Code_281A,Sponsor.Code_282B,Sponsor.Code_284D,Sponsor.Code_285A,Sponsor.Code_286B,Sponsor.Code_289A,Sponsor.Code_28D,Sponsor.Code_294B,Sponsor.Code_295C,Sponsor.Code_298B,Sponsor.Code_299C,Sponsor.Code_29A,Sponsor.Code_2B,Sponsor.Code_305A,Sponsor.Code_307C,Sponsor.Code_308D,Sponsor.Code_309A,Sponsor.Code_310B,Sponsor.Code_311C,Sponsor.Code_313A,Sponsor.Code_315C,Sponsor.Code_317A,Sponsor.Code_318B,Sponsor.Code_324D,Sponsor.Code_325A,Sponsor.Code_326B,Sponsor.Code_328D,Sponsor.Code_32D,Sponsor.Code_330B,Sponsor.Code_331C,Sponsor.Code_33A,Sponsor.Code_342B,Sponsor.Code_346B,Sponsor.Code_347C,Sponsor.Code_349A,Sponsor.Code_34B,Sponsor.Code_357A,Sponsor.Code_362B,Sponsor.Code_36D,Sponsor.Code_370B,Sponsor.Code_373A,Sponsor.Code_37A,Sponsor.Code_38B,Sponsor.Code_39C,Sponsor.Code_3C,Sponsor.Code_40D,Sponsor.Code_415C,Sponsor.Code_427C,Sponsor.Code_429A,Sponsor.Code_42B,Sponsor.Code_433A,Sponsor.Code_434B,Sponsor.Code_435C,Sponsor.Code_437A,Sponsor.Code_47C,Sponsor.Code_48D,Sponsor.Code_49A,Sponsor.Code_4D,Sponsor.Code_51C,Sponsor.Code_52D,Sponsor.Code_53A,Sponsor.Code_54B,Sponsor.Code_55C,Sponsor.Code_56D,Sponsor.Code_58B,Sponsor.Code_59C,Sponsor.Code_5A,Sponsor.Code_60D,Sponsor.Code_62B,Sponsor.Code_63C,Sponsor.Code_65A,Sponsor.Code_66B,Sponsor.Code_67C,Sponsor.Code_68D,Sponsor.Code_69A,Sponsor.Code_6B,Sponsor.Code_72D,Sponsor.Code_73A,Sponsor.Code_74B,Sponsor.Code_75C,Sponsor.Code_77A,Sponsor.Code_78B,Sponsor.Code_7C,Sponsor.Code_80D,Sponsor.Code_82B,Sponsor.Code_83C,Sponsor.Code_84D,Sponsor.Code_85A,Sponsor.Code_86B,Sponsor.Code_87C,Sponsor.Code_89A,Sponsor.Code_90B,Sponsor.Code_91C,Sponsor.Code_93A,Sponsor.Code_94B,Sponsor.Code_95C,Sponsor.Code_97A,Sponsor.Code_9A,Sponsor.Code_not indicated,Grant.Category.Code_10B,Grant.Category.Code_20A,Grant.Category.Code_20C,Grant.Category.Code_30A,Grant.Category.Code_30B,Grant.Category.Code_30C,Grant.Category.Code_30D,Grant.Category.Code_30E,Grant.Category.Code_30F,Grant.Category.Code_30G,Grant.Category.Code_40C,Grant.Category.Code_50A,Grant.Category.Code_not indicated,Contract.Value.Band...see.note.A_B,Contract.Value.Band...see.note.A_C,Contract.Value.Band...see.note.A_D,Contract.Value.Band...see.note.A_E,Contract.Value.Band...see.note.A_F,Contract.Value.Band...see.note.A_G,Contract.Value.Band...see.note.A_H,Contract.Value.Band...see.note.A_I,Contract.Value.Band...see.note.A_J,Contract.Value.Band...see.note.A_K,Contract.Value.Band...see.note.A_L,Contract.Value.Band...see.note.A_M,Contract.Value.Band...see.note.A_O,Contract.Value.Band...see.note.A_P,Contract.Value.Band...see.note.A_Q,Contract.Value.Band...see.note.A_not indicated,Role.1_DELEGATED_RESEARCHER,Role.1_EXTERNAL_ADVISOR,Role.1_EXT_CHIEF_INVESTIGATOR,Role.1_HONVISIT,Role.1_PRINCIPAL_SUPERVISOR,Role.1_STUD_CHIEF_INVESTIGATOR,Role.1_not indicated,Country.of.Birth.1_Australia,Country.of.Birth.1_Eastern Europe,Country.of.Birth.1_Great Britain,Country.of.Birth.1_Middle East and Africa,Country.of.Birth.1_New Zealand,Country.of.Birth.1_North America,Country.of.Birth.1_South Africa,Country.of.Birth.1_The Americas,Country.of.Birth.1_Western Europe,Country.of.Birth.1_not indicated,Home.Language.1_Other,Home.Language.1_not indicated,With.PHD.1_Yes,No..of.Years.in.Uni.at.Time.of.Grant.1_>5 to 10,No..of.Years.in.Uni.at.Time.of.Grant.1_>=0 to 5,No..of.Years.in.Uni.at.Time.of.Grant.1_Less than 0,No..of.Years.in.Uni.at.Time.of.Grant.1_more than 15,No..of.Years.in.Uni.at.Time.of.Grant.1_not indicated
1024,270199.0,70.0,270201.0,20.0,270102.0,10.0,0.0,0.0,0.0,0.0,730108.0,50.0,730199.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,497907.0,1970.0,2578.0,25.0,0.0,0.0,3.0,1.0,1.0,0.0,0.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,True,False,False,True,False,False


In [28]:
features_train, features_test, target_train, target_test = train_test_split(features_one, target, test_size=0.25, random_state=23)


In [29]:
scaler = StandardScaler()
scaler.fit(features_train)
features_train_scaled = scaler.transform(features_train)
features_test_scaled = scaler.transform(features_test)

In [30]:
pd.DataFrame(features_test_scaled,columns=features_train.columns)

Unnamed: 0,RFCD.Code.1,RFCD.Percentage.1,RFCD.Code.2,RFCD.Percentage.2,RFCD.Code.3,RFCD.Percentage.3,RFCD.Code.4,RFCD.Percentage.4,RFCD.Code.5,RFCD.Percentage.5,SEO.Code.1,SEO.Percentage.1,SEO.Code.2,SEO.Percentage.2,SEO.Code.3,SEO.Percentage.3,SEO.Code.4,SEO.Percentage.4,SEO.Code.5,SEO.Percentage.5,Person.ID.1,Year.of.Birth.1,Dept.No..1,Faculty.No..1,Number.of.Successful.Grant.1,Number.of.Unsuccessful.Grant.1,A..1,A.1,B.1,C.1,(column)_0,Sponsor.Code_101A,Sponsor.Code_103C,Sponsor.Code_105A,Sponsor.Code_107C,Sponsor.Code_111C,Sponsor.Code_112D,Sponsor.Code_113A,Sponsor.Code_11C,Sponsor.Code_120D,Sponsor.Code_126B,Sponsor.Code_128D,Sponsor.Code_12D,Sponsor.Code_130B,Sponsor.Code_132D,Sponsor.Code_133A,Sponsor.Code_135C,Sponsor.Code_136D,Sponsor.Code_137A,Sponsor.Code_138B,Sponsor.Code_139C,Sponsor.Code_13A,Sponsor.Code_141A,Sponsor.Code_143C,Sponsor.Code_144D,Sponsor.Code_145A,Sponsor.Code_146B,Sponsor.Code_147C,Sponsor.Code_148D,Sponsor.Code_149A,Sponsor.Code_14B,Sponsor.Code_150B,Sponsor.Code_151C,Sponsor.Code_153A,Sponsor.Code_154B,Sponsor.Code_156D,Sponsor.Code_157A,Sponsor.Code_158B,Sponsor.Code_159C,Sponsor.Code_15C,Sponsor.Code_160D,Sponsor.Code_161A,Sponsor.Code_163C,Sponsor.Code_164D,Sponsor.Code_165A,Sponsor.Code_166B,Sponsor.Code_168D,Sponsor.Code_169A,Sponsor.Code_16D,Sponsor.Code_170B,Sponsor.Code_172D,Sponsor.Code_173A,Sponsor.Code_174B,Sponsor.Code_176D,Sponsor.Code_177A,Sponsor.Code_178B,Sponsor.Code_179C,Sponsor.Code_180D,Sponsor.Code_183C,Sponsor.Code_184D,Sponsor.Code_187C,Sponsor.Code_188D,Sponsor.Code_18B,Sponsor.Code_191C,Sponsor.Code_193A,Sponsor.Code_194B,Sponsor.Code_195C,Sponsor.Code_196D,Sponsor.Code_197A,Sponsor.Code_198B,Sponsor.Code_199C,Sponsor.Code_1A,Sponsor.Code_200D,Sponsor.Code_201A,Sponsor.Code_202B,Sponsor.Code_203C,Sponsor.Code_204D,Sponsor.Code_205A,Sponsor.Code_206B,Sponsor.Code_208D,Sponsor.Code_20D,Sponsor.Code_210B,Sponsor.Code_212D,Sponsor.Code_214B,Sponsor.Code_215C,Sponsor.Code_216D,Sponsor.Code_219C,Sponsor.Code_21A,Sponsor.Code_221A,Sponsor.Code_222B,Sponsor.Code_223C,Sponsor.Code_225A,Sponsor.Code_226B,Sponsor.Code_227C,Sponsor.Code_228D,Sponsor.Code_229A,Sponsor.Code_230B,Sponsor.Code_232D,Sponsor.Code_234B,Sponsor.Code_235C,Sponsor.Code_238B,Sponsor.Code_23C,Sponsor.Code_241A,Sponsor.Code_242B,Sponsor.Code_244D,Sponsor.Code_245A,Sponsor.Code_247C,Sponsor.Code_24D,Sponsor.Code_250B,Sponsor.Code_252D,Sponsor.Code_255C,Sponsor.Code_256D,Sponsor.Code_257A,Sponsor.Code_258B,Sponsor.Code_259C,Sponsor.Code_260D,Sponsor.Code_262B,Sponsor.Code_265A,Sponsor.Code_266B,Sponsor.Code_267C,Sponsor.Code_269A,Sponsor.Code_26B,Sponsor.Code_270B,Sponsor.Code_273A,Sponsor.Code_274B,Sponsor.Code_277A,Sponsor.Code_279C,Sponsor.Code_27C,Sponsor.Code_281A,Sponsor.Code_282B,Sponsor.Code_284D,Sponsor.Code_285A,Sponsor.Code_286B,Sponsor.Code_289A,Sponsor.Code_28D,Sponsor.Code_294B,Sponsor.Code_295C,Sponsor.Code_298B,Sponsor.Code_299C,Sponsor.Code_29A,Sponsor.Code_2B,Sponsor.Code_305A,Sponsor.Code_307C,Sponsor.Code_308D,Sponsor.Code_309A,Sponsor.Code_310B,Sponsor.Code_311C,Sponsor.Code_313A,Sponsor.Code_315C,Sponsor.Code_317A,Sponsor.Code_318B,Sponsor.Code_324D,Sponsor.Code_325A,Sponsor.Code_326B,Sponsor.Code_328D,Sponsor.Code_32D,Sponsor.Code_330B,Sponsor.Code_331C,Sponsor.Code_33A,Sponsor.Code_342B,Sponsor.Code_346B,Sponsor.Code_347C,Sponsor.Code_349A,Sponsor.Code_34B,Sponsor.Code_357A,Sponsor.Code_362B,Sponsor.Code_36D,Sponsor.Code_370B,Sponsor.Code_373A,Sponsor.Code_37A,Sponsor.Code_38B,Sponsor.Code_39C,Sponsor.Code_3C,Sponsor.Code_40D,Sponsor.Code_415C,Sponsor.Code_427C,Sponsor.Code_429A,Sponsor.Code_42B,Sponsor.Code_433A,Sponsor.Code_434B,Sponsor.Code_435C,Sponsor.Code_437A,Sponsor.Code_47C,Sponsor.Code_48D,Sponsor.Code_49A,Sponsor.Code_4D,Sponsor.Code_51C,Sponsor.Code_52D,Sponsor.Code_53A,Sponsor.Code_54B,Sponsor.Code_55C,Sponsor.Code_56D,Sponsor.Code_58B,Sponsor.Code_59C,Sponsor.Code_5A,Sponsor.Code_60D,Sponsor.Code_62B,Sponsor.Code_63C,Sponsor.Code_65A,Sponsor.Code_66B,Sponsor.Code_67C,Sponsor.Code_68D,Sponsor.Code_69A,Sponsor.Code_6B,Sponsor.Code_72D,Sponsor.Code_73A,Sponsor.Code_74B,Sponsor.Code_75C,Sponsor.Code_77A,Sponsor.Code_78B,Sponsor.Code_7C,Sponsor.Code_80D,Sponsor.Code_82B,Sponsor.Code_83C,Sponsor.Code_84D,Sponsor.Code_85A,Sponsor.Code_86B,Sponsor.Code_87C,Sponsor.Code_89A,Sponsor.Code_90B,Sponsor.Code_91C,Sponsor.Code_93A,Sponsor.Code_94B,Sponsor.Code_95C,Sponsor.Code_97A,Sponsor.Code_9A,Sponsor.Code_not indicated,Grant.Category.Code_10B,Grant.Category.Code_20A,Grant.Category.Code_20C,Grant.Category.Code_30A,Grant.Category.Code_30B,Grant.Category.Code_30C,Grant.Category.Code_30D,Grant.Category.Code_30E,Grant.Category.Code_30F,Grant.Category.Code_30G,Grant.Category.Code_40C,Grant.Category.Code_50A,Grant.Category.Code_not indicated,Contract.Value.Band...see.note.A_B,Contract.Value.Band...see.note.A_C,Contract.Value.Band...see.note.A_D,Contract.Value.Band...see.note.A_E,Contract.Value.Band...see.note.A_F,Contract.Value.Band...see.note.A_G,Contract.Value.Band...see.note.A_H,Contract.Value.Band...see.note.A_I,Contract.Value.Band...see.note.A_J,Contract.Value.Band...see.note.A_K,Contract.Value.Band...see.note.A_L,Contract.Value.Band...see.note.A_M,Contract.Value.Band...see.note.A_O,Contract.Value.Band...see.note.A_P,Contract.Value.Band...see.note.A_Q,Contract.Value.Band...see.note.A_not indicated,Role.1_DELEGATED_RESEARCHER,Role.1_EXTERNAL_ADVISOR,Role.1_EXT_CHIEF_INVESTIGATOR,Role.1_HONVISIT,Role.1_PRINCIPAL_SUPERVISOR,Role.1_STUD_CHIEF_INVESTIGATOR,Role.1_not indicated,Country.of.Birth.1_Australia,Country.of.Birth.1_Eastern Europe,Country.of.Birth.1_Great Britain,Country.of.Birth.1_Middle East and Africa,Country.of.Birth.1_New Zealand,Country.of.Birth.1_North America,Country.of.Birth.1_South Africa,Country.of.Birth.1_The Americas,Country.of.Birth.1_Western Europe,Country.of.Birth.1_not indicated,Home.Language.1_Other,Home.Language.1_not indicated,With.PHD.1_Yes,No..of.Years.in.Uni.at.Time.of.Grant.1_>5 to 10,No..of.Years.in.Uni.at.Time.of.Grant.1_>=0 to 5,No..of.Years.in.Uni.at.Time.of.Grant.1_Less than 0,No..of.Years.in.Uni.at.Time.of.Grant.1_more than 15,No..of.Years.in.Uni.at.Time.of.Grant.1_not indicated
0,0.132846,-0.577166,1.038520,0.133354,1.600406,1.138814,-0.152343,-0.139354,-0.069868,-0.065297,0.036715,-0.413459,0.891541,-0.071077,1.545267,1.123830,-0.219869,-0.195251,-0.112847,-0.094575,-0.375098,-1.923609,-2.125591,0.120104,-0.725230,-0.854134,-0.514368,-0.558960,-0.789203,0.409703,0.409703,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,1.862677,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,0.937732,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,-0.461238,-0.694248,-0.370928,3.170767,-0.484343
1,0.124280,-0.962671,1.035278,1.740023,-0.662551,-0.608113,-0.152343,-0.139354,-0.069868,-0.065297,0.033867,1.135240,-1.289924,-1.100028,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,-0.008394,0.004220,-0.008813,0.002571,0.003979,0.007420,-0.003587,0.007908,0.007463,-0.004638,-0.004638,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,-0.536862,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,6.561794,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,2.190204,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,-1.066402,-0.088561,0.0,4.004831,-0.044151,-0.195024,-0.050998,-0.102396,-1.265044,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,2.228269,-0.139657,0.324007,-1.114858,-0.461238,-0.694248,-0.370928,-0.315381,2.064655
2,0.126379,0.964852,-1.023501,-0.937759,-0.662551,-0.608113,-0.152343,-0.139354,-0.069868,-0.065297,0.033808,1.135240,-1.289924,-1.100028,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,-0.423607,0.256580,0.132020,0.120104,0.663498,0.920555,0.053975,-0.022982,-0.015441,-0.059173,-0.059173,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,-0.536862,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,11.536332,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,6.610269,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,0.937732,11.291590,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,-3.086357,0.896975,-0.461238,1.440408,-0.370928,-0.315381,-0.484343
3,-1.576175,-0.577166,0.518499,0.668911,1.030425,0.265350,-0.152343,-0.139354,-0.069868,-0.065297,1.501955,1.135240,-1.289924,-1.100028,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,-0.172757,0.801627,1.087163,0.723247,1.357862,-0.499196,1.048575,2.254926,-0.208881,-0.528048,-0.528048,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,1.862677,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,5.352929,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,-1.066402,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,-0.461238,1.440408,-0.370928,-0.315381,-0.484343
4,-0.486328,-0.962671,0.849751,0.668911,1.385491,1.138814,-0.152343,-0.139354,-0.069868,-0.065297,-2.010022,-0.413459,0.660685,0.443399,1.362606,0.254732,-0.219869,-0.195251,-0.112847,-0.094575,-0.187566,0.801627,-0.569866,-0.483038,-0.725230,0.210679,-0.230197,-0.558960,0.758322,-0.293610,-0.293610,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,1.862677,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,0.937732,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,-0.461238,1.440408,-0.370928,-0.315381,-0.484343
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1024,0.137108,0.964852,-1.023501,-0.937759,-0.662551,-0.608113,-0.152343,-0.139354,-0.069868,-0.065297,0.037185,1.135240,-1.289924,-1.100028,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,-0.628291,-0.288467,0.689187,0.120104,-0.030866,-0.854134,-0.372283,-0.692955,0.371441,-0.528048,-0.528048,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,11.536332,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,-0.536862,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,2.190204,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,-1.066402,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,2.168076,-0.694248,-0.370928,-0.315381,-0.484343
1025,-0.306087,0.964852,-1.023501,-0.937759,-0.662551,-0.608113,-0.152343,-0.139354,-0.069868,-0.065297,-3.193308,1.135240,-1.289924,-1.100028,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,0.499140,-0.288467,-2.096647,-2.292465,-0.725230,0.210679,-0.514368,-0.558960,-0.402322,-0.528048,-0.528048,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,22.649503,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,-0.536862,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,3.608072,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,-1.066402,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,-1.265044,-0.121686,-0.280533,10.845063,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,-0.461238,-0.694248,-0.370928,-0.315381,2.064655
1026,1.809894,-1.348175,1.738214,0.668911,2.297983,2.012278,-0.152343,-0.139354,-0.069868,-0.065297,0.650308,-1.187808,0.833378,0.443399,1.698425,1.992928,-0.219869,-0.195251,-0.112847,-0.094575,-0.008394,0.004220,-0.008813,0.002571,0.003979,0.007420,-0.003587,0.007908,0.007463,-0.004638,-0.004638,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,1.862677,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,-0.567864,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,0.937732,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,9.766012,-1.265044,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,2.228269,-0.139657,0.324007,-1.114858,-0.461238,-0.694248,-0.370928,-0.315381,2.064655
1027,0.115778,0.964852,-1.023501,-0.937759,-0.662551,-0.608113,-0.152343,-0.139354,-0.069868,-0.065297,0.033749,-0.026284,0.832840,0.443399,-0.696245,-0.614366,-0.219869,-0.195251,-0.112847,-0.094575,1.299451,1.346674,0.457637,0.120104,-0.725230,-0.499196,-0.514368,-0.692955,-0.789203,-0.528048,-0.528048,-0.0541,-0.036037,-0.031204,-0.01801,-0.025474,-0.040298,-0.031204,-0.01801,-0.01801,-0.044151,0.0,-0.040298,-0.01801,-0.01801,-0.036037,-0.040298,-0.025474,-0.047696,-0.01801,-0.025474,-0.036037,-0.050998,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.086683,-0.01801,-0.031204,-0.01801,-0.01801,0.0,-0.01801,0.0,-0.01801,-0.01801,-0.036037,-0.01801,-0.036037,-0.040298,-0.031204,-0.025474,-0.0541,-0.025474,-0.025474,-0.01801,-0.025474,-0.036037,-0.040298,-0.025474,0.0,-0.01801,0.0,-0.036037,-0.036037,-0.057036,-0.01801,-0.025474,-0.047696,-0.047696,0.0,0.0,-0.025474,-0.01801,-0.0541,-0.01801,-0.031204,0.0,-0.057036,-0.01801,-0.01801,-0.031204,-0.01801,0.0,-0.0541,0.0,-0.025474,-0.082801,-0.031204,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.309756,-0.036037,-0.0541,-0.025474,-0.01801,0.0,-0.025474,-0.047696,-0.031204,-0.01801,-0.025474,-0.025474,0.0,-0.01801,-0.01801,-0.036037,0.0,-0.01801,-0.031204,-0.031204,-0.170379,-0.01801,-0.0541,-0.025474,-0.01801,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.01801,-0.057036,-0.01801,-0.076621,-0.031204,0.0,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.050998,-0.031204,-0.01801,-0.01801,-0.01801,-0.01801,-0.040298,-0.01801,0.0,-0.01801,-0.01801,-0.104001,-0.536862,-0.01801,-0.01801,0.0,-0.025474,-0.01801,-0.01801,-0.01801,-0.036037,-0.01801,-0.025474,-0.025474,-0.01801,0.0,-0.01801,-0.137224,-0.01801,0.0,-0.078734,0.0,-0.01801,-0.01801,-0.01801,-0.142052,-0.01801,0.0,-0.069911,-0.025474,-0.01801,0.0,-0.025474,-0.01801,-0.047696,-0.152397,-0.01801,-0.01801,-0.01801,-0.031204,-0.01801,-0.01801,-0.01801,0.0,-0.065063,-0.01801,-0.01801,1.760984,-0.0541,-0.01801,-0.036037,-0.031204,-0.031204,-0.040298,-0.01801,-0.104001,-0.108679,-0.086683,-0.102396,-0.040298,-0.06753,-0.044151,-0.036037,-0.036037,-0.031204,-0.09398,-0.01801,-0.031204,0.0,-0.069911,-0.065063,-0.031204,-0.0625,-0.01801,-0.01801,-0.059829,-0.031204,-0.025474,-0.040298,-0.044151,-0.084763,-0.0541,-0.01801,-0.01801,-0.031204,-0.050998,-0.102396,-0.044151,-0.257663,-0.241535,-0.110196,-0.214786,-0.025474,-0.456579,-0.194126,-0.151280,-0.025474,0.0,-0.092208,-0.01801,-0.309756,-0.257663,-0.277156,-0.205533,-0.192321,-0.160022,-0.13096,-0.186814,-0.088561,-0.044151,-0.072216,-0.01801,-0.01801,-0.01801,-0.01801,-0.025474,-0.01801,0.937732,-0.088561,0.0,-0.249698,-0.044151,-0.195024,-0.050998,-0.102396,0.790486,-0.121686,-0.280533,-0.092208,-0.088561,-0.173379,-0.06753,-0.084763,-0.197696,-0.448779,-0.139657,0.324007,0.896975,-0.461238,1.440408,-0.370928,-0.315381,-0.484343


In [31]:
model=LogisticRegressionCV(solver='liblinear',random_state=12,class_weight='balanced',cv=10)
model.fit(features_train_scaled, target_train)
roc_auc_score(target_test, model.predict_proba(features_test_scaled)[:,1])

np.float64(0.8418051786832479)

In [32]:
pd.DataFrame(zip(list(features_one.columns), list(abs(model.coef_[0]))),columns=['feature','importance']).sort_values(by=['importance'], ascending=False).head(10)

Unnamed: 0,feature,importance
25,Number.of.Unsuccessful.Grant.1,1.448239
285,Contract.Value.Band...see.note.A_not indicated,1.438929
24,Number.of.Successful.Grant.1,1.059605
137,Sponsor.Code_24D,0.870277
233,Sponsor.Code_6B,0.577065
278,Contract.Value.Band...see.note.A_J,0.574163
226,Sponsor.Code_62B,0.48508
188,Sponsor.Code_33A,0.395884
75,Sponsor.Code_166B,0.360079
256,Sponsor.Code_not indicated,0.359772


In [40]:
clf=RandomForestClassifier()
parametrs={
    'n_estimators': range(10,51,10),
    'max_depth':range(1,13,2),
    'min_samples_leaf':range(1,8),
    'min_samples_split':range(2,10)
}


In [41]:
grid=GridSearchCV(clf, parametrs, cv=5)
grid.fit(features_train_scaled, target_train)

  _data = np.array(data, dtype=dtype, copy=copy,


In [42]:
grid.best_params_

{'max_depth': 11,
 'min_samples_leaf': 1,
 'min_samples_split': 9,
 'n_estimators': 10}

In [43]:
model=RandomForestClassifier(random_state=12, class_weight='balanced', max_depth=11, min_samples_leaf=1, min_samples_split=9,n_estimators=10)
model.fit(features_train_scaled, target_train)
roc_auc_score(target_test, model.predict_proba(features_test_scaled)[:,1])

np.float64(0.8864952157052303)

In [44]:
pd.DataFrame(zip(list(features_one.columns), model.feature_importances_), columns=['feature','importance']).sort_values(by=['importance'], ascending=False).head(10)


Unnamed: 0,feature,importance
285,Contract.Value.Band...see.note.A_not indicated,0.217257
25,Number.of.Unsuccessful.Grant.1,0.132621
24,Number.of.Successful.Grant.1,0.057622
269,Grant.Category.Code_not indicated,0.029052
261,Grant.Category.Code_30B,0.026321
10,SEO.Code.1,0.023604
0,RFCD.Code.1,0.023518
22,Dept.No..1,0.022514
20,Person.ID.1,0.022154
21,Year.of.Birth.1,0.020464


Модель случайного леса показала себя лучше но на нее потребовалось больше времени, почти 18 минут. Самый важный признак у случайного леса Contract.Value.Band...see.note.A_not indicated а у логистической регресси этот признак на втором месте.