In [3]:
import gzip
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import roc_curve, roc_auc_score,recall_score,f1_score,precision_score
from sklearn.model_selection import train_test_split



In [4]:
#Importation of data 
os.chdir("C:/Users/lucie/OneDrive/Documents/Documents/ENSTA/2A/Pre-Travail/Documentation/Tache 1")

df_on = pd.read_csv("Data/adverse_reactions.csv")
df_off = pd.read_csv("Data/OFFSIDES.csv")
df_two = pd.read_csv('Data/TWOSIDES.csv', nrows=1000000)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [5]:
#Preprocessing
df_off = pd.concat([df_off.iloc[:200000], df_off.iloc[200001:]], ignore_index=True)
df_two = pd.concat([df_two.iloc[:1000], df_two.iloc[1001:]], ignore_index=True)

df_off['drug_rxnorn_id'].astype(int)
df_off['condition_meddra_id'].astype(int)

df_two['drug_1_rxnorn_id'].astype(int)
df_two['drug_2_rxnorm_id'].astype(int)
df_two['condition_meddra_id'].astype(int)

0         10003239
1         10003239
2         10003239
3         10012735
4         10012735
            ...   
999994    10049079
999995    10034902
999996    10046996
999997    10003601
999998    10052471
Name: condition_meddra_id, Length: 999999, dtype: int32

0         10003239
1         10003239
2         10003239
3         10012735
4         10012735
            ...   
999994    10049079
999995    10034902
999996    10046996
999997    10003601
999998    10052471
Name: condition_meddra_id, Length: 999999, dtype: int32

In [7]:
#Build the frequences matrix for OffSIDES
# Create the frequence matrix using OffSIDES data
drugs_off = df_off['drug_rxnorn_id'].unique()
AE_off = df_off['condition_meddra_id'].unique()

F = np.zeros((len(drugs_off), len(AE_off)))
for k in tqdm(range(df_off.shape[0])):
    i = np.where(drugs_off == df_off['drug_rxnorn_id'][k])
    j = np.where(AE_off == df_off['condition_meddra_id'][k])
    F[i, j] = df_off['mean_reporting_frequency'][k]

100%|██████████| 3206557/3206557 [22:52<00:00, 2335.88it/s]


In [13]:
#Build the frequence matrix for TwoSIDES
drugs_two=[]
for i in range(df_two.shape[0]):
    drug1=df_two['drug_1_rxnorn_id'][i]
    drug2=df_two['drug_2_rxnorm_id'][i]
    if [drug1,drug2] not in drugs_two :
        drugs_two.append([drug1,drug2])

N_drugs_two = len(drugs_two)


F_valid = np.zeros((N_drugs_two, len(AE_off)))

for k in tqdm(range(df_two.shape[0])):
    i =0
    for l, sous_liste in enumerate(drugs_two):
        if sous_liste[0] == df_two['drug_1_rxnorn_id'][k] and sous_liste[1] == df_two['drug_2_rxnorm_id'][k]:
            i=l
    j = np.where(AE_off == df_two['condition_meddra_id'][k])
    if type(df_two['mean_reporting_frequency'][k]) != str:
        F_valid[i, j] = df_two['mean_reporting_frequency'][k]
    else:
        df_two['mean_reporting_frequency'][k] = float(df_two['mean_reporting_frequency'][k])
        F_valid[i, j] = df_two['mean_reporting_frequency'][k]



100%|██████████| 999999/999999 [10:58:03<00:00, 25.33it/s]      


In [8]:
#Building the response variable
#Build a function to generalize the model for a massive amount of AE
drugs_off = df_off['drug_rxnorn_id'].unique()
AE_off = df_off['condition_meddra_id'].unique()
def build_response(AE="Hypertension"):
    #Get the meddra_id associate to the AE
    inter1=df_on['pt_meddra_id'][df_on['pt_meddra_term']==AE].unique()
    inter2=df_on['pt_meddra_id'][df_on['pt_meddra_term']==AE.lower()].unique()
    if len(inter1>0):
        med_id=inter1[0]
    elif len(inter2)>0:
        med_id=inter2[0]
    else:
        print('Error : no match for this AE')
    
    AE_drug = df_on['ingredients_rxcuis'][df_on['pt_meddra_id']==med_id]
    AE_drug=AE_drug[df_on['num_ingredients']==1]
    AE_drug=[int(AE) for AE in AE_drug]
    class_drugs = np.zeros(len(drugs_off))
    k = 0
    for doff in drugs_off:
        for drug in AE_drug :
            if doff in AE_drug:
                class_drugs[k] = 1
                break
        k += 1
    return([class_drugs,sum(class_drugs)])  #Return the response variable and the number of positive labels

In [11]:
print(len(drugs_off))
print(len(AE_off))

2786
24705


In [45]:
#Build a fonction which train the model, get the different score and predict on TwoSIDES data

def full_training(AE):
    #First we get the best parameter for the model
    def trainmodel(C,X,Y):
        model=LogisticRegression(class_weight='balanced',warm_start=True,C=C)
        return(np.mean(cross_val_score(model,X,Y,cv=5,scoring='recall')))
    pen_C=np.linspace(10**-1,10,50)
    score=[]
    X=F
    Y=build_response(AE)[0]
    for i in range(50):
        score.append(trainmodel(C=pen_C[i],X=X,Y=Y))
    
    C=pen_C[score.index(max(score))]

    scores=[max(score)]
    model=LogisticRegression(class_weight='balanced',C=C)
    scores.append(np.mean(cross_val_score(model,X,Y,cv=5,scoring='roc_auc')))   

    model.fit(X,Y)
    Y_pred=model.predict(F_valid)
    scores.append(sum(Y_pred))
    return(scores)
        

In [25]:
print(full_training("Hypertension"))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[0.7864046391752577, 0.7722130180688664, 1317.0]


In [29]:
#Mapping between Undina doc and OnSIDES
df_impdrug=pd.read_csv("Data/FAERS_ADE_Severity - Sheet1.csv")
df_impdrug.head()

Unnamed: 0,Event,Unnamed: 1,Reports (N),Deaths (N),Disabled (N),Life Threatening (N),Other Serious (N),Congenital Abnormality (N),Any Serious (N),Any Serious (%),Deaths (%),Life Threatening (%)
0,Death,35809059,416565,415844,1836,3937,101096,298,416144,0.999,99.83%,0.95%
1,Completed suicide,36919230,55679,55034,164,840,9966,4,55567,0.998,98.84%,1.51%
2,Acute myocardial infarction,35205180,20201,5926,1188,2916,9777,7,20146,0.9973,29.34%,14.43%
3,Septic shock,36110649,21986,14173,572,4925,9699,21,21922,0.9971,64.46%,22.40%
4,Acute kidney injury,37080784,55781,8637,1165,6746,33903,58,55619,0.9971,15.48%,12.09%


In [47]:
#Get the 200 most important AE (regarding of Undina doc) and which are in OnSIDES
AE_namme=[]

AE_on=df_on['pt_meddra_term'].unique()
k=0
for name in df_impdrug['Event'].unique():
    if k>=200:
        break
    name=name.lower()
    for ae in AE_on :
        ae=ae.lower()
        if name==ae:
            AE_namme.append(name)
            k+=1
            break
print(AE_namme)
print(len(AE_namme))
print(len(df_impdrug['Event'].unique()))

    


['death', 'completed suicide', 'acute myocardial infarction', 'septic shock', 'acute kidney injury', 'cerebral infarction', 'respiratory failure', 'cardiac arrest', 'cerebral haemorrhage', 'suicide attempt', 'pulmonary embolism', 'cardio-respiratory arrest', 'myocardial infarction', 'coma', 'sepsis', 'cerebrovascular accident', 'cardiac failure', 'hepatic failure', 'femur fracture', 'respiratory arrest', 'metabolic acidosis', 'interstitial lung disease', 'cardiac failure congestive', 'febrile neutropenia', 'epilepsy', 'renal failure', 'abortion spontaneous', 'rhabdomyolysis', 'hip fracture', 'pancytopenia', 'breast cancer', 'gastrointestinal haemorrhage', 'chronic kidney disease', 'transient ischaemic attack', 'osteonecrosis', 'pulmonary oedema', 'intestinal obstruction', 'coronary artery disease', 'hypoxia', 'pancreatitis', 'ascites', 'respiratory distress', 'premature baby', 'hypokalaemia', 'deep vein thrombosis', 'malignant neoplasm progression', 'drug abuse', 'hyponatraemia', 'deli

In [52]:
for k in range(len(AE_namme)):
    AE_namme[k]=AE_namme[k][0].upper()+AE_namme[k][1:]
res=np.zeros((200,3))
k=0
for name in tqdm(AE_namme):
    res[k]=full_training(name)
    k+=1



  0%|          | 0/200 [00:00<?, ?it/s]

6.5653061224489795


  0%|          | 1/200 [10:03<33:22:05, 603.65s/it]

0.1


  1%|          | 2/200 [17:56<28:57:18, 526.46s/it]

4.3428571428571425


  2%|▏         | 3/200 [28:36<31:38:54, 578.35s/it]

0.1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


9.393877551020408


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  2%|▎         | 5/200 [47:21<31:26:57, 580.60s/it]

0.1


  3%|▎         | 6/200 [56:35<30:48:18, 571.64s/it]

1.3122448979591839


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


8.585714285714285


  4%|▍         | 8/200 [1:16:01<31:13:53, 585.59s/it]

1.3122448979591839


  4%|▍         | 9/200 [1:25:21<30:38:06, 577.42s/it]

4.746938775510204


  5%|▌         | 10/200 [1:33:13<28:45:10, 544.79s/it]

5.151020408163265


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.1


  6%|▌         | 12/200 [1:53:14<30:28:22, 583.52s/it]

5.757142857142857


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.7061224489795919


  7%|▋         | 14/200 [2:16:43<33:44:49, 653.17s/it]

0.7061224489795919


  8%|▊         | 15/200 [2:25:42<31:47:51, 618.77s/it]

6.7673469387755105


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


8.383673469387755


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


7.575510204081633


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

0.1


Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 115, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 399, in _score
    return self._sign * self._score_func(y, y_pred, **self._kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 572, in roc_auc_score
    return _average_binary_score(
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_base.py", line 75, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 339, in _binary_roc_auc_score
    raise ValueError(
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\sit

0.1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.3020408163265306


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


9.595918367346938


 11%|█         | 22/200 [3:33:22<29:42:11, 600.74s/it]

4.544897959183674


 12%|█▏        | 23/200 [3:43:18<29:28:44, 599.57s/it]

4.140816326530612


 12%|█▏        | 24/200 [3:55:22<31:07:40, 636.71s/it]

0.1


 12%|█▎        | 25/200 [4:06:26<31:21:13, 644.99s/it]

9.191836734693878


 13%|█▎        | 26/200 [4:19:46<33:25:09, 691.44s/it]

0.3020408163265306


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

5.555102040816326


 14%|█▍        | 28/200 [4:47:26<36:31:57, 764.64s/it]

0.1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

4.3428571428571425


 15%|█▌        | 30/200 [5:13:41<36:50:12, 780.08s/it]

5.757142857142857


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


4.3428571428571425


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1


 16%|█▋        | 33/200 [5:47:13<32:29:33, 700.44s/it]

2.928571428571429


 17%|█▋        | 34/200 [5:55:12<29:13:59, 633.97s/it]

2.322448979591837


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

7.575510204081633


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.7061224489795919


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


2.5244897959183676


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

6.3632653061224485


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


9.595918367346938


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
 20%|██        | 40/200 [7:00:01<28:46:46, 647.54s/it]

0.1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.3020408163265306


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
1 fits failed out of a total of 5.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1241, in fit
    raise ValueError(
ValueError: This solver needs samples of at least 2 classes in the data,

0.1


Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 115, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 399, in _score
    return self._sign * self._score_func(y, y_pred, **self._kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 572, in roc_auc_score
    return _average_binary_score(
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_base.py", line 75, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 339, in _binary_roc_auc_score
    raise ValueError(
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\sit

7.575510204081633


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

1.1102040816326533


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

0.1


Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 115, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 399, in _score
    return self._sign * self._score_func(y, y_pred, **self._kwargs)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 572, in roc_auc_score
    return _average_binary_score(
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_base.py", line 75, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
  File "c:\Users\lucie\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 339, in _binary_roc_auc_score
    raise ValueError(
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

Traceback (most recent call last):
  File "c:\Users\lucie\anaconda3\lib\sit

0.1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


8.585714285714285


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

2.322448979591837


 24%|██▍       | 49/200 [8:24:59<25:56:12, 618.36s/it]

Error : no match for this AE





UnboundLocalError: local variable 'med_id' referenced before assignment

In [53]:
df = pd.DataFrame(res, index=AE_namme, columns=['Recall','Auroc','Prediction'])
df.to_csv('results2222.csv', index=True)