In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/ABIDE

Mounted at /content/drive
/content/drive/MyDrive/ABIDE


In [2]:
!pip install nilearn

from nilearn.image import load_img
from nilearn.datasets import fetch_atlas_schaefer_2018
from nilearn.input_data import NiftiLabelsMasker
import numpy as np
from scipy.spatial.distance import squareform
import tensorflow as tf
from tensorflow.keras import layers, models
import os

# Fetch the atlas with 100 regions of interest (ROIs)
atlas = fetch_atlas_schaefer_2018(n_rois=100)
masker = NiftiLabelsMasker(labels_img=atlas.maps, standardize=True)


Collecting nilearn
  Downloading nilearn-0.11.0-py3-none-any.whl.metadata (8.8 kB)
Downloading nilearn-0.11.0-py3-none-any.whl (10.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m100.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nilearn
Successfully installed nilearn-0.11.0


In [3]:
imaging_files = os.listdir("func_preproc")
print(f"Number of imaging files: {len(imaging_files)}")
print("First 10 imaging files:", imaging_files[:10])


Number of imaging files: 247
First 10 imaging files: ['Pitt_0050014_func_preproc.nii.gz', 'Pitt_0050025_func_preproc.nii.gz', 'Pitt_0050041_func_preproc.nii.gz', 'Pitt_0050042_func_preproc.nii.gz', 'Pitt_0050045_func_preproc.nii.gz', 'Pitt_0050047_func_preproc.nii.gz', 'Pitt_0050052_func_preproc.nii.gz', 'Olin_0050104_func_preproc.nii.gz', 'Olin_0050107_func_preproc.nii.gz', 'Olin_0050112_func_preproc.nii.gz']


In [4]:
import pandas as pd

# Function to extract subject ID from the filename
def get_id(name):
    parts = name.split('_')
    for part in parts:
        if part.isdigit():
            return part.lstrip('0')  # Strip leading zeros
    return None


# Load the phenotypic data
phenotypic_data = pd.read_csv('Updated_Phenotypic_V1_0b_preprocessed1_corrected_version2.csv')
phenotypic_data['SUB_ID'] = phenotypic_data['SUB_ID'].astype(str).str.strip()

# List of fMRI files
imaging_files = os.listdir("func_preproc")

# Extract IDs from imaging files
image_ids = [get_id(f) for f in imaging_files]
print(f"First 10 extracted image IDs: {image_ids[:10]}")

# Ensure there are no None values
image_ids = [id_ for id_ in image_ids if id_ is not None]

# Check the first few `SUB_ID`s in phenotypic data
print("First 10 SUB_IDs in phenotypic data:", phenotypic_data['WISC_IV_PSI'])

First 10 extracted image IDs: ['50014', '50025', '50041', '50042', '50045', '50047', '50052', '50104', '50107', '50112']
First 10 SUB_IDs in phenotypic data: 0        89.0
1       100.0
2       102.0
3       117.0
4        89.0
        ...  
1107    120.0
1108    105.0
1109    108.0
1110    116.0
1111     87.0
Name: WISC_IV_PSI, Length: 1112, dtype: float64


In [19]:
from tqdm import tqdm

# #WIP
# def standardise_column(column):
#   print(column)
#   return (column - column.mean())/column.std()

def standardise_all(variables, phenotypic_data):
  print(variables)
  for variable in variables:
    print(type(phenotypic_data[variable]))
    print(phenotypic_data[variable])
    print(phenotypic_data["WISC_IV_PSI"])

    phenotypic_data[variable] = (phenotypic_data[variable] - phenotypic_data[variable].mean())/phenotypic_data[variable].std()
    print(type(phenotypic_data[variable]))
    print(phenotypic_data[variable])
    print(phenotypic_data["WISC_IV_PSI"])
  return phenotypic_data

def get_personality_score(personality_type, personality_dictionary, row):
  pos = np.mean([row[variable] for variable in personality_dictionary[personality_type]["Pos"]])
  neg = np.mean([row[variable] for variable in personality_dictionary[personality_type]["Neg"]])
  return pos - neg

def get_personality(personality_dictionary, row):
  choleric_score = get_personality_score("choleric", personality_dictionary, row)
  print(choleric_score)
  sanguine_score = get_personality_score("sanguine", personality_dictionary, row)
  print(sanguine_score)
  phlegmatic_score = get_personality_score("phlegmatic", personality_dictionary, row)
  print(phlegmatic_score)
  melancholic_score = get_personality_score("melancholic", personality_dictionary, row)
  print(melancholic_score)
  if max(choleric_score, sanguine_score, phlegmatic_score, melancholic_score) == choleric_score:
    return "choleric"
  if max(choleric_score, sanguine_score, phlegmatic_score, melancholic_score) == sanguine_score:
    return "sanguine"
  if max(choleric_score, sanguine_score, phlegmatic_score, melancholic_score) == phlegmatic_score:
    return "phlegmatic"
  if max(choleric_score, sanguine_score, phlegmatic_score, melancholic_score) == melancholic_score:
    return "melancholic"

# Scores for receptive communication, expressive communication, written communication, interpersonal skills, coping skills, processing speed
personality_variables = ["VINELAND_RECEPTIVE_V_SCALED", "VINELAND_EXPRESSIVE_V_SCALED",
                        "VINELAND_WRITTEN_V_SCALED", "VINELAND_INTERPERSONAL_V_SCALED",
                        "VINELAND_COPING_V_SCALED", "WISC_IV_PSI"]

phenotypic_data = standardise_all(personality_variables, phenotypic_data)

print(phenotypic_data["WISC_IV_PSI"])

personality_dictionary = {
  "choleric": {"Pos":["WISC_IV_PSI"], "Neg": ["VINELAND_RECEPTIVE_V_SCALED", "VINELAND_INTERPERSONAL_V_SCALED", "VINELAND_COPING_V_SCALED"]},
  "sanguine": {"Pos":["VINELAND_EXPRESSIVE_V_SCALED", "VINELAND_INTERPERSONAL_V_SCALED"], "Neg": ["VINELAND_WRITTEN_V_SCALED"]},
  "phlegmatic": {"Pos":["VINELAND_RECEPTIVE_V_SCALED", "VINELAND_COPING_V_SCALED"], "Neg": ["WISC_IV_PSI"]},
  "melancholic": {"Pos":["VINELAND_WRITTEN_V_SCALED"], "Neg": ["VINELAND_EXPRESSIVE_V_SCALED"]}
}

def add_personalities(personality_dictionary, phenotypic_data):
  for idx, row in tqdm(phenotypic_data.iterrows(), total=phenotypic_data.shape[0]):
    phenotypic_data.loc[idx, "PERSONALITY"] = get_personality(personality_dictionary, row)
  return phenotypic_data

phenotypic_data = add_personalities(personality_dictionary, phenotypic_data)

phenotypic_data.to_csv("with_personalities.csv", encoding='utf-8')

['VINELAND_RECEPTIVE_V_SCALED', 'VINELAND_EXPRESSIVE_V_SCALED', 'VINELAND_WRITTEN_V_SCALED', 'VINELAND_INTERPERSONAL_V_SCALED', 'VINELAND_COPING_V_SCALED', 'WISC_IV_PSI']
<class 'pandas.core.series.Series'>
0       0.805293
1      -0.347941
2      -0.347941
3       0.228676
4      -0.492095
          ...   
1107    0.228676
1108   -0.924558
1109   -1.357020
1110   -0.492095
1111   -1.645329
Name: VINELAND_RECEPTIVE_V_SCALED, Length: 1112, dtype: float64
0      -0.907366
1      -0.204103
2      -0.076237
3       0.882759
4      -0.907366
          ...   
1107    1.074558
1108    0.115562
1109    0.307361
1110    0.818826
1111   -1.035232
Name: WISC_IV_PSI, Length: 1112, dtype: float64
<class 'pandas.core.series.Series'>
0       0.805293
1      -0.347941
2      -0.347941
3       0.228676
4      -0.492095
          ...   
1107    0.228676
1108   -0.924558
1109   -1.357020
1110   -0.492095
1111   -1.645329
Name: VINELAND_RECEPTIVE_V_SCALED, Length: 1112, dtype: float64
0      -0.907366
1  

  7%|▋         | 81/1112 [00:00<00:01, 807.76it/s]

-1.2591912910281584
-1.1141955954891836
1.5739212905745439
1.0109110166948234
0.36419387007178095
-0.4652303427086931
0.22129275078591049
-0.87918028512929
0.1706643900081638
-1.6514514141917287
-0.5613073179882095
2.172880229049204
0.4637375765108068
1.2328649670601972
-0.359015501486044
-1.4570497566623124
-0.3417403570913806
-1.9854499951375286
0.1977448896693771
2.1737702675189663
1.4334025035800737
-0.8575484196861443
-1.9439502812547302
1.8892857241012662
-0.6402946612354616
-0.4752858033013081
0.6948117839962871
1.0198114013924455
1.1922371751309788
1.534815670860798
-0.8513847290951111
-2.4732165434287943
2.18313037119632
-0.5991718761574953
-1.6293185636586256
-1.028542864596238
1.1642371895267016
-3.1711663463770603
-0.6885022745156395
2.9036724339282234
2.472573377373963
-0.7523596469287228
-2.8089982201866954
0.7219762809283121
-0.18637736055965243
-2.2758997704183335
0.838081593906227
2.3249129239254387
-0.6527945848434724
-1.7925386100055862
1.2275972138915971
1.445648178

 21%|██        | 235/1112 [00:00<00:01, 681.53it/s]

-0.3985680331496361
1.9675077178504536
0.27784372897286086
-2.32919419478042
-1.230551365819681
1.5345506334355676
1.2061535985458378
-2.187841961541332
1.473879076555845
-1.0143470535503352
-1.224612143121622
0.7237563578678365
-0.2156847556224989
-0.04170371784189063
0.2713771666115071
1.0171412859831586
1.338102966057501
-0.13864246155317905
-1.3086465938629868
-1.1725652132446127
2.0380576730661186
-0.4530790585374084
-1.972409161665456
0.14588688633481395
1.7002568020641888
0.41343475672993757
-1.1927068264493095
-1.7495446463078728
1.9236880394260563
-2.3185585376135665
-1.5796604579948486
2.0279678419310674
1.6139523554268118
-1.5350267503397421
-1.1124855946040322
0.42592123740370313
-0.1759005358658845
1.5292104026169944
0.10367504776327849
-1.3076871771953649
1.421132379358318
0.34411905242199414
-1.4728712389332705
-0.43910289295630633
-0.04767363112436751
1.634399175374416
0.1255453682617359
-2.474996620368319
1.3284368644163298
1.1700445694658201
-1.641366845568699
-1.4588

 35%|███▌      | 392/1112 [00:00<00:00, 745.34it/s]

1.1626592242824918
-1.4435279107007895
8.44607469148162e-05
0.0920474558185512
-2.0684320231310505
0.011863276676984919
2.4671551956342883
-1.7251611184817077
2.7750516814346593
1.9627658153145078
-3.347141058486426
-2.0474069342578662
-0.5152491004802967
1.67122421616588
2.034198111219403
-1.314248077669568
0.7797716068531241
0.748587421167057
0.7308766656259342
-3.0851095128122443
1.6353145684511357
3.087785781840931
-0.8702799004316679
0.3661378472118069
2.0653111454245465
-0.9849548929584598
-2.0375893436046217
-1.8053651995510187
-0.04199410987407881
1.6502383434891776
-0.44889331612369077
0.9911462873682957
1.9396703502357924
-1.5734072482098438
-2.0411494974836706
0.06463581829442622
-1.1918373926641321
-0.2986509094121268
0.4241411604641787
-0.26068227987545933
-0.8335615979827202
0.4130918284451646
0.7219762809283121
-0.9363722841744788
0.3118314924585223
0.442648509253995
0.5824041246287484
-1.4569241424942974
0.007810319686208933
0.8321641292393651
0.42592123740370313
-0.991

 52%|█████▏    | 579/1112 [00:00<00:00, 788.12it/s]

-0.5893942630220924
1.465499966552855
0.706646713330011
-1.162774790077228
0.6452981185978814
-0.3037164790704606
-0.2668679852816079
-0.7324878210716287
1.5925165258503255
-1.676978630016404
-1.001701236577255
1.5967908344557058
-0.10771468002238076
1.5973710310299447
0.018665189165602246
-2.1860618846018074
1.291427281709835
-0.4239916888336842
-1.3620868809064932
0.1352064246976673
0.10555112705983538
-0.8961169581518665
0.4960884013066224
0.434821622101325
1.5772591159681615
1.2441008581547626
-1.045319485032377
-2.0366993051348596
-0.36443183329708945
0.9670523691343054
0.160135065915673
-0.5831252416046809
0.15419089438388622
-0.615747998070634
-0.04938536822780852
0.4312614682222764
-0.28413024305077267
-1.3918944401611295
0.807609427759283
0.5743937784008888
-1.0813074327602559
0.6395744495090291
1.3364906787239494
-0.14304784943169746
1.881552932711628
1.2218941133908627
-2.0848714553011884
-1.162774790077228
-0.6194014333914688
-1.1473985485293752
0.8111782565626628
0.4277013

 66%|██████▌   | 736/1112 [00:00<00:00, 739.42it/s]


-0.16305319785825434
-2.998681629069493
0.4514637074788309
2.7560899314008
1.9623648169118408
0.9486454610677276
-2.1491398313148697
-2.332754348659469
0.38788507120241017
1.3869681309081443
-0.9657689534416267
-1.6028521822502118
1.1560614827347353
0.516868807154792
-1.276618884648881
-0.8720599773711923
2.829998391021602
0.3535748665770351
-2.8898905472773784
-0.7253675133135311
-0.06224889595627836
1.3878581693779064
-0.32253451564680286
-1.7495446463078728
0.2972922564291921
0.6176073875634025
0.673276813625093
-1.3059071002558404
-0.24588941759953253
-0.2841544951088901
0.922207004682148
0.5850742400380353
0.707848156479556
-2.627654903779222
-0.4246260679905378
2.4662651571645267
-0.04374209324885425
-2.21960003414448
0.6586665432751886
1.8875056471617415
0.008328644358970128
-1.4132111464552672
0.36120708446881966
1.3016258294008591
1.4659568158880982
-1.266518682397606
-1.3137709657335284
0.8633285141674001
-0.4471941905993264
1.7081904266381276
0.40516107804208423
-2.76749151

 73%|███████▎  | 811/1112 [00:01<00:00, 690.58it/s]

-2.0375893436046217
-0.6622631948769864
1.0601152123273998
0.3485786673478964
-1.0240926722474268
0.6884923064592494
1.0230870679829285
-1.6507307464971477
-0.7351579364809155
0.12175897461765504
-0.99801061445547
-0.39204965607281933
1.300735790931097
-1.1698933732698231
-0.5125789850710099
1.0582171594344136
1.594120719046419
-1.337749040301051
0.9080825174511654
1.1700083578247393
0.29435942733199894
-1.459608523475922
1.5991764625764266
0.836190700711802
-0.7280376287228177
-0.013944030588160544
-1.0329936444351862
0.10434614300868475
1.0109110166948234
-0.16872857670719188
-0.1202355534866015
-0.32521889662842746
0.5770638938101753
-2.99491803802019
-1.5702494631377313
3.1788354976472624
2.172880229049204
1.2718933951326203
-0.3169974845297808
-1.9452924717455424
0.42592123740370313
-1.5093691716398638
-0.6930994032133944
1.710724933114009
1.3105262140984812
-1.4179208135917645
-0.8513181503978089
1.3420727932635477
0.8624384756976379
0.3634276544135835
2.1521437193293056
-0.44003

 80%|████████  | 892/1112 [00:01<00:00, 680.35it/s]

-1.3927844786308918
1.9193609194357353
0.7210862424585499
2.0447120851624048
0.3947881658450868
-1.9089028409317363
-1.4623899874808863
1.7295159751992943
0.7853515204899706
-1.253864511319547
-2.1860618846018074
-0.13966813180656623
-0.6729631274212071
0.7680177507765977
-0.7360479749506776
0.4826273317966058
-1.711071621526358
-0.034988055463374246
1.1602735961617712
2.298653193454872


 88%|████████▊ | 982/1112 [00:01<00:00, 739.29it/s]

-0.7512045710337298
-1.8665358229328706
0.2899092349831884
0.0016669791541733536
0.14468700596952827
0.07835829100323422
-0.1501681571897951
-0.19130738673326336
0.14584208186452108
0.9215359094367417
-0.5822352031349187
2.846368192008323
-2.8371677654312606
-2.7865791969846248
2.902782395458461
1.5619187031260133
-0.3129072558002707
-0.9851996780420463
-0.7315977826018667
-0.4581171754965424
1.0612702882223923
0.5321426819931648
-1.4561597181925503
-1.459757964461137
1.0677909495429159
1.6976698108403872
-1.0169723644893292
2.132045919944355
-1.7581805810704014
-1.8256929154939292
1.5967908344557058
1.8010054599510505
-2.808440359346837
-1.036290934231192
2.4680452341040504
-0.43172413210687416
-2.5446728757857002
0.8518987714598109
2.172880229049204
1.0491617640984208
0.8205995878949173
-0.4519561104868005
-2.1816116922529964
-0.31745554854542335
-0.5659435685103462
0.17087258984217124
1.30607602174967
1.259445234353957
-2.591516797904513
-0.8313143417565227
2.3240228854556766
0.6106

 96%|█████████▋| 1073/1112 [00:01<00:00, 783.32it/s]

-1.7811755959072841
-0.3807079205939202
1.8498457790097187
0.5708336245218399
1.2117163438184535
2.0902133602046904
-1.9215312580526598
-1.600182066840925
0.011298468741963497
-1.4484338592532566
-0.6349399899212189
3.0485848210463606


100%|██████████| 1112/1112 [00:01<00:00, 728.91it/s]


-0.764711184016436
0.47957562538509024
0.31045157339781687
-0.29775065971721837
-1.1325179887609196
-1.1660197848072689
0.586589414409341
2.4662651571645267
0.03639506798847841
1.0138962912531182
-0.5594164247937845
-0.7342678980111533
-1.6829410150339603
1.5132339271414301
0.9499951848383464
-1.4606099105413615
0.6911187391509763
0.5009176862861852
-1.2857698279918592
0.7264264732771231
0.5074051122151934
0.8268805663971679
-0.6214585574857774
0.29435942733199894
0.13147082260648485
2.0128366004549725
-0.3055752144425374
-2.472326504959032
0.9435373161209405
0.9451106617956362
-1.159917061955242
0.005424691565488304
-0.7659767283131138
-0.21536886565140811
0.8462256968856567
-0.1546183495386062
0.426573293483921
-0.688024209820052
-0.8518424717641679
0.7157460116399768
2.158584863417633
-0.4717256494222593
-2.320746524084621
0.43304154516180077
0.4126988716065005
1.065745835178161
-0.8016356633967779
-0.43821285448654423
-1.5260590280813349
1.0918031258532976
1.5535155531086917
-0.432

In [None]:
import os

fmri_directory = '/content/drive/MyDrive/ABIDE/func_preproc'
fmri_files = os.listdir(fmri_directory)
print(f"Files in '{fmri_directory}':")
print(fmri_files)


Files in '/content/drive/MyDrive/ABIDE/func_preproc':
['Pitt_0050014_func_preproc.nii.gz', 'Pitt_0050025_func_preproc.nii.gz', 'Pitt_0050041_func_preproc.nii.gz', 'Pitt_0050042_func_preproc.nii.gz', 'Pitt_0050045_func_preproc.nii.gz', 'Pitt_0050047_func_preproc.nii.gz', 'Pitt_0050052_func_preproc.nii.gz', 'Olin_0050104_func_preproc.nii.gz', 'Olin_0050107_func_preproc.nii.gz', 'Olin_0050112_func_preproc.nii.gz', 'Olin_0050118_func_preproc.nii.gz', 'Olin_0050125_func_preproc.nii.gz', 'Olin_0050133_func_preproc.nii.gz', 'OHSU_0050143_func_preproc.nii.gz', 'OHSU_0050144_func_preproc.nii.gz', 'OHSU_0050153_func_preproc.nii.gz', 'OHSU_0050166_func_preproc.nii.gz', 'OHSU_0050168_func_preproc.nii.gz', 'OHSU_0050169_func_preproc.nii.gz', 'OHSU_0050170_func_preproc.nii.gz', 'SDSU_0050182_func_preproc.nii.gz', 'SDSU_0050187_func_preproc.nii.gz', 'SDSU_0050188_func_preproc.nii.gz', 'SDSU_0050190_func_preproc.nii.gz', 'SDSU_0050191_func_preproc.nii.gz', 'SDSU_0050194_func_preproc.nii.gz', 'SDSU_005

In [None]:
file_to_check = 'Pitt_0050041_func_preproc.nii.gz'
if file_to_check in fmri_files:
    print(f"File '{file_to_check}' exists in '{fmri_directory}'.")
else:
    print(f"File '{file_to_check}' does NOT exist in '{fmri_directory}'.")


File 'Pitt_0050041_func_preproc.nii.gz' exists in '/content/drive/MyDrive/ABIDE/func_preproc'.


In [None]:
# Match phenotypic data with imaging data
matched_data = phenotypic_data[phenotypic_data['SUB_ID'].isin(image_ids)].reset_index(drop=True)
print(f"Number of matched subjects: {len(matched_data)}")


Number of matched subjects: 172


In [None]:
import os

# Change current working directory
os.chdir('/content/drive/MyDrive/ABIDE')
print(f"Current working directory: {os.getcwd()}")


Current working directory: /content/drive/MyDrive/ABIDE


In [None]:
from nilearn.image import load_img
from nilearn.datasets import fetch_atlas_schaefer_2018
from nilearn.input_data import NiftiLabelsMasker
import numpy as np
from scipy.spatial.distance import squareform
from tqdm import tqdm
import os
import shutil

# Fetch the atlas
atlas = fetch_atlas_schaefer_2018(n_rois=100)
masker = NiftiLabelsMasker(labels_img=atlas.maps, standardize=True)

# Copy fMRI files from Google Drive to Colab's local directory for more reliable access
fmri_directory_drive = "/content/drive/MyDrive/ABIDE/func_preproc"
local_directory = "/content/func_preproc"
if not os.path.exists(local_directory):
    os.makedirs(local_directory)

imaging_files = os.listdir(fmri_directory_drive)
for fmri_file in imaging_files:
    shutil.copy(f"{fmri_directory_drive}/{fmri_file}", local_directory)

# Initialize feature and label lists
features_list = []
labels_list = []

# Loop through matched data to extract features
for idx, row in tqdm(matched_data.iterrows(), total=matched_data.shape[0]):
    sub_id = row['SUB_ID']
    fmri_file = next((f for f in imaging_files if sub_id in f), None)
    if fmri_file:
        fmri_file_path = f"/content/func_preproc/{fmri_file}"
        if os.path.exists(fmri_file_path):
            try:
                # Load the fMRI image
                fmri_img = load_img(fmri_file_path)
                # Extract time series data using the atlas-based masker
                time_series = masker.fit_transform(fmri_img)

                # Replace NaNs/Infs in time series
                time_series = np.nan_to_num(time_series, nan=0.0, posinf=0.0, neginf=0.0)

                # Compute the correlation matrix
                correlation_matrix = np.corrcoef(time_series.T)

                # Replace NaNs/Infs in correlation matrix
                correlation_matrix = np.nan_to_num(correlation_matrix, nan=0.0, posinf=0.0, neginf=0.0)

                # Flatten the upper triangle of the correlation matrix to use as features
                features = squareform(correlation_matrix, checks=False)
                features_list.append(features)
                labels_list.append(row['DX_GROUP'])
            except (OSError, ConnectionAbortedError) as e:
                print(f"Error loading file '{fmri_file_path}': {e}")
        else:
            print(f"File '{fmri_file_path}' does not exist.")
    else:
        print(f"No fMRI file found for subject {sub_id}")



  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 172/172 [10:16<00:00,  3.58s/it]


In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Assuming feature extraction is complete and features_list and labels_list are populated

# Convert features_list to a NumPy array
X = np.array(features_list)

# Convert labels_list to a NumPy array
y = np.array(labels_list)

# Encode labels: 1 -> 1 (ASD), 2 -> 0 (Control)
y_encoded = (y == 1).astype(int)

# Optional: Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")


Training set size: 137 samples
Testing set size: 35 samples


In [None]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import layers, models, regularizers, callbacks
from tensorflow.keras.optimizers import Adam,RMSprop
from sklearn.utils import class_weight
import numpy as np

In [None]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import layers, models, regularizers, callbacks
from tensorflow.keras.optimizers import Adam, RMSprop
from sklearn.utils import class_weight
import numpy as np

# Initialize cross-validation with StratifiedKFold
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_accuracies = []

# Define the learning rate schedule function
def lr_schedule(epoch, lr):
    if epoch < 10:
        return lr + 0.0001  # Gradually increase during warm-up
    return lr * 0.95  # Gradually decrease after epoch 10

for train_idx, test_idx in kfold.split(X_scaled, y_encoded):
    X_train_fold, X_test_fold = X_scaled[train_idx], X_scaled[test_idx]
    y_train_fold, y_test_fold = y_encoded[train_idx], y_encoded[test_idx]

    # Compute class weights for the current fold
    class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train_fold), y=y_train_fold)
    class_weights = dict(enumerate(class_weights))

    # Define a more complex model with LeakyReLU activations and increased number of units
    model = models.Sequential([
        layers.Input(shape=(X_train_fold.shape[1],)),
        layers.Dense(256),
        layers.LeakyReLU(alpha=0.1),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(128),
        layers.LeakyReLU(alpha=0.1),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(64),
        layers.LeakyReLU(alpha=0.1),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(32),
        layers.LeakyReLU(alpha=0.1),
        layers.BatchNormalization(),
        layers.Dense(1, activation='sigmoid')
    ])

    # Compile the model with RMSprop optimizer
    model.compile(optimizer=RMSprop(learning_rate=0.0005),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Early stopping callback with increased patience
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

    # Learning rate scheduler callback
    lr_scheduler_callback = callbacks.LearningRateScheduler(lr_schedule, verbose=1)

    # Train the model
    history = model.fit(X_train_fold, y_train_fold,
                        epochs=150,
                        batch_size=32,
                        validation_data=(X_test_fold, y_test_fold),
                        class_weight=class_weights,
                        callbacks=[early_stopping, lr_scheduler_callback],
                        verbose=0)

    # Evaluate the model on the validation set
    score = model.evaluate(X_test_fold, y_test_fold, verbose=0)
    fold_accuracies.append(score[1])
    print(f"Fold Test Accuracy: {score[1] * 100:.2f}%")

# Print the average cross-validation accuracy
print(f"Average Cross-Validation Accuracy: {np.mean(fold_accuracies) * 100:.2f}%")





Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000237487257.

Epoch 2: LearningRateScheduler setting learning rate to 0.0007000000284984708.

Epoch 3: LearningRateScheduler setting learning rate to 0.000800000033248216.

Epoch 4: LearningRateScheduler setting learning rate to 0.0009000000379979611.

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000427477062.

Epoch 6: LearningRateScheduler setting learning rate to 0.0011000000474974514.

Epoch 7: LearningRateScheduler setting learning rate to 0.0011999999940395356.

Epoch 8: LearningRateScheduler setting learning rate to 0.0012999999405816198.

Epoch 9: LearningRateScheduler setting learning rate to 0.001399999887123704.
Fold Test Accuracy: 51.43%

Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000237487257.





Epoch 2: LearningRateScheduler setting learning rate to 0.0007000000284984708.

Epoch 3: LearningRateScheduler setting learning rate to 0.000800000033248216.

Epoch 4: LearningRateScheduler setting learning rate to 0.0009000000379979611.

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000427477062.

Epoch 6: LearningRateScheduler setting learning rate to 0.0011000000474974514.

Epoch 7: LearningRateScheduler setting learning rate to 0.0011999999940395356.

Epoch 8: LearningRateScheduler setting learning rate to 0.0012999999405816198.

Epoch 9: LearningRateScheduler setting learning rate to 0.001399999887123704.
Fold Test Accuracy: 60.00%

Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000237487257.





Epoch 2: LearningRateScheduler setting learning rate to 0.0007000000284984708.

Epoch 3: LearningRateScheduler setting learning rate to 0.000800000033248216.

Epoch 4: LearningRateScheduler setting learning rate to 0.0009000000379979611.

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000427477062.

Epoch 6: LearningRateScheduler setting learning rate to 0.0011000000474974514.

Epoch 7: LearningRateScheduler setting learning rate to 0.0011999999940395356.

Epoch 8: LearningRateScheduler setting learning rate to 0.0012999999405816198.

Epoch 9: LearningRateScheduler setting learning rate to 0.001399999887123704.

Epoch 10: LearningRateScheduler setting learning rate to 0.0014999998336657882.
Fold Test Accuracy: 70.59%

Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000237487257.





Epoch 2: LearningRateScheduler setting learning rate to 0.0007000000284984708.

Epoch 3: LearningRateScheduler setting learning rate to 0.000800000033248216.

Epoch 4: LearningRateScheduler setting learning rate to 0.0009000000379979611.

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000427477062.

Epoch 6: LearningRateScheduler setting learning rate to 0.0011000000474974514.

Epoch 7: LearningRateScheduler setting learning rate to 0.0011999999940395356.

Epoch 8: LearningRateScheduler setting learning rate to 0.0012999999405816198.

Epoch 9: LearningRateScheduler setting learning rate to 0.001399999887123704.

Epoch 10: LearningRateScheduler setting learning rate to 0.0014999998336657882.

Epoch 11: LearningRateScheduler setting learning rate to 0.0014249997911974788.

Epoch 12: LearningRateScheduler setting learning rate to 0.0013537498016376048.

Epoch 13: LearningRateScheduler setting learning rate to 0.0012860622839070857.

Epoch 14: LearningRateScheduler setti




Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000237487257.

Epoch 2: LearningRateScheduler setting learning rate to 0.0007000000284984708.

Epoch 3: LearningRateScheduler setting learning rate to 0.000800000033248216.

Epoch 4: LearningRateScheduler setting learning rate to 0.0009000000379979611.

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000427477062.

Epoch 6: LearningRateScheduler setting learning rate to 0.0011000000474974514.

Epoch 7: LearningRateScheduler setting learning rate to 0.0011999999940395356.

Epoch 8: LearningRateScheduler setting learning rate to 0.0012999999405816198.
Fold Test Accuracy: 35.29%
Average Cross-Validation Accuracy: 58.76%


In [None]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7937 - loss: 0.5151 
Test Accuracy: 80.00%
