# Feature engineering

> In this module, we develop tools to extract features from compounds, proteins, etc.

In [None]:
#| default_exp feature

In [None]:
#| hide
import sys
# Extend the module search path, presumably so the local `tools` package can be imported.
# NOTE(review): hard-coded absolute path — only valid on machines with this layout; confirm or make configurable.
sys.path.append("/notebooks/tools")
from tools.dataset import Data  # provides the example dataset loaded later in this notebook

In [None]:
#| hide
from nbdev.showdoc import *
%matplotlib inline

In [None]:
#| export
import seaborn as sns
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.ML.Descriptors import MoleculeDescriptors
import pandas as pd
from rdkit.Chem import Draw
from rdkit.Chem import Descriptors
from sklearn.preprocessing import StandardScaler

## Features from compound

In [None]:
#| export
from fastbook import *
from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
from fairscale.nn.wrap import enable_wrap, wrap
import esm
from tqdm.notebook import tqdm; tqdm.pandas()
import gc

In [None]:
#| export
def smi2prop(df, # df needs to have SMILES and ID columns
             smi_colname = "SMILES", # column name of smiles
             id_colname = "ID", # column name of ID
             remove_duplicate=True, # remove features that duplicate another feature or are constant across compounds
             normalize = False, # normalize features using StandardScaler()
            ):
    "Extract every descriptor in rdkit.Chem.Descriptors.descList from SMILES; optionally remove redundant features and standardize"

    mols = [Chem.MolFromSmiles(smi) for smi in df[smi_colname]]

    # MolFromSmiles returns None for SMILES it cannot parse; fail early and
    # name the offending compounds instead of crashing inside the calculator.
    bad_ids = [cid for cid, mol in zip(df[id_colname], mols) if mol is None]
    if bad_ids:
        raise ValueError(f"Could not parse SMILES for {id_colname}(s): {bad_ids}")

    desc_names = [desc_name[0] for desc_name in Descriptors.descList]
    desc_calc = MoleculeDescriptors.MolecularDescriptorCalculator(desc_names)
    desc_values = [desc_calc.CalcDescriptors(mol) for mol in mols]

    # Work on a features x compounds table so that row-wise pandas helpers
    # (duplicated / std) operate per feature.
    compound = pd.DataFrame(np.stack(desc_values).T, index=desc_names, columns=df[id_colname])
    if remove_duplicate:
        compound = compound.loc[~compound.duplicated()] # drop features whose values repeat an earlier feature
        compound = compound.loc[compound.std(axis=1) != 0] # drop features that are constant across compounds

    # Back to compounds x features, with the ID as the first regular column.
    compound = compound.T.reset_index()
    if normalize:
        scaler = StandardScaler()
        # Column 0 is the ID; only the feature columns are scaled.
        transformed = scaler.fit_transform(compound.iloc[:,1:])
        compound.iloc[:,1:] = transformed
    return compound

In [None]:
# Render the generated API documentation (signature + parameter table) for smi2prop
show_doc(smi2prop)

---

[source](https://github.com/sky1ove/tools/blob/main/tools/feature.py#L26){target="_blank" style="float:right; font-size:smaller"}

### smi2prop

>      smi2prop (df, smi_colname='SMILES', id_colname='ID',
>                remove_duplicate=True, normalize=False)

Extract 208 features from smiles via rdkit.Chem.Descriptors, and remove duplicate features

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df |  |  | df needs to have SMILES an ID columns |
| smi_colname | str | SMILES | column name of smiles |
| id_colname | str | ID | column name of ID |
| remove_duplicate | bool | True | remove features that are same across compounds |
| normalize | bool | False | normalize features using StandardScaler() |

In [None]:
# Example dataset for the demos below; the calls rely on its `ID` and `SMILES`
# columns, which are the default column names of the feature extractors.
df = Data.get_g12d()

In [None]:
# Descriptor table for the example compounds: redundant features removed (default)
# and the remaining features standardized with StandardScaler.
smi2prop(df,normalize=True)

Unnamed: 0,ID,MaxEStateIndex,MinEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,MaxPartialCharge,MinPartialCharge,MaxAbsPartialCharge,MinAbsPartialCharge,FpDensityMorgan1,FpDensityMorgan2,FpDensityMorgan3,BCUT2D_MWHI,BCUT2D_MWLOW,BCUT2D_CHGHI,BCUT2D_CHGLO,BCUT2D_LOGPHI,BCUT2D_LOGPLOW,BCUT2D_MRHI,BCUT2D_MRLOW,BalabanJ,BertzCT,Chi0,Chi0n,Chi0v,Chi1,Chi1n,Chi1v,Chi2n,Chi2v,Chi3n,Chi3v,Chi4n,Chi4v,HallKierAlpha,Ipc,Kappa1,Kappa2,Kappa3,LabuteASA,PEOE_VSA1,PEOE_VSA10,PEOE_VSA11,PEOE_VSA12,PEOE_VSA13,PEOE_VSA14,PEOE_VSA2,PEOE_VSA3,PEOE_VSA4,PEOE_VSA5,PEOE_VSA6,PEOE_VSA7,PEOE_VSA8,PEOE_VSA9,SMR_VSA1,SMR_VSA10,SMR_VSA2,SMR_VSA3,SMR_VSA4,SMR_VSA5,SMR_VSA6,SMR_VSA7,SMR_VSA9,SlogP_VSA1,SlogP_VSA10,SlogP_VSA11,SlogP_VSA12,SlogP_VSA2,SlogP_VSA3,SlogP_VSA4,SlogP_VSA5,SlogP_VSA6,SlogP_VSA7,SlogP_VSA8,TPSA,EState_VSA1,EState_VSA10,EState_VSA11,EState_VSA2,EState_VSA3,EState_VSA4,EState_VSA5,EState_VSA6,EState_VSA7,EState_VSA8,EState_VSA9,VSA_EState1,VSA_EState10,VSA_EState2,VSA_EState3,VSA_EState4,VSA_EState5,VSA_EState6,VSA_EState7,VSA_EState8,VSA_EState9,FractionCSP3,HeavyAtomCount,NHOHCount,NOCount,NumAliphaticCarbocycles,NumAliphaticHeterocycles,NumAliphaticRings,NumAromaticCarbocycles,NumAromaticHeterocycles,NumAromaticRings,NumHAcceptors,NumHDonors,NumHeteroatoms,NumRotatableBonds,NumSaturatedCarbocycles,NumSaturatedHeterocycles,NumSaturatedRings,RingCount,MolLogP,MolMR,fr_Al_COO,fr_Al_OH,fr_Al_OH_noTert,fr_ArN,fr_Ar_N,fr_Ar_NH,fr_Ar_OH,fr_COO2,fr_C_O,fr_C_O_noCOO,fr_HOCCN,fr_Imine,fr_NH0,fr_NH1,fr_NH2,fr_N_O,fr_Ndealkylation1,fr_Ndealkylation2,fr_aldehyde,fr_alkyl_carbamate,fr_alkyl_halide,fr_allylic_oxid,fr_amide,fr_aniline,fr_aryl_methyl,fr_bicyclic,fr_ester,fr_ether,fr_guanido,fr_halogen,fr_imidazole,fr_lactone,fr_methoxy,fr_morpholine,fr_nitrile,fr_para_hydroxylation,fr_piperdine,fr_piperzine,fr_priamide,fr_pyridine,fr_quatN,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_thiazole,fr_unbrch_alkane,fr_urea
0,US_1,-1.932028,0.932171,1.381110,0.495241,-1.183587,-1.219109,-1.182076,-1.046640,-0.203173,-1.070885,0.960805,-0.201299,-0.606818,-0.027786,0.238330,-0.929849,0.169612,-1.291745,0.519884,-1.519547,0.185658,-0.425142,0.608875,0.337035,-0.603426,-1.036679,-0.801448,-0.917005,-0.932184,-0.777188,-0.867430,-0.807099,-0.902348,-0.806015,-0.900582,-0.864011,-0.972370,-0.126939,-0.154995,-1.075533,-0.754664,-0.327962,-0.976340,0.813537,-1.109378,-1.722754,-0.237511,-0.150438,-0.258243,-0.690788,-1.677478,-0.252705,-0.275917,-0.455194,0.586341,-0.695775,0.424056,-0.818465,-0.302768,-0.193272,0.123156,-0.361079,-0.742411,-0.199686,-0.279138,0.655344,-0.36044,-1.709988,0.908283,-0.727266,0.045041,-0.772241,-1.454431,-0.791409,0.740918,-0.673271,0.606634,0.257682,-1.120582,-0.123426,-1.959448,-1.622800,-1.137956,0.581413,0.436907,-0.082673,0.600444,-0.316618,0.410895,-1.477411,-0.713491,-0.020944,0.966588,0.316115,0.706608,1.074237,0.035838,-0.307140,0.688654,-0.495195,-0.975006,0.504157,0.034921,-0.393,-0.446513,-0.613431,0.241193,0.123187,0.298974,0.164340,0.642889,-1.190554,-0.645101,-0.356435,-0.308046,-0.465984,-0.535647,-0.875193,-0.643206,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,1.432247,1.722508,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,-0.342359,-0.052705,-0.316228,-0.052705,-1.583939,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
1,US_2,0.370290,0.282627,-0.324387,0.318811,-0.840874,-0.837744,-0.838700,-0.740120,-0.200922,-1.070885,0.960805,-0.198749,0.203344,0.332533,0.322539,-0.646890,0.160857,-1.217596,0.513626,-0.927388,0.182332,-0.416530,0.608299,0.705937,-0.323952,-0.697765,-0.663958,-0.781950,-0.685520,-0.697979,-0.791224,-0.730912,-0.830814,-0.734399,-0.831915,-0.806103,-0.915471,-0.260409,-0.153546,-0.753847,-0.575009,-0.555061,-0.780717,0.813537,0.471341,0.245113,-0.237511,-0.150438,-0.258243,-0.690788,-0.487264,-0.252705,-0.275917,-0.455194,0.220578,-0.695775,-1.361355,-0.092199,-0.302768,-0.193272,0.123156,-0.361079,-0.742411,-0.199686,-0.300922,0.655344,-0.36044,-0.734650,0.908283,-0.727266,0.045041,-0.772241,-0.312574,-0.791409,0.228520,-0.673271,0.606634,0.257682,-0.402170,-0.123426,0.378935,-0.236918,0.014104,0.042263,-0.821400,0.594253,0.172109,0.004146,-0.934719,-0.446542,-0.713491,-0.224429,0.823311,-0.487655,-0.179828,0.219573,-0.139407,-0.513885,0.654352,-0.495195,-0.696653,0.504157,0.034921,-0.393,-0.446513,-0.613431,0.241193,0.123187,0.298974,0.164340,0.642889,-0.514784,-0.645101,-0.356435,-0.308046,-0.465984,-0.535647,-0.717804,-0.646230,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,1.432247,1.722508,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,-0.342359,-0.052705,-0.316228,-0.052705,-0.686995,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
2,US_3,0.366447,0.255857,-0.483658,-0.364802,-0.631645,-0.556569,-0.629139,-0.637947,-0.201909,-1.070885,0.960805,-0.199867,0.171757,0.365089,0.402430,-0.646890,0.160865,-1.249951,0.529808,-0.965000,0.185125,-0.416498,1.440770,0.497265,0.570095,-0.422385,-0.613488,-0.732374,-0.397038,-0.868573,-0.955352,-1.153696,-1.227779,-1.256683,-1.332690,-1.296645,-1.397467,-1.690448,-0.150834,-0.667184,-0.455833,-0.222323,-0.600051,0.745171,0.361010,0.245113,-0.237511,-0.150438,-0.258243,-0.690788,0.863861,-0.252705,-0.275917,-0.455194,-1.373230,1.166896,-0.309139,-0.092199,-0.302768,-0.193272,1.419923,3.005662,-1.502066,-1.611664,1.291933,0.655344,-0.36044,-0.734650,0.908283,-0.727266,-1.184302,1.233551,-0.312574,-1.264763,1.275358,-0.673271,0.606634,1.165821,-0.402170,-0.123426,0.378935,-0.236918,0.054692,-0.796676,-0.821400,1.271180,1.403276,-0.568357,-0.934719,-0.333341,-0.713491,0.206342,0.809274,-0.530972,0.571471,0.094069,0.531189,-1.115673,0.551571,-1.613182,-0.418300,0.504157,0.968725,-0.393,-1.466710,-1.655543,0.241193,1.867123,1.719101,1.273251,0.642889,0.160987,0.074780,-0.356435,-1.472494,-1.676202,-0.535647,-0.851319,-0.555500,-0.037242,-0.289541,-0.278433,-0.145659,2.584797,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,1.559865,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,1.85971,-0.342359,-0.052705,-0.316228,-0.052705,-0.686995,4.888410,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
3,US_4,0.363396,0.256350,-0.481359,-0.469590,-0.688872,-0.596636,-0.686606,-0.740120,-0.201930,-1.070885,0.960805,-0.199891,-1.428648,-0.872050,-0.356534,-0.646890,0.160863,-1.252099,0.529706,-0.965484,0.185171,-0.416458,1.460675,0.456686,0.451644,-0.485918,-0.747273,-0.863789,-0.345506,-0.849896,-0.937383,-1.259070,-1.326717,-1.398294,-1.468468,-1.374888,-1.474347,-1.804851,-0.150593,-0.687990,-0.213464,0.077968,-0.589911,-0.193009,-0.460241,0.245113,-0.237511,-0.150438,-0.258243,0.662722,-0.487264,-0.252705,-0.275917,0.048826,-0.641703,0.457309,-0.309139,-0.092199,-0.302768,-0.193272,0.146595,-0.361079,-1.502066,-1.611664,2.329913,0.655344,-0.36044,-0.734650,0.908283,-0.727266,-1.559026,0.183978,-0.312574,-1.273564,2.289132,-0.673271,0.606634,0.858748,-0.402170,-0.123426,0.378935,-0.236918,0.054692,-0.804747,-0.821400,1.271180,1.430059,-0.568357,-0.934719,-0.457100,-0.713491,0.204625,0.809023,-0.080902,-0.260102,1.566288,-0.117701,-1.112109,-0.633797,-2.085221,-0.418300,0.504157,0.034921,-0.393,-1.466710,-1.655543,0.241193,1.867123,1.719101,0.164340,0.642889,-0.514784,0.074780,-0.356435,-1.472494,-1.676202,-0.535647,-0.102844,-0.481404,-0.037242,-0.289541,-0.278433,-0.145659,1.284293,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,-0.342359,-0.052705,-0.316228,-0.052705,-0.686995,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,2.527167,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
4,US_5,0.358201,0.249087,-0.513840,-0.138580,-0.898862,-0.797677,-0.896648,-0.944467,-0.193474,-1.070885,0.960805,-0.190311,0.203344,0.332533,0.322539,-0.646890,0.160865,-1.248200,0.530251,-0.947588,0.185958,-0.416506,1.270048,0.953152,0.481607,-0.697765,-0.936893,-1.050051,-0.685520,-1.218476,-1.291990,-1.405083,-1.463814,-1.436607,-1.505204,-1.486580,-1.584093,-1.690448,-0.153546,-1.013359,-0.928842,-0.914902,-0.900445,-0.193009,0.471341,0.245113,-0.237511,-0.150438,-0.258243,0.580659,-0.487264,1.145586,-0.275917,-0.455194,-1.007467,-0.018884,-0.454536,-0.092199,-0.302768,-0.193272,1.483883,3.005662,-1.490724,-2.297985,1.269117,0.655344,-0.36044,-0.734650,0.908283,-0.727266,-1.707565,1.261260,-0.312574,-1.273564,1.264337,-0.673271,0.606634,1.165821,-0.402170,-0.123426,0.378935,0.294776,-1.340548,-0.804747,-0.425119,1.119934,1.078572,-0.558927,-0.934719,-0.349336,-0.713491,-0.786942,1.509353,-0.117188,-0.282391,0.555769,-0.340437,-1.171548,0.490955,-1.879370,-0.696653,0.504157,0.968725,-0.393,-1.466710,-1.655543,0.241193,1.867123,1.719101,1.273251,0.642889,0.160987,-0.645101,-0.356435,-1.472494,-1.676202,-0.535647,-0.899407,-0.905962,-0.037242,-0.289541,-0.278433,-0.145659,2.584797,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,1.559865,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,1.85971,-0.342359,-0.052705,-0.316228,-0.052705,-0.686995,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
717,paper_34,0.412000,0.282268,-0.641477,-0.021133,-0.306915,-0.295327,-0.304065,-0.229253,-0.200687,0.648214,-0.646127,-0.198483,-1.418645,-1.111746,-0.754989,-0.646747,0.071399,0.355821,0.407910,-0.109332,0.173694,-0.435484,-0.028260,-0.343267,0.130336,-0.291194,-0.218157,-0.344045,-0.091014,0.042768,-0.078558,0.119728,-0.032122,0.314796,0.174071,0.456276,0.324916,-0.012536,-0.142133,-0.445877,-0.585222,-0.835802,-0.259654,-1.242000,0.480888,0.245113,-0.237511,-0.150438,-0.258243,0.639891,0.702949,-0.252705,-0.275917,-0.455194,0.964525,-1.263573,-0.479222,-0.210659,-0.302768,-0.193272,0.123156,-0.361079,0.009690,-0.251928,0.207693,-0.197389,-0.36044,0.240688,-0.663397,-0.727266,-0.456470,-0.772241,0.829282,0.074835,0.228520,-0.673271,0.606634,-1.002376,-0.402170,-0.265067,0.378935,0.214280,-0.056111,0.113930,0.810034,0.594253,-0.666525,-0.575269,-0.934719,0.474419,-0.713491,-0.130687,-0.959483,-0.466569,-0.585922,-0.050176,0.677808,0.062092,-0.633797,-0.086468,-0.139948,-0.940293,-0.898884,-0.393,0.573684,0.428680,0.241193,0.123187,0.298974,-0.944571,-0.991498,-0.514784,-0.645101,-0.356435,0.856401,0.744233,0.880973,0.377130,-0.256438,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,-0.793597,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,1.038552,-0.052705,-0.316228,-0.052705,0.209950,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
718,paper_35,0.415567,-0.134391,-0.846065,-0.202761,0.035799,0.086038,0.039311,0.077267,-0.200687,0.648214,-0.646127,-0.198483,-1.789715,-1.633947,-1.374656,-0.644994,0.071395,0.358620,0.407856,-0.034801,0.173687,-0.427111,-0.028260,-0.220271,0.399656,0.047720,-0.080667,-0.208990,0.155650,0.121977,-0.002352,0.194054,0.037664,0.394111,0.250119,0.511893,0.379564,-0.146006,-0.133985,-0.126361,-0.401449,-0.627865,-0.064031,-1.242000,-0.339351,4.180846,-0.237511,-0.150438,-0.258243,0.639891,1.893163,-0.252705,-0.275917,-0.455194,0.274006,-0.757388,-0.479222,0.515607,-0.302768,-0.193272,0.123156,-0.361079,0.009690,-0.251928,0.185909,-0.197389,-0.36044,1.216026,-0.663397,-0.727266,-0.456470,-0.772241,1.971139,0.074835,-0.283877,-0.673271,0.606634,-1.002376,1.034654,-0.265067,2.717317,0.627318,-0.651945,0.950382,-0.425119,1.256927,-1.505159,-0.575269,-0.934719,1.425648,-0.713491,-0.205823,-1.145568,-0.846163,-2.373451,-0.753307,0.566744,-0.040324,-0.633797,-0.086468,0.138405,-0.940293,-0.898884,-0.393,0.573684,0.428680,0.241193,0.123187,0.298974,-0.944571,-0.991498,0.160987,-0.645101,-0.356435,0.856401,0.744233,0.880973,0.534519,-0.259462,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,-0.793597,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,1.038552,-0.052705,-0.316228,-0.052705,1.106894,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242
719,paper_36,0.482881,0.233261,-0.874320,-0.405659,0.150709,0.186889,0.154009,0.179440,-0.200687,0.648214,-0.646127,-0.198483,-0.285501,-0.408123,-0.555310,-0.646744,0.071216,0.358538,0.407465,-0.009968,0.173688,-0.366278,-0.028260,-0.124154,0.852412,0.323101,0.239206,0.105217,0.466059,0.370103,0.236368,0.356388,0.190084,0.572115,0.420792,0.697584,0.562020,-0.851492,-0.117859,0.090696,-0.114644,-0.662905,0.291172,-1.242000,0.480888,0.245113,-0.237511,-0.150438,-0.258243,0.639891,0.702949,-0.252705,2.451447,0.036701,0.274006,-0.757388,0.406816,-0.210659,-0.302768,-0.193272,0.123156,-0.361079,0.009690,-0.251928,0.163721,1.633368,-0.36044,0.240688,-0.663397,-0.727266,-0.456470,-0.772241,3.252231,0.450123,-0.283877,-0.673271,0.606634,-1.002376,0.316242,-0.265067,0.378935,0.193858,0.520743,0.575119,-0.425119,1.256927,-1.085842,-0.088529,-0.067605,0.520974,-0.713491,-0.134837,-1.010810,-0.549036,1.514278,-0.507364,2.614234,0.016357,-0.633797,-0.382266,0.416758,-0.940293,-0.898884,-0.393,0.573684,0.428680,0.241193,0.123187,0.298974,-0.944571,-0.991498,-0.514784,-0.645101,-0.356435,0.856401,0.744233,0.880973,0.355971,0.314944,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,-0.793597,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,1.038552,-0.052705,-0.316228,-0.052705,0.209950,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,4.888410,-0.091542,-0.16413,-0.037242
720,paper_37,0.462809,0.246761,-0.836028,-0.473964,0.311341,0.356153,0.306039,0.077267,-0.200687,-1.070884,0.960804,-0.198483,0.493790,0.131239,0.069227,1.100147,0.071395,0.357978,0.407492,0.278894,0.173698,0.425537,-0.028260,-0.173040,0.413790,0.047720,-0.048995,0.161731,0.145936,0.142300,0.275220,0.246405,0.389054,0.372909,0.493315,0.558158,0.704271,0.292539,-0.135418,-0.047174,-0.297154,-0.392865,0.253736,-0.193009,0.471341,0.245113,-0.237511,-0.150438,-0.258243,0.639891,-0.487264,-0.252705,-0.275917,-0.499378,1.005533,-0.285373,-0.479222,-0.092199,1.032990,-0.193272,0.123156,-0.361079,0.009690,-0.251928,-0.392178,0.655344,-0.36044,-0.734650,0.908283,1.230756,-0.037486,-0.772241,-0.312574,0.074835,-0.283877,1.289903,0.606634,0.257682,-0.402170,-0.123426,0.378935,0.208831,1.058656,-0.219260,0.413753,1.256927,-1.085842,-0.575269,0.631338,-0.416608,1.313203,-0.129163,0.867298,-0.577751,-0.274387,-0.452078,0.658876,0.043170,-0.633797,-0.086468,0.138405,0.504157,0.034921,-0.393,0.573684,0.428680,0.241193,0.123187,0.298974,0.164340,0.642889,0.160987,-0.645101,-0.356435,0.856401,0.744233,0.880973,0.625942,0.227224,-0.037242,-0.289541,-0.278433,-0.145659,-0.016211,-0.164399,1.260085,-0.052705,-0.321026,-0.31841,-0.087167,-0.037242,-0.098784,-0.261324,-0.215353,-0.051678,-0.640062,-0.580549,-0.091542,-0.14062,-0.508104,-0.070507,-0.309352,-0.493979,-0.51257,1.038552,-0.052705,-0.316228,-0.052705,0.209950,-0.204565,-0.037242,-0.258067,-0.145659,-0.185429,-0.211808,-0.183176,0.327661,-0.052705,0.357590,-0.037242,-0.083507,-0.037242,-0.052705,-0.204565,-0.091542,-0.16413,-0.037242


In [None]:
#| export
def smi2morgan(df, # a dataframe contains ID and SMILES columns
               smi_colname = "SMILES", # set smiles column name
               id_colname = "ID", # set ID column name
               radius = 2, # Morgan fingerprint radius
               n_bits = 2048, # number of bits (feature columns) in the fingerprint
              ):
    "Like `smi2prop`, get morgan features (0/1; 2048 bits by default) given a dataframe that contains ID&smiles"
    mols = [Chem.MolFromSmiles(smi) for smi in df[smi_colname]]

    # MolFromSmiles returns None for SMILES it cannot parse; report which
    # compounds are affected instead of failing inside the fingerprint call.
    bad_ids = [cid for cid, mol in zip(df[id_colname], mols) if mol is None]
    if bad_ids:
        raise ValueError(f"Could not parse SMILES for {id_colname}(s): {bad_ids}")

    morgan_fps = [AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits) for mol in mols]

    # The reshape is a no-op for non-empty input and keeps the
    # (n_compounds, n_bits) layout when `df` has no rows.
    fp_array = np.array(morgan_fps).reshape(len(mols), n_bits)

    fp_df = pd.DataFrame(fp_array, index=df[id_colname])
    fp_df.columns = [f'morgan_{i}' for i in fp_df.columns]
    # Expose the ID as the first regular column, matching `smi2prop`'s output.
    return fp_df.reset_index()

In [None]:
# Render the generated API documentation (signature + parameter table) for smi2morgan
show_doc(smi2morgan)

---

[source](https://github.com/sky1ove/tools/blob/main/tools/feature.py#L50){target="_blank" style="float:right; font-size:smaller"}

### smi2morgan

>      smi2morgan (df, smi_colname='SMILES', id_colname='ID')

Like `smi2prop`, get 2048 morgan feature (0/1) given a dataframe that contains ID&smiles

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df |  |  | a dataframe contains ID and SMILES columns |
| smi_colname | str | SMILES | set smiles columne name |
| id_colname | str | ID | set ID column name |

In [None]:
# Morgan fingerprint bits for the example compounds: an ID column followed by morgan_* columns
smi2morgan(df)

Unnamed: 0,ID,morgan_0,morgan_1,morgan_2,morgan_3,morgan_4,morgan_5,morgan_6,morgan_7,morgan_8,morgan_9,morgan_10,morgan_11,morgan_12,morgan_13,morgan_14,morgan_15,morgan_16,morgan_17,morgan_18,morgan_19,morgan_20,morgan_21,morgan_22,morgan_23,morgan_24,morgan_25,morgan_26,morgan_27,morgan_28,morgan_29,morgan_30,morgan_31,morgan_32,morgan_33,morgan_34,morgan_35,morgan_36,morgan_37,morgan_38,morgan_39,morgan_40,morgan_41,morgan_42,morgan_43,morgan_44,morgan_45,morgan_46,morgan_47,morgan_48,morgan_49,morgan_50,morgan_51,morgan_52,morgan_53,morgan_54,morgan_55,morgan_56,morgan_57,morgan_58,morgan_59,morgan_60,morgan_61,morgan_62,morgan_63,morgan_64,morgan_65,morgan_66,morgan_67,morgan_68,morgan_69,morgan_70,morgan_71,morgan_72,morgan_73,morgan_74,morgan_75,morgan_76,morgan_77,morgan_78,morgan_79,morgan_80,morgan_81,morgan_82,morgan_83,morgan_84,morgan_85,morgan_86,morgan_87,morgan_88,morgan_89,morgan_90,morgan_91,morgan_92,morgan_93,morgan_94,morgan_95,morgan_96,morgan_97,morgan_98,morgan_99,morgan_100,morgan_101,morgan_102,morgan_103,morgan_104,morgan_105,morgan_106,morgan_107,morgan_108,morgan_109,morgan_110,morgan_111,morgan_112,morgan_113,morgan_114,morgan_115,morgan_116,morgan_117,morgan_118,morgan_119,morgan_120,morgan_121,morgan_122,morgan_123,morgan_124,morgan_125,morgan_126,morgan_127,morgan_128,morgan_129,morgan_130,morgan_131,morgan_132,morgan_133,morgan_134,morgan_135,morgan_136,morgan_137,morgan_138,morgan_139,morgan_140,morgan_141,morgan_142,morgan_143,morgan_144,morgan_145,morgan_146,morgan_147,morgan_148,morgan_149,morgan_150,morgan_151,morgan_152,morgan_153,morgan_154,morgan_155,morgan_156,morgan_157,morgan_158,morgan_159,morgan_160,morgan_161,morgan_162,morgan_163,morgan_164,morgan_165,morgan_166,morgan_167,morgan_168,morgan_169,morgan_170,morgan_171,morgan_172,morgan_173,morgan_174,morgan_175,morgan_176,morgan_177,morgan_178,morgan_179,morgan_180,morgan_181,morgan_182,morgan_183,morgan_184,morgan_185,morgan_186,morgan_187,morgan_188,morgan_189,morgan
_190,morgan_191,morgan_192,morgan_193,morgan_194,morgan_195,morgan_196,morgan_197,morgan_198,morgan_199,morgan_200,morgan_201,morgan_202,morgan_203,morgan_204,morgan_205,morgan_206,morgan_207,morgan_208,morgan_209,morgan_210,morgan_211,morgan_212,morgan_213,morgan_214,morgan_215,morgan_216,morgan_217,morgan_218,morgan_219,morgan_220,morgan_221,morgan_222,morgan_223,morgan_224,morgan_225,morgan_226,morgan_227,morgan_228,morgan_229,morgan_230,morgan_231,morgan_232,morgan_233,morgan_234,morgan_235,morgan_236,morgan_237,morgan_238,morgan_239,morgan_240,morgan_241,morgan_242,morgan_243,morgan_244,morgan_245,morgan_246,morgan_247,morgan_248,morgan_249,morgan_250,morgan_251,morgan_252,morgan_253,morgan_254,morgan_255,morgan_256,morgan_257,morgan_258,morgan_259,morgan_260,morgan_261,morgan_262,morgan_263,morgan_264,morgan_265,morgan_266,morgan_267,morgan_268,morgan_269,morgan_270,morgan_271,morgan_272,morgan_273,morgan_274,morgan_275,morgan_276,morgan_277,morgan_278,morgan_279,morgan_280,morgan_281,morgan_282,morgan_283,morgan_284,morgan_285,morgan_286,morgan_287,morgan_288,morgan_289,morgan_290,morgan_291,morgan_292,morgan_293,morgan_294,morgan_295,morgan_296,morgan_297,morgan_298,morgan_299,morgan_300,morgan_301,morgan_302,morgan_303,morgan_304,morgan_305,morgan_306,morgan_307,morgan_308,morgan_309,morgan_310,morgan_311,morgan_312,morgan_313,morgan_314,morgan_315,morgan_316,morgan_317,morgan_318,morgan_319,morgan_320,morgan_321,morgan_322,morgan_323,morgan_324,morgan_325,morgan_326,morgan_327,morgan_328,morgan_329,morgan_330,morgan_331,morgan_332,morgan_333,morgan_334,morgan_335,morgan_336,morgan_337,morgan_338,morgan_339,morgan_340,morgan_341,morgan_342,morgan_343,morgan_344,morgan_345,morgan_346,morgan_347,morgan_348,morgan_349,morgan_350,morgan_351,morgan_352,morgan_353,morgan_354,morgan_355,morgan_356,morgan_357,morgan_358,morgan_359,morgan_360,morgan_361,morgan_362,morgan_363,morgan_364,morgan_365,morgan_366,morgan_367,morgan_368,morgan_369,morgan_370,morgan_371,morg
an_372,morgan_373,morgan_374,morgan_375,morgan_376,morgan_377,morgan_378,morgan_379,morgan_380,morgan_381,morgan_382,morgan_383,morgan_384,morgan_385,morgan_386,morgan_387,morgan_388,morgan_389,morgan_390,morgan_391,morgan_392,morgan_393,morgan_394,morgan_395,morgan_396,morgan_397,morgan_398,morgan_399,morgan_400,morgan_401,morgan_402,morgan_403,morgan_404,morgan_405,morgan_406,morgan_407,morgan_408,morgan_409,morgan_410,morgan_411,morgan_412,morgan_413,morgan_414,morgan_415,morgan_416,morgan_417,morgan_418,morgan_419,morgan_420,morgan_421,morgan_422,morgan_423,morgan_424,morgan_425,morgan_426,morgan_427,morgan_428,morgan_429,morgan_430,morgan_431,morgan_432,morgan_433,morgan_434,morgan_435,morgan_436,morgan_437,morgan_438,morgan_439,morgan_440,morgan_441,morgan_442,morgan_443,morgan_444,morgan_445,morgan_446,morgan_447,morgan_448,morgan_449,morgan_450,morgan_451,morgan_452,morgan_453,morgan_454,morgan_455,morgan_456,morgan_457,morgan_458,morgan_459,morgan_460,morgan_461,morgan_462,morgan_463,morgan_464,morgan_465,morgan_466,morgan_467,morgan_468,morgan_469,morgan_470,morgan_471,morgan_472,morgan_473,morgan_474,morgan_475,morgan_476,morgan_477,morgan_478,morgan_479,morgan_480,morgan_481,morgan_482,morgan_483,morgan_484,morgan_485,morgan_486,morgan_487,morgan_488,morgan_489,morgan_490,morgan_491,morgan_492,morgan_493,morgan_494,morgan_495,morgan_496,morgan_497,...,morgan_1549,morgan_1550,morgan_1551,morgan_1552,morgan_1553,morgan_1554,morgan_1555,morgan_1556,morgan_1557,morgan_1558,morgan_1559,morgan_1560,morgan_1561,morgan_1562,morgan_1563,morgan_1564,morgan_1565,morgan_1566,morgan_1567,morgan_1568,morgan_1569,morgan_1570,morgan_1571,morgan_1572,morgan_1573,morgan_1574,morgan_1575,morgan_1576,morgan_1577,morgan_1578,morgan_1579,morgan_1580,morgan_1581,morgan_1582,morgan_1583,morgan_1584,morgan_1585,morgan_1586,morgan_1587,morgan_1588,morgan_1589,morgan_1590,morgan_1591,morgan_1592,morgan_1593,morgan_1594,morgan_1595,morgan_1596,morgan_1597,morgan_1598,morgan_1599,mo
rgan_1600,morgan_1601,morgan_1602,morgan_1603,morgan_1604,morgan_1605,morgan_1606,morgan_1607,morgan_1608,morgan_1609,morgan_1610,morgan_1611,morgan_1612,morgan_1613,morgan_1614,morgan_1615,morgan_1616,morgan_1617,morgan_1618,morgan_1619,morgan_1620,morgan_1621,morgan_1622,morgan_1623,morgan_1624,morgan_1625,morgan_1626,morgan_1627,morgan_1628,morgan_1629,morgan_1630,morgan_1631,morgan_1632,morgan_1633,morgan_1634,morgan_1635,morgan_1636,morgan_1637,morgan_1638,morgan_1639,morgan_1640,morgan_1641,morgan_1642,morgan_1643,morgan_1644,morgan_1645,morgan_1646,morgan_1647,morgan_1648,morgan_1649,morgan_1650,morgan_1651,morgan_1652,morgan_1653,morgan_1654,morgan_1655,morgan_1656,morgan_1657,morgan_1658,morgan_1659,morgan_1660,morgan_1661,morgan_1662,morgan_1663,morgan_1664,morgan_1665,morgan_1666,morgan_1667,morgan_1668,morgan_1669,morgan_1670,morgan_1671,morgan_1672,morgan_1673,morgan_1674,morgan_1675,morgan_1676,morgan_1677,morgan_1678,morgan_1679,morgan_1680,morgan_1681,morgan_1682,morgan_1683,morgan_1684,morgan_1685,morgan_1686,morgan_1687,morgan_1688,morgan_1689,morgan_1690,morgan_1691,morgan_1692,morgan_1693,morgan_1694,morgan_1695,morgan_1696,morgan_1697,morgan_1698,morgan_1699,morgan_1700,morgan_1701,morgan_1702,morgan_1703,morgan_1704,morgan_1705,morgan_1706,morgan_1707,morgan_1708,morgan_1709,morgan_1710,morgan_1711,morgan_1712,morgan_1713,morgan_1714,morgan_1715,morgan_1716,morgan_1717,morgan_1718,morgan_1719,morgan_1720,morgan_1721,morgan_1722,morgan_1723,morgan_1724,morgan_1725,morgan_1726,morgan_1727,morgan_1728,morgan_1729,morgan_1730,morgan_1731,morgan_1732,morgan_1733,morgan_1734,morgan_1735,morgan_1736,morgan_1737,morgan_1738,morgan_1739,morgan_1740,morgan_1741,morgan_1742,morgan_1743,morgan_1744,morgan_1745,morgan_1746,morgan_1747,morgan_1748,morgan_1749,morgan_1750,morgan_1751,morgan_1752,morgan_1753,morgan_1754,morgan_1755,morgan_1756,morgan_1757,morgan_1758,morgan_1759,morgan_1760,morgan_1761,morgan_1762,morgan_1763,morgan_1764,morgan_1765,morgan_176
6,morgan_1767,morgan_1768,morgan_1769,morgan_1770,morgan_1771,morgan_1772,morgan_1773,morgan_1774,morgan_1775,morgan_1776,morgan_1777,morgan_1778,morgan_1779,morgan_1780,morgan_1781,morgan_1782,morgan_1783,morgan_1784,morgan_1785,morgan_1786,morgan_1787,morgan_1788,morgan_1789,morgan_1790,morgan_1791,morgan_1792,morgan_1793,morgan_1794,morgan_1795,morgan_1796,morgan_1797,morgan_1798,morgan_1799,morgan_1800,morgan_1801,morgan_1802,morgan_1803,morgan_1804,morgan_1805,morgan_1806,morgan_1807,morgan_1808,morgan_1809,morgan_1810,morgan_1811,morgan_1812,morgan_1813,morgan_1814,morgan_1815,morgan_1816,morgan_1817,morgan_1818,morgan_1819,morgan_1820,morgan_1821,morgan_1822,morgan_1823,morgan_1824,morgan_1825,morgan_1826,morgan_1827,morgan_1828,morgan_1829,morgan_1830,morgan_1831,morgan_1832,morgan_1833,morgan_1834,morgan_1835,morgan_1836,morgan_1837,morgan_1838,morgan_1839,morgan_1840,morgan_1841,morgan_1842,morgan_1843,morgan_1844,morgan_1845,morgan_1846,morgan_1847,morgan_1848,morgan_1849,morgan_1850,morgan_1851,morgan_1852,morgan_1853,morgan_1854,morgan_1855,morgan_1856,morgan_1857,morgan_1858,morgan_1859,morgan_1860,morgan_1861,morgan_1862,morgan_1863,morgan_1864,morgan_1865,morgan_1866,morgan_1867,morgan_1868,morgan_1869,morgan_1870,morgan_1871,morgan_1872,morgan_1873,morgan_1874,morgan_1875,morgan_1876,morgan_1877,morgan_1878,morgan_1879,morgan_1880,morgan_1881,morgan_1882,morgan_1883,morgan_1884,morgan_1885,morgan_1886,morgan_1887,morgan_1888,morgan_1889,morgan_1890,morgan_1891,morgan_1892,morgan_1893,morgan_1894,morgan_1895,morgan_1896,morgan_1897,morgan_1898,morgan_1899,morgan_1900,morgan_1901,morgan_1902,morgan_1903,morgan_1904,morgan_1905,morgan_1906,morgan_1907,morgan_1908,morgan_1909,morgan_1910,morgan_1911,morgan_1912,morgan_1913,morgan_1914,morgan_1915,morgan_1916,morgan_1917,morgan_1918,morgan_1919,morgan_1920,morgan_1921,morgan_1922,morgan_1923,morgan_1924,morgan_1925,morgan_1926,morgan_1927,morgan_1928,morgan_1929,morgan_1930,morgan_1931,morgan_1932,morgan
_1933,morgan_1934,morgan_1935,morgan_1936,morgan_1937,morgan_1938,morgan_1939,morgan_1940,morgan_1941,morgan_1942,morgan_1943,morgan_1944,morgan_1945,morgan_1946,morgan_1947,morgan_1948,morgan_1949,morgan_1950,morgan_1951,morgan_1952,morgan_1953,morgan_1954,morgan_1955,morgan_1956,morgan_1957,morgan_1958,morgan_1959,morgan_1960,morgan_1961,morgan_1962,morgan_1963,morgan_1964,morgan_1965,morgan_1966,morgan_1967,morgan_1968,morgan_1969,morgan_1970,morgan_1971,morgan_1972,morgan_1973,morgan_1974,morgan_1975,morgan_1976,morgan_1977,morgan_1978,morgan_1979,morgan_1980,morgan_1981,morgan_1982,morgan_1983,morgan_1984,morgan_1985,morgan_1986,morgan_1987,morgan_1988,morgan_1989,morgan_1990,morgan_1991,morgan_1992,morgan_1993,morgan_1994,morgan_1995,morgan_1996,morgan_1997,morgan_1998,morgan_1999,morgan_2000,morgan_2001,morgan_2002,morgan_2003,morgan_2004,morgan_2005,morgan_2006,morgan_2007,morgan_2008,morgan_2009,morgan_2010,morgan_2011,morgan_2012,morgan_2013,morgan_2014,morgan_2015,morgan_2016,morgan_2017,morgan_2018,morgan_2019,morgan_2020,morgan_2021,morgan_2022,morgan_2023,morgan_2024,morgan_2025,morgan_2026,morgan_2027,morgan_2028,morgan_2029,morgan_2030,morgan_2031,morgan_2032,morgan_2033,morgan_2034,morgan_2035,morgan_2036,morgan_2037,morgan_2038,morgan_2039,morgan_2040,morgan_2041,morgan_2042,morgan_2043,morgan_2044,morgan_2045,morgan_2046,morgan_2047
0,US_1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0
1,US_2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0
2,US_3,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0
3,US_4,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0
4,US_5,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
717,paper_34,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0
718,paper_35,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0
719,paper_36,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0
720,paper_37,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0


## Features from protein sequence

In [None]:
#| export
def esm_embeddings(df: pd.DataFrame, #DataFrame with one protein sequence per row.
                   seq_colname: str, #The name of the column containing the sequences.
                   model_name: str = "esm2_t33_650M_UR50D", #The name of the ESM model to use for the embeddings.
                  ) -> pd.DataFrame:
    """
    Extract ESM embeddings from a DataFrame using the specified ESM model and feature extraction function.

    Each sequence is embedded on its own (batch of one) and reduced to a single
    vector by averaging the layer-`N` representation over token positions
    1..len(sequence), where `N` is the integer parsed from the `_t<N>_` part of
    `model_name`. The result has one row per input row, in input order, with a
    fresh 0..n-1 index.

    Side effects: initialises a single-process `torch.distributed` group (NCCL
    backend) if none exists, downloads the model weights, and moves tokens to
    CUDA, so a GPU is required.

    Raises `ValueError` if `model_name` has no `_t<N>_` part and `KeyError` if
    `seq_colname` is not a column of `df`.
    """

    # Validate the cheap inputs first, so that a typo fails immediately instead
    # of after the process group is created and the weights are downloaded.
    match = re.search(r'_t(\d+)_', model_name)
    if match is None:
        raise ValueError(
            f"Cannot infer the number of layers from model name {model_name!r}; "
            "expected a name such as 'esm2_t33_650M_UR50D'."
        )
    number = int(match.group(1))

    if seq_colname not in df.columns:
        raise KeyError(f"Column {seq_colname!r} not found in df.")

    # Initialize distributed world with world_size 1
    if not torch.distributed.is_initialized():
        url = "tcp://localhost:23456"
        torch.distributed.init_process_group(backend="nccl", init_method=url, world_size=1, rank=0)

    print(f"repr_layers number for model {model_name} is {number}.")
    print("You can also choose other esm2 models:",
          "\nesm2_t48_15B_UR50D\nesm2_t36_3B_UR50D\nesm2_t33_650M_UR50D\nesm2_t30_150M_UR50D\nesm2_t12_35M_UR50D\nesm2_t6_8M_UR50D\n")

    # Download model data from the hub
    model_data, regression_data = esm.pretrained._download_model_and_regression_data(model_name)

    # Initialize the model with FSDP wrapper
    fsdp_params = dict(
        mixed_precision=True,
        flatten_parameters=True,
        state_dict_device=torch.device("cpu"),  # reduce GPU mem usage
        cpu_offload=True,  # enable cpu offloading
    )

    with enable_wrap(wrapper_cls=FSDP, **fsdp_params):
        model, vocab = esm.pretrained.load_model_and_alphabet_core(
            model_name, model_data, regression_data
        )
        batch_converter = vocab.get_batch_converter()
        model.eval()

        # Wrap each layer in FSDP separately, then wrap the whole model
        for name, child in model.named_children():
            if name == "layers":
                for layer_name, layer in child.named_children():
                    wrapped_layer = wrap(layer)
                    setattr(child, layer_name, wrapped_layer)
        model = wrap(model)

        # Define the feature extraction function
        def get_feature(r, colname=seq_colname) -> np.ndarray:
            "Return the mean layer-`number` representation of the sequence in `r[colname]`."
            data = [('protein', r[colname])]
            labels, strs, tokens = batch_converter(data)
            with torch.no_grad():
                results = model(tokens.cuda(), repr_layers=[number], return_contacts=False)
            # Batch of one: squeeze() drops the batch axis.
            rpr = results["representations"][number].squeeze()
            # Skip position 0 and anything past the sequence length
            # (presumably the special tokens added by the batch converter — TODO confirm),
            # then average over the remaining positions.
            rpr = rpr[1 : len(r[colname]) + 1].mean(0).detach().cpu().numpy()

            del results, labels, strs, tokens, data #especially need to delete those on cuda: tokens, results
            gc.collect()

            return rpr

        # Apply the feature extraction function to each row in the DataFrame
        series = df.progress_apply(get_feature, axis=1)
        df_feature = pd.DataFrame(series.tolist())

        return df_feature

In [None]:
show_doc(esm_embeddings)

---

[source](https://github.com/sky1ove/tools/blob/main/tools/esm.py#L15){target="_blank" style="float:right; font-size:smaller"}

### esm_embeddings

>      esm_embeddings (df:pandas.core.frame.DataFrame, seq_colname:str,
>                      model_name:str='esm2_t33_650M_UR50D')

Extract ESM embeddings from a DataFrame using the specified ESM model and feature extraction function.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | DataFrame |  |  |
| seq_colname | str |  | The name of the column containing the sequences. |
| model_name | str | esm2_t33_650M_UR50D | The name of the ESM model to use for the embeddings. |
| **Returns** | **DataFrame** |  |  |

In [None]:
# Load the example sequence table via the project's Data helper; the frame shown
# below has ID, WT_sequence, g12d_seq and g12c_seq columns.
df = Data.get_kseq()

In [None]:
df

Unnamed: 0,ID,WT_sequence,g12d_seq,g12c_seq
0,kras_human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM,MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM,MTEYKLVVVGACGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM
1,kras_human_isoform2b,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,MTEYKLVVVGACGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM


In [None]:
# Embed the g12d_seq column; model_name is not passed, so the function default applies.
# NOTE(review): the saved output below reports esm2_t36_3B_UR50D rather than the
# current default (esm2_t33_650M_UR50D), so it appears to come from an earlier
# run — re-run this cell to refresh it.
esm_embeddings(df,'g12d_seq')

repr_layers number for model esm2_t36_3B_UR50D is 36.
You can also choose other esm2 models: 
esm2_t48_15B_UR50D
esm2_t36_3B_UR50D
esm2_t33_650M_UR50D
esm2_t30_150M_UR50D
esm2_t12_35M_UR50D
esm2_t6_8M_UR50D



  0%|          | 0/2 [00:00<?, ?it/s]

## Features from models

In [None]:
#| export


In [None]:
#| hide
# Trigger nbdev's export step from inside the notebook.
import nbdev; nbdev.nbdev_export()