#### Importing Required Libraries: 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pymatgen.core as pg
from sklearn.feature_selection import VarianceThreshold
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split

#### Importing Training Data (various Features of Alloys)

In [2]:
df = pd.read_excel('Data/Training/Features.xlsx') ## Import the training dataset
df

Unnamed: 0,Compound,Temperature(K),mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,max_EffectiveCoordination,var_MeanBondLength,min_MeanBondLength,max_MeanBondLength,mean_BondLengthVariation,...,max_SpaceGroupNumber,min_SpaceGroupNumber,most_SpaceGroupNumber,frac_sValence,frac_pValence,frac_dValence,frac_fValence,CanFormIonic,MaxIonicChar,MeanIonicChar
0,AlAs,152.91209,5.720212,3.108624e-15,5.720212,5.720212,2.775558e-16,1.000000,1.000000,0.160958,...,29.5,225,166.0,195.500000,0.222222,0.222222,0.555556,0,1.000000,0.078014
1,AlP,140.72993,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.000000,1.000000,0.146248,...,225.0,2,113.5,0.500000,0.500000,0.000000,0.000000,1,0.080661,0.040330
2,AlP (F-43m),162.26119,5.720212,1.330000e-15,5.720212,5.720212,5.000000e-16,1.000000,1.000000,0.160958,...,225.0,166,195.5,0.222222,0.222222,0.555556,0.000000,1,0.047247,0.023624
3,AlSb,148.62687,5.720212,0.000000e+00,5.720212,5.720212,2.780000e-16,1.000000,1.000000,0.160958,...,166.0,2,166.0,0.307692,0.307692,0.384615,0.000000,1,0.005609,0.002627
4,B2AsP,142.34014,5.731569,1.021271e-01,5.541343,5.935824,1.160746e-02,0.977966,1.023215,0.160397,...,229.0,166,229.0,0.233333,0.100000,0.666667,0.000000,0,0.285663,0.131096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,TePb,147.79412,6.000000,4.440000e-16,6.000000,6.000000,0.000000e+00,1.000000,1.000000,0.000000,...,166.0,2,166.0,0.307692,0.307692,0.384615,0.000000,1,0.005609,0.002627
115,Ti2SnC,100.70368,9.383733,1.691867e+00,6.000000,11.858700,9.280932e-02,0.814381,1.163322,0.089257,...,194.0,141,194.0,0.307692,0.153846,0.538462,0.000000,0,0.225103,0.077479
116,TlBr,160.52725,11.956922,8.880000e-16,11.956922,11.956922,1.670000e-16,1.000000,1.000000,0.051982,...,166.0,2,84.0,0.500000,0.500000,0.000000,0.000000,1,0.005609,0.002805
117,TlCl,160.26508,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.000000,1.000000,0.051982,...,166.0,64,115.0,0.142857,0.142857,0.714286,0.000000,1,0.033646,0.016823


In [3]:
print('Shape of DataFrame is : ',df.shape)
print('Null entries in DataFrame are : ', df.isnull().sum().sum())
df.dropna(inplace=True)
print('After dropping null values, shape of DataFrame becomes : ',df.shape)

Shape of DataFrame is :  (119, 273)
Null entries in DataFrame are :  1
After dropping null values, shape of DataFrame becomes :  (118, 273)


#### Importing LTC vs Temperature Data:

In [4]:
Y = pd.read_excel('Data/Training/Initial Dataset.xlsx')
Y

Unnamed: 0,Compound,LTC,Temperature(K)
0,AlAs,266.67806,152.91209
1,AlAs,220.88175,159.37363
2,AlAs,175.08544,185.21978
3,AlAs,129.28913,211.06593
4,AlAs,106.39098,243.37363
...,...,...,...
2140,TlI,0.05528,751.19048
2141,TlI,0.04856,818.19728
2142,TlI,0.04184,885.20408
2143,TlI,0.04184,952.21088


#### Making combined training dataset: Merging LTC data at various temperature with features of alloys

In [5]:
df.drop(['Temperature(K)'],axis = 1, inplace=True)
data_full = pd.merge(df,Y,on='Compound')
data_full

Unnamed: 0,Compound,mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,max_EffectiveCoordination,var_MeanBondLength,min_MeanBondLength,max_MeanBondLength,mean_BondLengthVariation,var_BondLengthVariation,...,most_SpaceGroupNumber,frac_sValence,frac_pValence,frac_dValence,frac_fValence,CanFormIonic,MaxIonicChar,MeanIonicChar,LTC,Temperature(K)
0,AlP,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,...,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,181.81181,140.72993
1,AlP,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,...,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,154.58458,160.53180
2,AlP,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,...,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,127.35736,167.13243
3,AlP,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,...,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,108.29830,200.13556
4,AlP,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,...,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,89.23924,239.73931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,TlI,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,...,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,0.05528,751.19048
2124,TlI,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,...,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,0.04856,818.19728
2125,TlI,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,...,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,0.04184,885.20408
2126,TlI,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,...,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,0.04184,952.21088


#### Splitting data into target property and features (input features and output property)

In [6]:
y = data_full['LTC']
Before_Features = data_full.drop(['LTC','Compound'],axis = 1)
print('Shape of dataset before feature selection is: ',Before_Features.shape)
Before_Features

Shape of dataset before feature selection is:  (2128, 272)


Unnamed: 0,mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,max_EffectiveCoordination,var_MeanBondLength,min_MeanBondLength,max_MeanBondLength,mean_BondLengthVariation,var_BondLengthVariation,min_BondLengthVariation,...,min_SpaceGroupNumber,most_SpaceGroupNumber,frac_sValence,frac_pValence,frac_dValence,frac_fValence,CanFormIonic,MaxIonicChar,MeanIonicChar,Temperature(K)
0,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,0.146248,...,2,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,140.72993
1,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,0.146248,...,2,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,160.53180
2,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,0.146248,...,2,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,167.13243
3,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,0.146248,...,2,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,200.13556
4,5.368858,3.150000e-10,5.368858,5.368858,1.660000e-11,1.0,1.0,0.146248,2.090000e-11,0.146248,...,2,113.5,0.500000,0.500000,0.000000,0.0,1,0.080661,0.040330,239.73931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,0.051982,...,2,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,751.19048
2124,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,0.051982,...,2,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,818.19728
2125,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,0.051982,...,2,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,885.20408
2126,11.956922,8.880000e-16,11.956922,11.956922,2.220000e-16,1.0,1.0,0.051982,9.710000e-17,0.051982,...,2,33.0,0.222222,0.222222,0.555556,0.0,1,0.035456,0.017728,952.21088


#### Out of 272 features available in dataset, only important features need to be selected.Columns with constant values need to be dropped first. 

In [7]:
## VT Analysis
from sklearn.feature_selection import VarianceThreshold
var_thres = VarianceThreshold(threshold=.8*(1-0.8))
var_thres.fit(Before_Features)
var_thres.get_support()
constant_columns = [column for column in Before_Features.columns if column not in Before_Features.columns[var_thres.get_support()]]
After_Variance = Before_Features.drop(constant_columns,axis=1)
print('Shape data after feature selection is: ',After_Variance.shape)
After_Variance

Shape data after feature selection is:  (2128, 211)


Unnamed: 0,mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,max_EffectiveCoordination,mean_NeighDiff_shell1_Number,var_NeighDiff_shell1_Number,min_NeighDiff_shell1_Number,max_NeighDiff_shell1_Number,range_NeighDiff_shell1_Number,mean_NeighDiff_shell1_MendeleevNumber,...,maxdiff_GSmagmom,max_GSmagmom,mean_SpaceGroupNumber,maxdiff_SpaceGroupNumber,dev_SpaceGroupNumber,max_SpaceGroupNumber,min_SpaceGroupNumber,most_SpaceGroupNumber,CanFormIonic,Temperature(K)
0,5.368858,3.150000e-10,5.368858,5.368858,1.773368,5.840000e-11,1.773368,1.773368,1.170000e-10,8.866839,...,0.0,0.0,113.5,223.0,111.5,225.0,2,113.5,1,140.72993
1,5.368858,3.150000e-10,5.368858,5.368858,1.773368,5.840000e-11,1.773368,1.773368,1.170000e-10,8.866839,...,0.0,0.0,113.5,223.0,111.5,225.0,2,113.5,1,160.53180
2,5.368858,3.150000e-10,5.368858,5.368858,1.773368,5.840000e-11,1.773368,1.773368,1.170000e-10,8.866839,...,0.0,0.0,113.5,223.0,111.5,225.0,2,113.5,1,167.13243
3,5.368858,3.150000e-10,5.368858,5.368858,1.773368,5.840000e-11,1.773368,1.773368,1.170000e-10,8.866839,...,0.0,0.0,113.5,223.0,111.5,225.0,2,113.5,1,200.13556
4,5.368858,3.150000e-10,5.368858,5.368858,1.773368,5.840000e-11,1.773368,1.773368,1.170000e-10,8.866839,...,0.0,0.0,113.5,223.0,111.5,225.0,2,113.5,1,239.73931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,11.956922,8.880000e-16,11.956922,11.956922,21.727741,1.780000e-15,21.727741,21.727741,3.550000e-15,15.519815,...,0.0,0.0,33.0,62.0,31.0,64.0,2,33.0,1,751.19048
2124,11.956922,8.880000e-16,11.956922,11.956922,21.727741,1.780000e-15,21.727741,21.727741,3.550000e-15,15.519815,...,0.0,0.0,33.0,62.0,31.0,64.0,2,33.0,1,818.19728
2125,11.956922,8.880000e-16,11.956922,11.956922,21.727741,1.780000e-15,21.727741,21.727741,3.550000e-15,15.519815,...,0.0,0.0,33.0,62.0,31.0,64.0,2,33.0,1,885.20408
2126,11.956922,8.880000e-16,11.956922,11.956922,21.727741,1.780000e-15,21.727741,21.727741,3.550000e-15,15.519815,...,0.0,0.0,33.0,62.0,31.0,64.0,2,33.0,1,952.21088


#### Now, columns with High correlation coefficient need to be dropped. 

In [8]:
## PC Analysis
import matplotlib.pyplot as plt 
import seaborn as sns
def correlation(dataset, threshold):
    col_corr = set()  # Set of all the names of correlated columns
    corr_matrix = dataset.corr()
    for i in range(len(corr_matrix.columns)):
        for j in range(i):
            if abs(corr_matrix.iloc[i, j]) > threshold: # we are interested in absolute coeff value
                colname = corr_matrix.columns[i]  # getting the name of column
                col_corr.add(colname)
    af_corr = dataset.drop(col_corr,axis=1)
    return af_corr

af_both2 = correlation(After_Variance, 0.80)
print('Now,shape of input features is: ',af_both2.shape)
af_both2

Now,shape of input features is:  (2128, 57)


Unnamed: 0,mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,mean_NeighDiff_shell1_Number,var_NeighDiff_shell1_Number,mean_NeighDiff_shell1_MendeleevNumber,var_NeighDiff_shell1_MendeleevNumber,mean_NeighDiff_shell1_MeltingT,var_NeighDiff_shell1_MeltingT,mean_NeighDiff_shell1_CovalentRadius,...,max_GSvolume_pa,min_GSvolume_pa,most_GSvolume_pa,mean_GSbandgap,maxdiff_GSmagmom,mean_SpaceGroupNumber,maxdiff_SpaceGroupNumber,max_SpaceGroupNumber,CanFormIonic,Temperature(K)
0,5.368858,3.150000e-10,5.368858,1.773368,5.840000e-11,8.866839,2.920000e-10,546.348032,1.800000e-08,12.413575,...,22.570238,16.4800,19.525119,0.8125,0.0,113.5,223.0,225.0,1,140.72993
1,5.368858,3.150000e-10,5.368858,1.773368,5.840000e-11,8.866839,2.920000e-10,546.348032,1.800000e-08,12.413575,...,22.570238,16.4800,19.525119,0.8125,0.0,113.5,223.0,225.0,1,160.53180
2,5.368858,3.150000e-10,5.368858,1.773368,5.840000e-11,8.866839,2.920000e-10,546.348032,1.800000e-08,12.413575,...,22.570238,16.4800,19.525119,0.8125,0.0,113.5,223.0,225.0,1,167.13243
3,5.368858,3.150000e-10,5.368858,1.773368,5.840000e-11,8.866839,2.920000e-10,546.348032,1.800000e-08,12.413575,...,22.570238,16.4800,19.525119,0.8125,0.0,113.5,223.0,225.0,1,200.13556
4,5.368858,3.150000e-10,5.368858,1.773368,5.840000e-11,8.866839,2.920000e-10,546.348032,1.800000e-08,12.413575,...,22.570238,16.4800,19.525119,0.8125,0.0,113.5,223.0,225.0,1,239.73931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,11.956922,8.880000e-16,11.956922,21.727741,1.780000e-15,15.519815,0.000000e+00,147.554643,1.420000e-14,4.655945,...,22.570238,18.8575,20.713869,0.8125,0.0,33.0,62.0,64.0,1,751.19048
2124,11.956922,8.880000e-16,11.956922,21.727741,1.780000e-15,15.519815,0.000000e+00,147.554643,1.420000e-14,4.655945,...,22.570238,18.8575,20.713869,0.8125,0.0,33.0,62.0,64.0,1,818.19728
2125,11.956922,8.880000e-16,11.956922,21.727741,1.780000e-15,15.519815,0.000000e+00,147.554643,1.420000e-14,4.655945,...,22.570238,18.8575,20.713869,0.8125,0.0,33.0,62.0,64.0,1,885.20408
2126,11.956922,8.880000e-16,11.956922,21.727741,1.780000e-15,15.519815,0.000000e+00,147.554643,1.420000e-14,4.655945,...,22.570238,18.8575,20.713869,0.8125,0.0,33.0,62.0,64.0,1,952.21088


In [40]:
total_data, last_data,total_y,last_y = af_both2[:1900], af_both2[1900:],y[:1900],y[1900:]

#### MinMax Scaling need to be done to speed up convergence of training of Model

In [41]:
##Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(total_data)
df_scaled = pd.DataFrame(df_scaled)
df_scaled.columns =af_both2.columns

In [42]:
df_scaled

Unnamed: 0,mean_EffectiveCoordination,var_EffectiveCoordination,min_EffectiveCoordination,mean_NeighDiff_shell1_Number,var_NeighDiff_shell1_Number,mean_NeighDiff_shell1_MendeleevNumber,var_NeighDiff_shell1_MendeleevNumber,mean_NeighDiff_shell1_MeltingT,var_NeighDiff_shell1_MeltingT,mean_NeighDiff_shell1_CovalentRadius,...,max_GSvolume_pa,min_GSvolume_pa,most_GSvolume_pa,mean_GSbandgap,maxdiff_GSmagmom,mean_SpaceGroupNumber,maxdiff_SpaceGroupNumber,max_SpaceGroupNumber,CanFormIonic,Temperature(K)
0,0.002126,1.339281e-10,0.011392,0.004632,3.136153e-12,0.088147,1.286175e-11,0.248160,2.369582e-11,0.092292,...,0.060268,0.372210,0.113752,0.281727,0.0,0.417820,0.982379,0.975758,1.0,0.050642
1,0.002126,1.339281e-10,0.011392,0.004632,3.136153e-12,0.088147,1.286175e-11,0.248160,2.369582e-11,0.092292,...,0.060268,0.372210,0.113752,0.281727,0.0,0.417820,0.982379,0.975758,1.0,0.068331
2,0.002126,1.339281e-10,0.011392,0.004632,3.136153e-12,0.088147,1.286175e-11,0.248160,2.369582e-11,0.092292,...,0.060268,0.372210,0.113752,0.281727,0.0,0.417820,0.982379,0.975758,1.0,0.074227
3,0.002126,1.339281e-10,0.011392,0.004632,3.136153e-12,0.088147,1.286175e-11,0.248160,2.369582e-11,0.092292,...,0.060268,0.372210,0.113752,0.281727,0.0,0.417820,0.982379,0.975758,1.0,0.103709
4,0.002126,1.339281e-10,0.011392,0.004632,3.136153e-12,0.088147,1.286175e-11,0.248160,2.369582e-11,0.092292,...,0.060268,0.372210,0.113752,0.281727,0.0,0.417820,0.982379,0.975758,1.0,0.139087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1895,0.055344,2.074823e-15,0.064117,0.440489,6.658956e-16,0.009109,3.911382e-17,0.449826,6.740144e-16,0.193119,...,0.183218,0.890008,0.254077,0.091936,0.0,0.630252,0.110132,0.618182,1.0,0.356662
1896,0.055344,2.074823e-15,0.064117,0.440489,6.658956e-16,0.009109,3.911382e-17,0.449826,6.740144e-16,0.193119,...,0.183218,0.890008,0.254077,0.091936,0.0,0.630252,0.110132,0.618182,1.0,0.404543
1897,0.055344,2.074823e-15,0.064117,0.440489,6.658956e-16,0.009109,3.911382e-17,0.449826,6.740144e-16,0.193119,...,0.183218,0.890008,0.254077,0.091936,0.0,0.630252,0.110132,0.618182,1.0,0.452424
1898,0.055344,2.074823e-15,0.064117,0.440489,6.658956e-16,0.009109,3.911382e-17,0.449826,6.740144e-16,0.193119,...,0.183218,0.890008,0.254077,0.091936,0.0,0.630252,0.110132,0.618182,1.0,0.506290


#### Splitting data into training and testing: 

In [43]:
# converting input data into numpy array format.
input_features = df_scaled.to_numpy()
output_property = total_y.to_numpy()

##Vizualizing the split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(input_features, output_property, test_size=0.2, random_state=90)
# sns.histplot(data=y,bins=20,kde=True,legend=True)
# plt.show()
# sns.histplot(data=y_train,color = "g",bins=20,kde=True)
# plt.show()
# sns.histplot(data=y_test,color="r",bins=20,kde=True)
# plt.show();

#### Now, Data is ready to model preparation and validation:

In [44]:
##RF Algorithm
from sklearn.ensemble import RandomForestRegressor
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train,y_train)
print('Training Accuracy: {}'.format(rf_reg.score(X_train,y_train)))
print('Test Accuracy: {}'.format(rf_reg.score(X_test,y_test)))

Training Accuracy: 0.9899521784631229
Test Accuracy: 0.9354591834723949


#### Cross-Validation (K-Fold) is performed to check model performance on different random sampling.

In [14]:
##Repeated K-fold

# scores = []
# counter = 0
# from sklearn.model_selection import RepeatedKFold
# fold = RepeatedKFold(n_splits = 10,n_repeats=10, random_state=30)
# for train_index, test_index in fold.split(af_both2):
#     counter = counter + 1
#     X_train, X_test = af_both2.iloc[train_index], af_both2.iloc[test_index]
#     y_train, y_test = y.iloc[train_index], y.iloc[test_index]
#     rf_reg.fit(X_train,y_train)
#     scores.append(rf_reg.score(X_test,y_test))
# print(scores)

In [55]:
rf_reg.predict(total_data[:20].to_numpy())

array([191.3799605, 191.3799605, 191.3799605, 191.3799605, 191.3799605,
       191.3799605, 191.3799605, 191.3799605, 191.3799605, 191.3799605,
       191.3799605, 191.3799605, 191.3799605, 191.3799605, 191.3799605,
       191.3799605, 191.3799605, 191.3799605, 197.5510891, 197.5510891])

In [56]:
total_y[:20]

0     181.81181
1     154.58458
2     127.35736
3     108.29830
4      89.23924
5      75.62563
6      56.56657
7      45.67568
8      40.23023
9      34.78478
10     26.61662
11     23.89389
12     21.17117
13     18.44845
14     15.72573
15     13.00300
16     13.00300
17     10.28028
18    406.56566
19    330.23416
Name: LTC, dtype: float64

In [29]:
# Predicting LTC of material using Features of alloy. 
X = total_data    # Features of Alloy 
print(rf_reg.predict([X,input_features[-1],input_features[-2],input_features[-3]])) # Predicted LTC at some temperature
print(input_features[-2][-1])

[0.0460868 0.0460868 0.0462964 0.0449278]
0.775542732282113


In [18]:
rf_reg.get_metadata_routing()

{'fit': {'sample_weight': None}, 'score': {'sample_weight': None}}

### Prediction Code Analysis: 

In [16]:
import numpy as np
import pandas as pd
import os
import pymatgen.core as pg

In [17]:
##Initializing A,A_1, B and X as elements used in this work. Please refer to Fig. 6a in the paper

A = ["Li","Na","K","Rb","Cs"]
A_1 = ["Be","Mg","Ca","Sr","Ba"]
B = ["Ti","Zr","Hf","V","Nb","Ta","Cr","Mo","W","Mn","Tc","Re","Fe","Os","Ru","Co","Rh","Ir","Ni","Pt",
     "Pd","Cu","Ag","Au","Zn","Cd","Hg","Al","Ga","In","Tl","Si","Ge","Ga","Sn","Pb","As","Sb","Bi","Se","Te"]
X = ["F","Cl","Br","I","O","S","P"]

In [20]:
##Read all 4 prototype 
# structures and replace A,B,X by elements from A,bA_1, B, and X

os.chdir(r"prototype_Poscar")
with open('protoA3B2X9.txt') as f:  
    lines = f.read()

In [21]:
#this is to create the properties file header that will be used in Magpie later

for_text = []
initial = "filename delta_e structure"
for_text.append(initial)

In [29]:
cd ..

/raid/phy_aftab/aftab/piyush/Lattice_RF_ML/am1c17378_si_001/Supplementary files/ML Code


In [49]:
##Make a directory A3B2X9 in prototypes to create poscar files from the elemtens and prototype structure A3B2X9
os.chdir(r"/raid/phy_aftab/aftab/piyush/Lattice_RF_ML/am1c17378_si_001/Supplementary files/ML Code/prototype_Poscar/prototypes/A3B2X9")
counter = 1
for i in A:
    for j in B:
        for k in X:
            
            for_output = lines.replace("A",i)
            for_output_2 = for_output.replace("B",j)
            for_output_3 = for_output_2.replace("X",k)
            for_text.append(f"{counter}-{i}3{j}2{k}9 none {counter}")
            with open(f"{counter}-{i}3{j}2{k}9", "w") as text_file:
                text_file.write(for_output_3)
            counter = counter + 1

In [50]:
##This is for the properties file again
textfile = open("properties.txt", "w")
for element in for_text:
    textfile.write(element + "\n")
textfile.close()