# 06-post-analysis

How can we verify if the trained models are correct in their predictions? 

One way is to verify against known candidates. 

In [2]:
import sys
sys.path.insert(0, "../")

import pandas as pd
from pathlib import Path

# Both shared folders sit three levels above the current working directory.
repo_root = Path.cwd().parent.parent.parent
data_dir = repo_root / "data"
models_dir = repo_root / "models"

# Echo the resolved location so a wrong working directory is spotted early.
print(f"Current data directory {data_dir}")

Current data directory /home/oliver/Dokumenter/masterprosjekt/predicting-solid-state-qubit-candidates/data


In [3]:
# Formulas of known candidate materials; they are compared against the
# "pretty_formula" column of the training set and of the model summary.
known_candidates = ["C", "Si", "SiGe", "AlN", "GaN",
                   "AlP", "GaP", "AlAs", "ZnO", "ZnS", "ZnSe", "ZnTe", "CdS"]
# Known two-dimensional (2D) and quantum-dot (QD) candidate materials.
QD_2D_candidates = ["BN", "MoS2", "WSe2", #2D
                    "InAs" #QD (indium arsenide; the element symbol is "In", not "Ln")
                   ] 

In [4]:
# Name of the modelling approach; used as the sub-directory under both
# data_dir and models_dir when locating the files this notebook loads.
InsertApproach = "01-naive-approach"
# Number of principal components; it forms part of the summary file name
# ("PCA-<n>-summary.pkl") that is read further down.
numberOfPrincipalComponents = 176

## Training set
How many entries in our training set consist of known candidates?

In [5]:
# Read the processed training set that belongs to the selected approach.
trainingSet = pd.read_pickle(
    data_dir.joinpath(InsertApproach, "processed", "trainingSet.pkl")
)

In [6]:
# Training-set rows whose formula is one of the known candidates, shown
# with their formulas and their "candidate" value.
trainingSet.loc[
    trainingSet["pretty_formula"].isin(known_candidates),
    ["full_formula", "pretty_formula", "candidate"],
]

Unnamed: 0,full_formula,pretty_formula,candidate
3,C8,C,1.0
4,C4,C,1.0
5,C2,C,1.0
11,Si2,Si,1.0
12,Si4,Si,1.0
13,C2,C,1.0
29,Al2N2,AlN,0.0
40,Ga2N2,GaN,0.0
50,Zn1Se1,ZnSe,1.0
83,Zn1O1,ZnO,1.0


In [7]:
# Training-set rows whose formula is one of the 2D / quantum-dot candidates,
# shown with their formulas and their "candidate" value.
trainingSet.loc[
    trainingSet["pretty_formula"].isin(QD_2D_candidates),
    ["full_formula", "pretty_formula", "candidate"],
]

Unnamed: 0,full_formula,pretty_formula,candidate
20,B4N4,BN,0.0
66,B4N4,BN,0.0
73,W2Se4,WSe2,1.0
110,B2N2,BN,0.0
117,Mo2S4,MoS2,1.0
1238,B1N1,BN,0.0
1312,Mo2S4,MoS2,1.0
1360,B3N3,BN,0.0


## Test set
How many entries in our test set consist of known candidates?

In [8]:
# Load the prediction summary written for the chosen approach and number of
# principal components, then report its (rows, columns) shape.
Summary = pd.read_pickle(
    models_dir / InsertApproach / "summary" / f"PCA-{numberOfPrincipalComponents}-summary.pkl"
)
Summary.shape

(23605, 9)

In [9]:
# Summary rows whose formula is one of the known candidates.
Summary.loc[Summary["pretty_formula"].isin(known_candidates)]

Unnamed: 0,material_id,full_formula,pretty_formula,LOG,LOG Prob,RF,RF Prob,GB,GB Prob
1685,mp-370,Cd1S1,CdS,1.0,0.995646,1.0,0.805624,1.0,0.962312
1687,mp-380,Zn2Se2,ZnSe,0.0,0.260078,1.0,0.815099,1.0,0.585733
1724,mp-672,Cd2S2,CdS,0.0,0.082432,1.0,0.818493,1.0,0.887264
1742,mp-830,Ga1N1,GaN,0.0,0.049723,1.0,0.811560,1.0,0.797846
1803,mp-1330,Al1N1,AlN,0.0,0.204789,1.0,0.781007,1.0,0.944038
...,...,...,...,...,...,...,...,...,...
24249,mp-1201781,Zn18S18,ZnS,1.0,0.958279,1.0,0.826572,1.0,0.963652
24311,mp-1202023,Zn18S18,ZnS,1.0,0.974678,1.0,0.828588,1.0,0.972981
24352,mp-1202182,Zn18S18,ZnS,1.0,0.960286,1.0,0.831803,1.0,0.966896
24556,mp-1202959,Zn18S18,ZnS,1.0,0.958567,1.0,0.835631,1.0,0.966896


In [10]:
# Summary rows whose formula is one of the 2D / quantum-dot candidates.
Summary.loc[Summary["pretty_formula"].isin(QD_2D_candidates)]

Unnamed: 0,material_id,full_formula,pretty_formula,LOG,LOG Prob,RF,RF Prob,GB,GB Prob
1762,mp-984,B2N2,BN,0.0,0.172512,1.0,0.780745,1.0,0.555291
1819,mp-1434,Mo1S2,MoS2,1.0,0.855687,1.0,0.814598,1.0,0.828431
1840,mp-1639,B1N1,BN,1.0,0.999459,1.0,0.758509,1.0,0.604085
3384,mp-7991,B2N2,BN,0.0,0.074792,1.0,0.784616,0.0,0.465163
4439,mp-13151,B4N4,BN,1.0,0.999915,1.0,0.769431,0.0,0.490584
15144,mp-569655,B4N4,BN,1.0,0.999651,1.0,0.795902,1.0,0.729184
15929,mp-604884,B2N2,BN,0.0,0.233674,1.0,0.766639,1.0,0.561835
16269,mp-629015,B2N2,BN,1.0,0.83384,1.0,0.770712,1.0,0.528826
16647,mp-644751,B4N4,BN,1.0,0.998739,1.0,0.795595,1.0,0.89443
19883,mp-1060281,B1N1,BN,1.0,0.999348,1.0,0.760148,1.0,0.82692


## How many entries do the models agree on?

In [11]:
# Keep only the rows where all three classifiers predict 1.
# NOTE(review): these labels include a trailing space; confirm they match the
# column names of the summary frame exactly.
Summary[(Summary[["RF ", "GB ", "LOG "]] == 1).all(axis=1)]

Unnamed: 0,material_id,full_formula,pretty_formula,LOG,LOG Prob,RF,RF Prob,GB,GB Prob
1649,mvc-12905,Fe4O8,FeO2,1.0,0.999925,1.0,0.853474,1.0,0.989108
1650,mp-25,N8,N2,1.0,0.771614,1.0,0.827394,1.0,0.874150
1654,mp-157,P4,P,1.0,0.996161,1.0,0.836939,1.0,0.954907
1655,mp-160,B12,B,1.0,0.961721,1.0,0.799087,1.0,0.945049
1657,mp-189,Si4Ru4,SiRu,1.0,0.997804,1.0,0.853642,1.0,0.975826
...,...,...,...,...,...,...,...,...,...
25248,mp-1304797,Sr12In4Ni4O24,Sr3InNiO6,1.0,0.999995,1.0,0.865411,1.0,0.957868
25249,mp-1539137,Rb1Cr5S8,RbCr5S8,1.0,0.615347,1.0,0.819534,1.0,0.796804
25250,mp-1541522,Bi2P2O8,BiPO4,1.0,0.999970,1.0,0.832872,1.0,0.951909
25252,mp-1542038,Cs2Sn2Se6,CsSnSe3,1.0,0.767358,1.0,0.830804,1.0,0.958060
