In [3]:
import wget
import os.path
import json


import numpy as np
from numpy.linalg import det, inv
from numpy import array
import random

import ase.db
import pandas as pd

import matplotlib.pyplot as plt

In [None]:
from anneling_super_cell import super_cell

In [None]:
# Location of the local C2DB snapshot: folder name plus the database file
# name used by each of the two queries below.
# NOTE(review): both queries currently read the same snapshot file -- confirm
# whether a different file was intended for the second one.
DATA_PATH = "database"
database_name1 = database_name2 = "c2db-20211702.db"

In [None]:
def get_data_as_pd(path,options,props):
    """Run a selection on an ASE database and return it as a DataFrame.

    Parameters
    ----------
    path : str
        Database location passed to ``ase.db.connect``.
    options : str
        Selection expression passed to ``db.select``.
    props : list of str
        Row keys to extract; they also become the column names. Each value is
        read with ``row.get(key)``.

    Returns
    -------
    pandas.DataFrame
        One row per selected database row, one column per entry of ``props``.
    """
    records = []
    for row in ase.db.connect(path).select(options):
        records.append([row.get(key) for key in props])
    return pd.DataFrame(records, columns=props)

In [None]:
# Query 1: rows with is_magnetic=True and thermodynamic_stability_level=3.
# os.path.join inserts the separator that plain string concatenation
# ("database" + "c2db-...db") was missing.
path = os.path.join(DATA_PATH, database_name1)

options = 'is_magnetic=True, thermodynamic_stability_level=3'
props = ["formula","spgnum", "spacegroup","uid","asr_id"]

# Space groups that are filtered out of the result.
excluded_spacegroups = ['P1', 'Pc', 'P-1']

raw_df_1 = get_data_as_pd(path,options,props)
raw_df_1 = raw_df_1[~raw_df_1["spacegroup"].isin(excluded_spacegroups)]


#Print
# len(df) is the number of rows; len(list(df)) would count the columns instead.
print("The number of elements is:",len(raw_df_1) )
print( raw_df_1[["formula","uid"]] )

In [None]:
# Query 2: rows with thermodynamic_stability_level=3 and a gap of at most
# 10e-3 (= 0.01, in whatever unit the database stores `gap`).
# os.path.join inserts the separator that plain string concatenation
# ("database" + "c2db-...db") was missing.
path = os.path.join(DATA_PATH, database_name2)

options = 'thermodynamic_stability_level=3'
props = ["formula","spgnum", "spacegroup","gap","uid"]

# Space groups that are filtered out of the result.
excluded_spacegroups = ['P1', 'Pc', 'P-1']

raw_df_2 = get_data_as_pd(path,options,props)
raw_df_2 = raw_df_2[
    (raw_df_2["gap"] <= 10e-3)
    & ~raw_df_2["spacegroup"].isin(excluded_spacegroups)
]


#Print
# len(df) is the number of rows; len(list(df)) would count the columns instead.
print("The number of elements is:",len(raw_df_2) )
print( raw_df_2[["formula","uid","gap"]] )

In [None]:
# get jsons 
def extract_json(uid_list,save_path ="JSONcolection"):
    """Download the ``all_data`` JSON of every C2DB row in ``uid_list``.

    Each file is stored as ``<save_path>/<uid>_data.json``. A uid whose file
    is already present is skipped, so the function can be re-run cheaply.

    Parameters
    ----------
    uid_list : iterable of str
        C2DB row uids.
    save_path : str
        Folder used as the download cache; created if it does not exist.
    """
    # Without an existing folder wget would treat `out` as a file name.
    os.makedirs(save_path, exist_ok=True)
    for uid in uid_list:
        dataurl = 'https://cmrdb.fysik.dtu.dk/c2db/row/'+uid+'/all_data'
        file = os.path.join(save_path, uid+"_data.json")
        if os.path.isfile(file):
            print("file: ",file, "found")
        else:
            # Download straight to the name the cache check above looks for.
            print(wget.download(dataurl, out=file))

def extract_structure(uid_list ,save_path="XYZcolection"):
    """Download the ``structure.json`` of every C2DB row in ``uid_list``.

    Each file is stored as ``<save_path>/<uid>.json``. A uid whose file is
    already present is skipped, so the function can be re-run cheaply.

    Parameters
    ----------
    uid_list : iterable of str
        C2DB row uids.
    save_path : str
        Folder used as the download cache; created if it does not exist.
    """
    # Without an existing folder wget would treat `out` as a file name.
    os.makedirs(save_path, exist_ok=True)
    for uid in uid_list:
        # e.g. https://cmrdb.fysik.dtu.dk/c2db/row/N2O2V3-358facb64a22/data/structure.json
        dataurl = 'https://cmrdb.fysik.dtu.dk/c2db/row/'+uid+'/data/structure.json'
        print(dataurl)
        file = os.path.join(save_path, uid+".json")
        if os.path.isfile(file):
            print("file: ",file, "found")
        else:
            # Download directly to the per-uid name. Downloading to the folder
            # and renaming "structure.json" afterwards picks up a stale file
            # whenever a "structure.json" is already lying in the folder.
            print(wget.download(dataurl, out=file))

In [None]:
# Cache the structure files of the first ten rows of each query result.
print("extract_data1:")
print(raw_df_1["uid"][:1])
extract_structure(raw_df_1["uid"][:10])
print("extract_data2:")
# The second step must read the second table; it previously repeated raw_df_1.
extract_structure(raw_df_2["uid"][:10])

In [None]:
# Example of good practice in data handling
# uid= "C2-a6735a4a3797"
# with open('XYZcolection/structure.json', 'r') as file:
#     json_data= json.load(file)
# mytype= "__ndarray__";
# datastruc=json_data['1']["cell"]["array"];
# if mytype in datastruc:
#     shape= datastruc[mytype][0]
#     dtype= datastruc[mytype][1]
#     lattice = array(datastruc[mytype][2],dtype=dtype).reshape(shape)


In [None]:
#f = "/Users/voicutu/Documents/GitHub/vdw_evolve/sandbox/XYZcolection/c2db-822.xyz"

def get_lattice_from_structure(f):
    """Read the cell matrix from a downloaded structure JSON file.

    The matrix is looked up at ``json_data['1']['cell']['array']``. Two
    encodings are accepted:

    * a dict ``{"__ndarray__": [shape, dtype, flat_values]}``, which is
      rebuilt with the stored dtype and shape;
    * a plain (nested) list of numbers, which is converted to a float array.

    Parameters
    ----------
    f : str
        Path of the JSON file.

    Returns
    -------
    numpy.ndarray
        The cell matrix.

    Raises
    ------
    ValueError
        If the cell entry uses neither encoding. The function previously
        failed with an ``UnboundLocalError`` in that case.
    """
    with open(f, 'r') as file:
        json_data= json.load(file)
    mytype= "__ndarray__"
    datastruc=json_data['1']["cell"]["array"]
    if isinstance(datastruc, dict) and mytype in datastruc:
        shape, dtype, flat_values = datastruc[mytype][:3]
        return array(flat_values, dtype=dtype).reshape(shape)
    if isinstance(datastruc, list):
        return array(datastruc, dtype=float)
    raise ValueError(
        "Unsupported cell encoding in " + str(f)
        + ": expected a '" + mytype + "' entry or a nested list"
    )

In [None]:
#
# Settings dictionary handed to the super-cell search as `model_par`.
# The keys are interpreted by the imported `super_cell` routine; their exact
# meaning is not visible in this notebook.
up = 99999999
bond =20
model_par = dict(
    initialTemp=4,
    finalTemp=0.0002,
    beta=10,
    # four independent [-2*bond, 2*bond] intervals
    bounds=[[-2 * bond, 2 * bond] for _ in range(4)],
    nr_neighbours=1,
    step_size=4,
    gaussian_sd=3,
    known_min=-up,
    start_point=[1, 5, 3, 2],
    strain_boundary=[[-0.2, 0.2], [-0.2, 0.2]],
)



In [None]:
# Make sure the structure files of the two uids used by the single-pair
# example further down are present in the local cache folder.
uids=["C2-a6735a4a3797","WS2-64090c9845f8"]
extract_structure(uids)

In [None]:
# Single-pair example: match the in-plane lattices of two cached structures.

# Folder holding the cached structure JSON files, and the two uids to combine.
structure_path = "XYZcolection"
uid_1 = "WS2-64090c9845f8"#"VCl2-1ccb0b28392d"
uid_2 = "C2-a6735a4a3797"#"Mn2Se2-a1d595c25fc5"


path1=structure_path+"/"+uid_1+".json"
path2=structure_path+"/"+uid_2+".json"

# lattices
latice1 = get_lattice_from_structure(path1)
latice2 = get_lattice_from_structure(path2)

# 2x2 in-plane cells: transpose of the top-left 2x2 block of each lattice.
cel1 = np.array([[latice1[0][0], latice1[1][0]],
        [latice1[0][1], latice1[1][1]]])

cel2 = np.array([[latice2[0][0], latice2[1][0]],
        [latice2[0][1], latice2[1][1]]])

tA, tB, t_cel2_no_strain, diagonal_strain, strain = super_cell(cel1,cel2, 7, model_par)

new = np.dot(tA, cel1)

print("\ncel1",cel1)
print("cel2",cel2)
print("\n _____\n tA:\n",tA)
print("det:",det(tA))
print("\n _____\n tB:\n",tB)
print("\n _____\n t_cel2_no_strain:\n",t_cel2_no_strain)
print("\n _____\n Strain diagonal :\n",diagonal_strain)
print("\n _____\n Strain:\n",strain)

# Stored under its own name: assigning the result to `super_cell` would
# overwrite the imported function, so this cell (and any later caller) would
# fail on the next run.
super_cell_lattice = np.dot(tA,cel1)
print("\n _____\n Super cell:\n",super_cell_lattice)
print("det:",det(super_cell_lattice))





# print("\n zero_mat:\n",zero_mat)

In [None]:
# Column-oriented accumulator for the generated pairs: one independent,
# initially empty list per column.
solutions = {
    column: []
    for column in (
        "uid_1", "uid_2",
        "mat_1", "mat_2",
        "tA", "tB",
        "new", "strainB",
        "new_uid",
    )
}

In [None]:
# Generate random material pairs (one row from each query table), run the
# lattice-matching search on every new pair and accumulate the results in
# `solutions`, writing a checkpoint CSV every `saving_spot` new pairs.

# Imported under an alias so this cell still works when an earlier cell has
# rebound the name `super_cell` to an array.
# NOTE(review): the original called an undefined `build_annealing`; this
# assumes `super_cell` (five return values, as in the single-pair example)
# is the intended routine -- confirm.
from anneling_super_cell import super_cell as find_super_cell

structure_path = "XYZcolection"  # relative cache folder, the extract_structure default
saving_spot = 5
to_be_generated = 10

# Never ask for more distinct pairs than can exist, otherwise the loop below
# could not terminate (assumes the uids within each table are unique).
unused_pairs = len(raw_df_1) * len(raw_df_2) - len(solutions["new_uid"])
to_be_generated = max(0, min(to_be_generated, unused_pairs))

while to_be_generated != 0:

    # randrange excludes its upper bound, so the index is always valid for
    # iloc (randint(0, len) could return len itself).
    data1 = raw_df_1.iloc[random.randrange(len(raw_df_1))]
    data2 = raw_df_2.iloc[random.randrange(len(raw_df_2))]
    uid_1 = data1["uid"]
    uid_2 = data2["uid"]

    new_uid = uid_1+"_"+uid_2
    if new_uid in solutions["new_uid"]:
        continue  # pair already processed; draw again

    # Randomly drawn uids are usually not cached yet: fetch them if needed.
    extract_structure([uid_1, uid_2], save_path=structure_path)

    # structure file paths
    path1 = structure_path+"/"+uid_1+".json"
    path2 = structure_path+"/"+uid_2+".json"

    # lattices
    latice1 = get_lattice_from_structure(path1)
    latice2 = get_lattice_from_structure(path2)

    # 2x2 in-plane cells: transpose of the top-left 2x2 block of each lattice.
    cel1 = np.array([[latice1[0][0], latice1[1][0]],
                     [latice1[0][1], latice1[1][1]]])

    cel2 = np.array([[latice2[0][0], latice2[1][0]],
                     [latice2[0][1], latice2[1][1]]])

    tA, tB, t_cel2_no_strain, diagonal_strain, strain = find_super_cell(cel1, cel2, 5, model_par)
    new = np.dot(tA, cel1)

    # Record the row only after everything succeeded, and fill every column,
    # so all lists keep the same length (DataFrame.from_dict requires that).
    solutions["new_uid"].append(new_uid)
    solutions["uid_1"].append(uid_1)
    solutions["uid_2"].append(uid_2)
    # NOTE(review): "mat_1"/"mat_2" were never filled originally; storing the
    # 2x2 cell matrices here is an assumption -- confirm the intended content.
    solutions["mat_1"].append(cel1)
    solutions["mat_2"].append(cel2)
    solutions["tA"].append(tA)
    solutions["tB"].append(tB)
    solutions["new"].append(new)
    solutions["strainB"].append(strain)
    to_be_generated = to_be_generated-1

    # save a checkpoint every `saving_spot` stored pairs
    generated = len(solutions["new"])
    if generated % saving_spot == 0:
        data = pd.DataFrame.from_dict(solutions)
        data.to_csv("SuperCell_"+str(generated))

# Final save so pairs generated after the last checkpoint are not lost.
if len(solutions["new"]) % saving_spot != 0:
    data = pd.DataFrame.from_dict(solutions)
    data.to_csv("SuperCell_"+str(len(solutions["new"])))

print("Done")