# Bandgap Prediction

Predicting the bandgap of molecules from the QM9 data set using LSTMs and GNNs.

In [14]:
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader as GraphDataLoader
import torch
import itertools
from rdkit import Chem
from rdkit.Chem import AllChem
import pandas as pd

In [32]:
def load_qm9(path="./QM9"):
    """Construct and return the QM9 dataset object for ``path``.

    Args:
        path: Forwarded as the sole positional argument to ``QM9``;
            defaults to ``"./QM9"``.

    Returns:
        The ``QM9`` instance built from ``path``.
    """
    dataset = QM9(path)
    return dataset

qm9 = load_qm9()

# Parallel per-molecule lists, appended to in dataset order.
smiles, coords = [], []
homo_lumo_gaps, stock_gap = [], []

for entry in qm9:
    targets = entry.y[0]  # row 0 of this entry's target tensor

    # Columns 2 and 3 are read as the HOMO and LUMO energies; column 4 is the
    # value kept under "STOCK" (presumably the dataset's own gap figure --
    # confirm against the QM9 target table).
    homo_energy = targets[2].item()
    lumo_energy = targets[3].item()
    gap_stock = targets[4].item()

    # Bandgap derived by hand from the two orbital energies.
    gap = lumo_energy - homo_energy

    smiles.append(entry.smiles)  # SMILES notation
    coords.append(entry.pos)  # atomic coordinates
    homo_lumo_gaps.append(gap)
    stock_gap.append(gap_stock)

# Gather everything in a DataFrame for easier inspection and handling.
columns = {
    "SMILES": smiles,
    "Coordinates": coords,
    "HOMO-LUMO Gap": homo_lumo_gaps,
    "STOCK": stock_gap,
}
data = pd.DataFrame(columns)

# Show the first few rows as a sanity check.
print(data.head())


              SMILES                                        Coordinates  \
0  [H]C([H])([H])[H]  [[tensor(-0.0127), tensor(1.0858), tensor(0.00...   
1       [H]N([H])[H]  [[tensor(-0.0404), tensor(1.0241), tensor(0.06...   
2            [H]O[H]  [[tensor(-0.0344), tensor(0.9775), tensor(0.00...   
3          [H]C#C[H]  [[tensor(0.5995), tensor(0.), tensor(1.)], [te...   
4             [H]C#N  [[tensor(-0.0133), tensor(1.1325), tensor(0.00...   

   HOMO-LUMO Gap      STOCK  
0      13.736308  13.736308  
1       9.249150   9.249149  
2       9.836916   9.836916  
3       9.118535   9.118535  
4      10.326720  10.329442  
