### Table of Contents
- [`passivating_molecule` into SMILES format](#`passivating_molecule`-into-SMILES-format)
- [`perovskite_composition` into features](#`perovskite_composition`-into-features)
- [baseline ML model](#baseline-ML-model)

In [40]:
import pandas as pd
import numpy as np
import pubchempy as pcp

In [42]:
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors

In [44]:
# Load the extraction results; the path is relative to the notebook's working directory.
# NOTE(review): the file name suggests this is output from a fine-tuned LLaMA model — confirm provenance.
df = pd.read_json('finetuned_llama_output.json')
# Transpose so the JSON's top-level keys become the row index, then order rows by that index.
# NOTE(review): presumably each top-level key is one extracted record — verify against the JSON file.
data = df.T.sort_index()
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 142 entries, 0 to 149
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   control_pce                   70 non-null     object
 1   control_voc                   60 non-null     object
 2   treated_pce                   135 non-null    object
 3   treated_voc                   129 non-null    object
 4   passivating_molecule          137 non-null    object
 5   perovskite_composition        134 non-null    object
 6   electron_transport_layer      133 non-null    object
 7   hole_transport_layer          134 non-null    object
 8   pin_nip_structure             128 non-null    object
 9   stability_tests               142 non-null    object
 10  pin_nip_structure_value       3 non-null      object
 11  pin_nip_structure_type        1 non-null      object
 12  pin_nip_structure_type_value  1 non-null      object
dtypes: object(13)
memory usag

# Cleaning

In [47]:
# Function to select columns
def select_data(df):
    """
    Coerce the PCE/VOC columns to numeric and keep only rows usable downstream.

    The input frame is left untouched; a new DataFrame is returned.

    :param df: DataFrame containing at least the columns 'control_pce',
        'control_voc', 'treated_pce', 'treated_voc', 'passivating_molecule'
        and 'perovskite_composition'.
    :return: A new DataFrame in which the four PCE/VOC columns are numeric
        (unparseable values become NaN) and rows missing 'treated_pce',
        'passivating_molecule' or 'perovskite_composition' are removed.
    """
    numeric_cols = ['control_pce', 'control_voc', 'treated_pce', 'treated_voc']
    required_cols = ['treated_pce', 'passivating_molecule', 'perovskite_composition']

    # `assign` builds a new frame, so the caller's DataFrame is never modified
    # (the previous in-place column assignment silently changed the input).
    converted = df.assign(
        **{col: pd.to_numeric(df[col], errors='coerce') for col in numeric_cols}
    )

    # Drop rows where the target, the molecule name or the composition is missing
    return converted.dropna(subset=required_cols)

data = select_data(data)
data.head()

Unnamed: 0,control_pce,control_voc,treated_pce,treated_voc,passivating_molecule,perovskite_composition,electron_transport_layer,hole_transport_layer,pin_nip_structure,stability_tests,pin_nip_structure_value,pin_nip_structure_type,pin_nip_structure_type_value
0,21.0,1.17,23.77,1.145,Cyclohexylmethylammonium iodide (CMAI),α-formamidinium lead triiodide (FAPbI3),Fluorine-doped tin oxide (FTO),Mixed SAMs (2PACz and Me-4PACz),NIP,"[{'test_name': 'ISOS-D-2I', 'temperature': 85,...",,,
1,20.95,1.12,22.73,1.16,phenethylammonium iodide (PEAI),FA1-x MAx PbI3,SnO2,spiro-OMeTAD,NIP,"[{'test_name': 'thermal cycling', 'temperature...",,,
2,22.39,1.095,24.59,1.185,Spiro-OMeTAD,(BA)2PbI4,SnO2,PTAA,NIP,"[{'test_name': 'ISOS-D', 'temperature': '25', ...",,,
4,,,21.06,1.14,vinylbenzylammonium bromide,MAPbBr 3 ) 0.05 (FAPbI 3 ) 0.95,Spiro-OMeTAD,Spiro-OMeTAD,NIP,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,
5,,,22.1,1.135,iso-BAI,FA(MA)PbI 3,Spiro-OMeTAD,IZO,n-i-p,"[{'test_name': None, 'temperature': None, 'tim...",,,


In [49]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 117 entries, 0 to 149
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   control_pce                   57 non-null     float64
 1   control_voc                   50 non-null     float64
 2   treated_pce                   117 non-null    float64
 3   treated_voc                   114 non-null    float64
 4   passivating_molecule          117 non-null    object 
 5   perovskite_composition        117 non-null    object 
 6   electron_transport_layer      115 non-null    object 
 7   hole_transport_layer          115 non-null    object 
 8   pin_nip_structure             113 non-null    object 
 9   stability_tests               117 non-null    object 
 10  pin_nip_structure_value       3 non-null      object 
 11  pin_nip_structure_type        0 non-null      object 
 12  pin_nip_structure_type_value  0 non-null      object 
dtypes: float64

In [51]:
data.isna().sum(axis=0)

control_pce                      60
control_voc                      67
treated_pce                       0
treated_voc                       3
passivating_molecule              0
perovskite_composition            0
electron_transport_layer          2
hole_transport_layer              2
pin_nip_structure                 4
stability_tests                   0
pin_nip_structure_value         114
pin_nip_structure_type          117
pin_nip_structure_type_value    117
dtype: int64

## `passivating_molecule` into SMILES format

In [54]:
def fetch_smiles_from_name(molecule_name):
    """
    Look up a molecule by name on PubChem and return its isomeric SMILES.

    :param molecule_name: Name to search for (passed to PubChem's 'name' lookup).
    :return: The isomeric SMILES of the first match, or ``np.nan`` when the
        name is not a non-blank string, when nothing matches, or when the
        lookup raises. Failures always yield ``np.nan`` (never ``None``) so the
        resulting column has a single missing-value marker.
    """
    # Nothing to look up for NaN / None / blank names; skip the request entirely.
    if not isinstance(molecule_name, str) or not molecule_name.strip():
        return np.nan

    try:
        # Search for the molecule in PubChem by name
        compounds = pcp.get_compounds(molecule_name, 'name')
        if compounds:
            return compounds[0].isomeric_smiles  # Return the first match's SMILES
        return np.nan
    except Exception as e:
        # Best-effort lookup: report the failure and fall back to a missing value.
        print(f"Error fetching SMILES for {molecule_name}: {e}")
        return np.nan

### Cleaning Data

In [60]:
data['passivating_molecule'].head(20)

0                Cyclohexylmethylammonium iodide (CMAI)
1                       phenethylammonium iodide (PEAI)
2                                          Spiro-OMeTAD
4                           vinylbenzylammonium bromide
5                                               iso-BAI
6                                   phenylethylammonium
8                                pyren-1-aminium iodide
9                                                    C8
10                                             tosylate
11                                        benzotriazole
12                         butylammonium bromide (BABr)
13    2-(9H-carbazol-9-yl)ethyl phosphonic acid (2PACz)
14                                                 PEAI
15    4-chloro-phenylethylammonium iodide (Cl-PEAI) ...
16                                             CF3 PEAI
17                       hexyltrimethylammonium bromide
19           3-fluoro-phenethylammonium iodide (3FPEAI)
20                           phenylethylammonium

In [None]:
fetch_smiles_from_name('3FPEAI')

In [18]:
import re

def fix_unmatched_brackets(s):
    """
    Fixes unmatched brackets in the given string by adding the correct brackets where necessary.

    The result is always properly nested: a closing bracket that does not match
    the innermost open bracket first closes the brackets opened after its
    partner (or every open bracket, if it has no partner, in which case the
    missing opening bracket is added at the very start of the string).

    :param s: Input string with potential unmatched brackets.
    :return: A corrected string with properly balanced brackets.
    """
    closer_for = {'(': ')', '{': '}', '[': ']'}
    opener_for = {closer: opener for opener, closer in closer_for.items()}

    open_stack = []      # brackets opened and not yet closed, innermost last
    prepended = []       # missing opening brackets, in the order they were needed
    fixed_s = []

    for char in s:
        if char in closer_for:
            open_stack.append(char)
            fixed_s.append(char)
        elif char in opener_for:
            wanted = opener_for[char]
            if wanted in open_stack:
                # Close everything opened after the partner so nesting stays valid
                while open_stack[-1] != wanted:
                    fixed_s.append(closer_for[open_stack.pop()])
                open_stack.pop()
            else:
                # No partner: the missing opener goes at the start of the string,
                # so every currently open bracket must be closed before this closer.
                while open_stack:
                    fixed_s.append(closer_for[open_stack.pop()])
                prepended.append(wanted)
            fixed_s.append(char)
        else:
            fixed_s.append(char)

    # Add missing closing brackets at the end, innermost first
    while open_stack:
        fixed_s.append(closer_for[open_stack.pop()])

    # Openers needed later wrap the ones needed earlier, hence the reversal
    return "".join(reversed(prepended)) + "".join(fixed_s)


def get_chemical_names(chemical_list):
    """
    Tidy a collection of chemical names.

    For each name, a parenthetical group at the very end of the string (such as
    a trailing abbreviation) is removed together with the whitespace before it,
    the result is stripped, and any spaces directly after a ``]`` are removed.
    Note that any trailing ``(...)`` is removed, whether or not it is an
    abbreviation.

    :param chemical_list: Iterable of name strings.
    :return: A list of cleaned names, in the same order as the input.
    """
    trailing_parenthetical = r"\s*\([^)]*\)$"
    spaces_after_bracket = r"\] +"

    def _tidy(raw_name):
        # Drop the final "(...)" group, then trim surrounding whitespace
        without_suffix = re.sub(trailing_parenthetical, "", raw_name).strip()
        # Close up "] " so bracketed locants stay attached to the rest of the name
        return re.sub(spaces_after_bracket, "]", without_suffix)

    return [_tidy(entry) for entry in chemical_list]

In [None]:
fix_unmatched_brackets

In [20]:
# Raw molecule names as extracted
lst = data['passivating_molecule']

# Balance brackets first, then strip trailing parentheticals / bracket spacing
cleaned_list = get_chemical_names(lst.apply(fix_unmatched_brackets))

# Store the cleaned names next to the raw ones (assigned positionally from the list)
data['passivating_molecule_cleaned'] = cleaned_list

In [22]:
cleaned_list

['Cyclohexylmethylammonium iodide',
 'phenethylammonium iodide',
 'Spiro-OMeTAD',
 'vinylbenzylammonium bromide',
 'iso-BAI',
 'phenylethylammonium',
 'pyren-1-aminium iodide',
 'C8',
 'tosylate',
 'benzotriazole',
 'butylammonium bromide',
 '2-(9H-carbazol-9-yl)ethyl phosphonic acid',
 'PEAI',
 '4-chloro-phenylethylammonium iodide (Cl-PEAI) and 4-fluoro-phenylethylammonium iodide',
 'CF3 PEAI',
 'hexyltrimethylammonium bromide',
 '3-fluoro-phenethylammonium iodide',
 'phenylethylammonium iodide',
 'cyclohexylmethylammonium iodide',
 '3-fluoro-phenethylammonium iodide',
 'Cs2PbI2Cl2',
 'CF3-PEA',
 '2,2′,2"-(1,3,5-Benzinetriyl)-tris',
 'ammonia',
 'CH3NH3',
 'benzene',
 'spiro-OMeTAD',
 'C60',
 '1,3-propylene diammonium',
 '2-thiophenemethylammonium iodide',
 'Oleylammonium iodide',
 'ortho-(phenylene)di(ethylammonium) iodide',
 'Azetidinium',
 "2,2',7,7'-tetrakis(N,N-di-p-methoxyphenyl-amine)-9,9'-spirobifluorene",
 'PFN-P2',
 'PbSO4',
 'PCBM',
 'n-butylamine acetate',
 'allylammonium'

In [26]:
# Resolve every cleaned name to a SMILES string; fetch_smiles_from_name is called
# once per row (it queries PubChem via pubchempy), so repeated names are looked up again.
data['passivating_molecule_SMILES'] = data['passivating_molecule_cleaned'].apply(fetch_smiles_from_name)

In [27]:
# temporary smaller data with all SMILES
# `.copy()` makes this an independent frame, so the columns added to it later
# do not trigger SettingWithCopyWarning or depend on view-vs-copy behaviour.
temp_data = data[data['passivating_molecule_SMILES'].notna()].copy()

In [28]:
temp_data

Unnamed: 0,control_pce,control_voc,treated_pce,treated_voc,passivating_molecule,perovskite_composition,electron_transport_layer,hole_transport_layer,pin_nip_structure,stability_tests,pin_nip_structure_value,pin_nip_structure_type,pin_nip_structure_type_value,passivating_molecule_cleaned,passivating_molecule_SMILES
1,20.95,1.12,22.73,1.16,phenethylammonium iodide (PEAI),FA1-x MAx PbI3,SnO2,spiro-OMeTAD,NIP,"[{'test_name': 'thermal cycling', 'temperature...",,,,phenethylammonium iodide,C1=CC=C(C=C1)CCN.I
2,22.39,1.095,24.59,1.185,Spiro-OMeTAD,(BA)2PbI4,SnO2,PTAA,NIP,"[{'test_name': 'ISOS-D', 'temperature': '25', ...",,,,Spiro-OMeTAD,COC1=CC=C(C=C1)N(C2=CC=C(C=C2)OC)C3=CC4=C(C=C3...
10,,,24.41,1.08,tosylate,FAPbI3 0.95 (MAPbBr 3 ) 0.05,SnO2,spiro-MeOTAD,NIP,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,,tosylate,CC1=CC=C(C=C1)S(=O)(=O)[O-]
11,,,21.8,1.16,benzotriazole,FAPbI3,SnO2,Spiro-OMeTAD,NIP,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,,benzotriazole,C1=CC2=NNN=C2C=C1
12,19.4,1.31,19.4,1.31,butylammonium bromide (BABr),Cs 0.17 FA 0.83 Pb(I 0.6 Br 0.4 ) 3,nanoparticle-based SnO 2,spiro-OMeTAD,PIN,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,,butylammonium bromide,CCCCN.Br
17,20.95,1.16,22.16,1.19,hexyltrimethylammonium bromide,Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45,Spiro-OMeTAD,1F-PTAA,PIN,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,,hexyltrimethylammonium bromide,CCCCCC[N+](C)(C)C.[Br-]
19,24.09,1.1595,19.9,1.1607,3-fluoro-phenethylammonium iodide (3FPEAI),Cs0.05MA0.15FA0.8Pb(I0.95Br0.05)3,C60,2PACz,PIN,"[{'test_name': 'ISOS-L-3-85°C', 'temperature':...",,,,3-fluoro-phenethylammonium iodide,C1=CC(=CC(=C1)F)CC[NH3+].[I-]
20,,,0.0,0.0,phenylethylammonium iodide,MAPbI3,poly(methyl methacrylate),poly(methyl methacrylate),NIP,"[{'test_name': 'ISOS-D', 'temperature': None, ...",,,,phenylethylammonium iodide,C1=CC=C(C=C1)CC[NH3+].[I-]
23,,,23.91,1.15,3-fluoro-phenethylammonium iodide (3F-PEA),Cs0.05 MA 0.1 FA 0.85 PbI 3,C60/ALD-SnO2,spiro-OMeTAD,PIN,"[{'test_name': 'ISOS-L-1I', 'temperature': 25,...",,,,3-fluoro-phenethylammonium iodide,C1=CC(=CC(=C1)F)CC[NH3+].[I-]
30,,,23.21,1.16,ammonia,PbI2: FAI: CsI (1:0.9:0.1 molar ratio),SnO2,Spiro-OMeTAD,NIP,"[{'test_name': 'TRPL', 'temperature': None, 't...",,,,ammonia,N


In [None]:
temp_data.shape

## `perovskite_composition` into features

In [72]:
import re

def parse_perovskite_formula(formula):
    """
    Parse a perovskite composition string into per-species stoichiometric amounts.

    Recognised species are FA, MA, Cs, Pb, Sn, I, Br and Cl. Caesium is accepted
    both as the proper symbol ``Cs`` and as ``CS``; it is reported under the key
    ``'CS'``. A parenthesised group followed by a number, e.g. ``(FAPbI3)0.95``,
    has that number applied to every species inside it. A species with no
    number (including symbolic amounts such as ``x`` or ``1-x``'s ``x`` part)
    counts as 1.

    :param formula: Composition string, e.g. ``"Cs0.05MA0.15FA0.8Pb(I0.95Br0.05)3"``.
        Whitespace between a species and its number is tolerated.
    :return: Dict with keys ``'FA', 'MA', 'CS', 'Pb', 'Sn', 'I', 'Br', 'Cl'``
        mapping to amounts rounded to 2 decimals. Non-string input yields all zeros.
    """
    # Define allowed species; 'CS' is kept as the output key for caesium
    allowed_species = ["FA", "MA", "CS", "Pb", "Sn", "I", "Br", "Cl"]

    # Dictionary to store parsed results (initialize with 0.0 for all species)
    parsed_result = {species: 0.0 for species in allowed_species}

    if not isinstance(formula, str):
        return parsed_result

    # A well-formed decimal only, so a stray trailing '.' (e.g. end of a
    # sentence) is never handed to float().
    number = r"\d+(?:\.\d+)?|\.\d+"
    species_pattern = re.compile(r"(FA|MA|CS|Cs|Pb|Sn|I|Br|Cl)\s*(" + number + r")?")
    group_pattern = re.compile(r"\(([^)]+)\)\s*(" + number + r")")

    def _accumulate(text, scale):
        # Add every species found in `text`, weighted by `scale`
        for symbol, count in species_pattern.findall(text):
            key = "CS" if symbol == "Cs" else symbol
            amount = float(count) if count else 1.0
            parsed_result[key] += amount * scale

    # Step 1: Handle groups in parentheses with coefficients (e.g., (FAPbI3)0.95)
    for group, coef in group_pattern.findall(formula):
        _accumulate(group, float(coef))

    # Step 2: Handle whatever is left once the processed groups are removed
    remaining_formula = group_pattern.sub("", formula)
    _accumulate(remaining_formula, 1.0)

    # Round to 2 decimal places for all values
    return {k: round(v, 2) for k, v in parsed_result.items()}

# Test cases
# Example formulas covering: coefficient groups, symbolic amounts (x), a flat
# formula, a group with an inner amount, and a space between a species and its number
formulas = [
    "(FAPbI3)0.95(MAPbBr3)0.05",
    "FA1-xMAxPbI3",
    "FA0.9CS0.1PbI2.9Br0.1",
    "(CS0.8FAPbI3)0.9(MAPbBr3)0.1",
    "(C4H9NH3)2PbI 4"  # Test case with space
]

# Print each formula next to its parsed composition for manual inspection
for formula in formulas:
    print(f"Formula: {formula}")
    print("Parsed:", parse_perovskite_formula(formula))
    print()


Formula: (FAPbI3)0.95(MAPbBr3)0.05
Parsed: {'FA': 0.95, 'MA': 0.05, 'CS': 0.0, 'Pb': 1.0, 'Sn': 0.0, 'I': 2.85, 'Br': 0.15, 'Cl': 0.0}

Formula: FA1-xMAxPbI3
Parsed: {'FA': 1.0, 'MA': 1.0, 'CS': 0.0, 'Pb': 1.0, 'Sn': 0.0, 'I': 3.0, 'Br': 0.0, 'Cl': 0.0}

Formula: FA0.9CS0.1PbI2.9Br0.1
Parsed: {'FA': 0.9, 'MA': 0.0, 'CS': 0.1, 'Pb': 1.0, 'Sn': 0.0, 'I': 2.9, 'Br': 0.1, 'Cl': 0.0}

Formula: (CS0.8FAPbI3)0.9(MAPbBr3)0.1
Parsed: {'FA': 0.9, 'MA': 0.1, 'CS': 0.72, 'Pb': 1.0, 'Sn': 0.0, 'I': 2.7, 'Br': 0.3, 'Cl': 0.0}

Formula: (C4H9NH3)2PbI 4
Parsed: {'FA': 0.0, 'MA': 0.0, 'CS': 0.0, 'Pb': 1.0, 'Sn': 0.0, 'I': 4.0, 'Br': 0.0, 'Cl': 0.0}



In [None]:
temp_data

In [None]:
df = pd.DataFrame(temp_data['perovskite_composition'].apply(parse_perovskite_formula))
df

In [None]:
import pandas as pd
# One-column frame whose cells are the parsed composition dicts
# (note: this rebinds `df`, which earlier held the raw JSON frame)
df = pd.DataFrame(temp_data['perovskite_composition'].apply(parse_perovskite_formula))

# Expand the dictionary column into separate columns
expanded_df = df['perovskite_composition'].apply(pd.Series)

# Fill missing values with 0 (for elements not present in some rows)
expanded_df = expanded_df.fillna(0)

# Replace the dict column with the expanded per-species columns
df = df.drop(columns=["perovskite_composition"]).join(expanded_df)
df

In [None]:
# Keep the parsed composition as a single dict-valued column on temp_data
temp_data['perovskite_comp'] = temp_data['perovskite_composition'].apply(parse_perovskite_formula)

In [None]:
import pandas as pd

# Extract perovskite composition data and apply parsing function
df = pd.DataFrame(temp_data['perovskite_composition'].apply(parse_perovskite_formula))

# Expand the dictionary column into separate columns (one per species).
# Building the frame from the list of dicts avoids the slow per-row pd.Series construction.
expanded_df = pd.DataFrame(df['perovskite_composition'].tolist(), index=df.index)

# Fill missing values with 0 (for elements not present in some rows)
expanded_df = expanded_df.fillna(0)

# Rename columns to "perovskite_XX"
expanded_df = expanded_df.rename(columns=lambda x: f"perovskite_{x}")

# Merge with the original temp_data DataFrame.
# Any copies of these columns left by a previous run of this cell are dropped first,
# so re-running the cell does not fail on overlapping column names.
temp_data = temp_data.drop(columns=expanded_df.columns, errors="ignore").join(expanded_df)

In [None]:
temp_data

## Some molecular features

In [None]:
temp_data.head().columns

In [None]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors

def compute_molecular_features(smiles):
    """
    Compute a fixed-length list of 30 RDKit descriptors for a SMILES string.

    :param smiles: SMILES string of the molecule.
    :return: List of 30 values in a fixed order (molecular weight first, Kappa2
        last). A fresh list of 30 NaNs is returned when ``smiles`` is not a
        non-blank string, when RDKit cannot parse it, or when any descriptor
        calculation raises.
    """
    n_features = 30  # must match the number of descriptors listed below

    # Missing SMILES (NaN / None / blank) cannot be parsed; skip RDKit entirely.
    if not isinstance(smiles, str) or not smiles.strip():
        return [np.nan] * n_features

    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return [np.nan] * n_features  # Return NaN for unparseable SMILES

        return [
            Descriptors.MolWt(mol),  # Molecular weight
            Descriptors.ExactMolWt(mol),  # Exact molecular weight (isotope-specific)
            Descriptors.MolLogP(mol),  # LogP (lipophilicity)
            Descriptors.TPSA(mol),  # Topological Polar Surface Area
            Descriptors.NumValenceElectrons(mol),  # Total valence electrons
            rdMolDescriptors.CalcNumRotatableBonds(mol),  # Rotatable bonds
            rdMolDescriptors.CalcNumHBA(mol),  # Hydrogen bond acceptors
            rdMolDescriptors.CalcNumHBD(mol),  # Hydrogen bond donors
            rdMolDescriptors.CalcFractionCSP3(mol),  # Fraction of sp3 carbons
            rdMolDescriptors.CalcNumAromaticRings(mol),  # Number of aromatic rings
            rdMolDescriptors.CalcNumSaturatedRings(mol),  # Number of saturated rings
            rdMolDescriptors.CalcNumHeteroatoms(mol),  # Number of heteroatoms
            rdMolDescriptors.CalcNumHeavyAtoms(mol),  # Number of heavy atoms
            rdMolDescriptors.CalcNumSpiroAtoms(mol),  # Number of spiro atoms
            rdMolDescriptors.CalcNumBridgeheadAtoms(mol),  # Number of bridgehead atoms
            Descriptors.FpDensityMorgan1(mol),  # Morgan fingerprint density (radius=1)
            Descriptors.FpDensityMorgan2(mol),  # Morgan fingerprint density (radius=2)
            Descriptors.FpDensityMorgan3(mol),  # Morgan fingerprint density (radius=3)
            Descriptors.qed(mol),  # Quantitative Estimate of Drug-likeness
            rdMolDescriptors.CalcNumLipinskiHBA(mol),  # Lipinski Hydrogen Bond Acceptors
            rdMolDescriptors.CalcNumLipinskiHBD(mol),  # Lipinski Hydrogen Bond Donors
            rdMolDescriptors.CalcNumRings(mol),  # Total number of rings
            rdMolDescriptors.CalcNumAmideBonds(mol),  # Number of amide bonds
            Descriptors.BalabanJ(mol),  # Balaban’s connectivity index
            Descriptors.BertzCT(mol),  # Bertz complexity
            Descriptors.Chi0(mol),  # Chi connectivity index (order 0)
            Descriptors.Chi1(mol),  # Chi connectivity index (order 1)
            Descriptors.Chi2n(mol),  # Chi connectivity index (order 2, non-H)
            Descriptors.Kappa1(mol),  # Kappa Shape Index (order 1)
            Descriptors.Kappa2(mol),  # Kappa Shape Index (order 2)
        ]
    except Exception:
        # Best-effort featurisation: a failing descriptor yields an all-NaN row.
        # `Exception` (not a bare except) so KeyboardInterrupt still stops the run.
        return [np.nan] * n_features

# Column names for the descriptors, in the order compute_molecular_features returns them
mol_feature_names = [
    'MolWt', 'ExactMolWt', 'LogP', 'TPSA', 'NumValenceElectrons',
    'NumRotBonds', 'NumHBA', 'NumHBD', 'FractionCSP3', 'AromaticRings',
    'SaturatedRings', 'Heteroatoms', 'HeavyAtoms', 'SpiroAtoms',
    'BridgeheadAtoms', 'FpDensityMorgan1', 'FpDensityMorgan2',
    'FpDensityMorgan3', 'QED', 'LipinskiHBA',
    'LipinskiHBD', 'NumRings', 'NumAmideBonds', 'BalabanJ',
    'BertzCT', 'Chi0', 'Chi1', 'Chi2n', 'Kappa1', 'Kappa2'
]

# Apply function to dataset
mol_features = temp_data['passivating_molecule_SMILES'].apply(compute_molecular_features)

# Convert list to DataFrame (same index as temp_data so rows line up on merge)
mol_features_df = pd.DataFrame(mol_features.tolist(),
                               columns=mol_feature_names,
                               index=temp_data.index)

# Merge with original dataset.
# Descriptor columns left over from a previous run of this cell are dropped first;
# otherwise re-running would leave duplicate column names in temp_data.
temp_data = pd.concat(
    [temp_data.drop(columns=mol_feature_names, errors="ignore"), mol_features_df],
    axis=1,
)

In [None]:
temp_data.shape

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Assuming `temp_data` contains the full dataset with the required columns
features = [
                                   'MolWt', 'ExactMolWt', 'LogP', 'TPSA', 'NumValenceElectrons',
                                   'NumRotBonds', 'NumHBA', 'NumHBD', 'FractionCSP3', 'AromaticRings',
                                   'SaturatedRings', 'Heteroatoms', 'HeavyAtoms', 'SpiroAtoms', 
                                   'BridgeheadAtoms', 'FpDensityMorgan1', 'FpDensityMorgan2', 
                                   'FpDensityMorgan3', 'QED', 'LipinskiHBA', 
                                   'LipinskiHBD', 'NumRings', 'NumAmideBonds', 'BalabanJ', 
                                   'BertzCT', 'Chi0', 'Chi1', 'Chi2n', 'Kappa1', 'Kappa2'
                               ]
target = 'treated_pce'

# Fixed seed so the split and the forest are reproducible across kernel restarts
SEED = 42

# Drop rows with missing values in the features or target
# (descriptor rows are all-NaN when a SMILES could not be featurised).
# NOTE: this rebinds `data`, which earlier cells use for the full cleaned dataset.
data = temp_data.dropna(subset=features + [target])

# Split into features (X) and target (y)
X = data[features]
y = data[target]

# Split into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Train a Random Forest Regressor
model = RandomForestRegressor(random_state=SEED)
model.fit(X_train, y_train)

# Make predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Evaluate the model
train_mse = mean_squared_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)

test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Training Mean Squared Error: {train_mse:.2f}")
print(f"Training R-squared: {train_r2:.2f}")
print(f"Testing Mean Squared Error: {test_mse:.2f}")
print(f"Testing R-squared: {test_r2:.2f}")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Fit an ordinary least-squares baseline on the same train split (fit returns the estimator)
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split
y_pred = linear_model.predict(X_test)

# Score the held-out predictions
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared: {r2:.2f}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One row per feature with its fitted coefficient, largest coefficient first
coefficients = pd.DataFrame(
    {'Feature': features, 'Coefficient': linear_model.coef_}
).sort_values(by='Coefficient', ascending=False)

# Horizontal bar chart of the coefficients in that order
plt.figure(figsize=(8, 6))
sns.barplot(data=coefficients, x='Coefficient', y='Feature')
plt.title('Linear Regression Coefficients')
plt.grid(True)
plt.show()