In [26]:

import os
import yaml
import pandas as pd

ROOT_DIR = "/home/melodino/veh_type_pred"

In [27]:
def load_yaml(file_path):
    """Load a YAML file and return the content."""
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)

def parse_dependencies(dependencies):
    """Parse the list of dependencies into a dictionary of package, version, and build."""
    dep_dict = {}
    for dep in dependencies:
        if isinstance(dep, str):
            parts = dep.split('=')
            if len(parts) == 3:
                package, version, build = parts
                dep_dict[package] = {'version': version, 'build': build}
            elif len(parts) == 2:
                package, version = parts
                dep_dict[package] = {'version': version, 'build': None}
            else:
                dep_dict[package] = {'version': None, 'build': None}  # No version or build specified
        elif isinstance(dep, dict):
            pip_packages = dep['pip']
            for pip_pkg in pip_packages:
                package, version = pip_pkg.split('==')
                dep_dict[package] = {'version': version, 'build': 'pip'}
    return dep_dict


def compare_envs(env1, env2):
    """Compare the dependencies of two environments."""
    dep1 = parse_dependencies(env1.get('dependencies', []))
    dep2 = parse_dependencies(env2.get('dependencies', []))
    
    all_keys = set(dep1.keys()).union(set(dep2.keys()))
    data = []

    for key in all_keys:
        env1_info = dep1.get(key, {'version': 'non_existent', 'build': 'non_existent'})
        env2_info = dep2.get(key, {'version': 'non_existent', 'build': 'non_existent'})

        # Add to data only if builds are different or the package exists in only one environment
        if env1_info['build'] != env2_info['build'] or env1_info['version'] == 'non_existent' or env2_info['version'] == 'non_existent':
            data.append([
                key,
                env1_info['version'],
                env1_info['build'],
                env2_info['version'],
                env2_info['build']
            ])
    
    # Create a DataFrame from the collected data
    df = pd.DataFrame(data, columns=['Package', 'Env1_Version', 'Env1_Build', 'Env2_Version', 'Env2_Build'])
    return df

In [37]:
# Load the environments
env1 = load_yaml(f"{ROOT_DIR}/target_env.yml")
env2 = load_yaml(f"{ROOT_DIR}/env.yml")

# Compare environments and generate DataFrame
comparison_df = compare_envs(env1, env2)
comparison_df.sort_values(by=['Package'], inplace=True)

# Print the DataFrame
with pd.option_context('display.max_rows', None):  # Sets max_rows to None within the block
    display(comparison_df)

Unnamed: 0,Package,Env1_Version,Env1_Build,Env2_Version,Env2_Build
152,_anaconda_depends,2024.02,py311_mkl_1,non_existent,non_existent
359,_libgcc_mutex,non_existent,non_existent,0.1,conda_forge
545,_openmp_mutex,non_existent,non_existent,4.5,2_gnu
312,abseil-cpp,20220623.0,h0e60522_0,non_existent,non_existent
408,absl-py,non_existent,non_existent,2.1.0,pyhd8ed1ab_0
159,aiobotocore,2.7.0,py311haa95532_0,non_existent,non_existent
325,aiohttp,3.9.3,py311h2bbff1b_0,non_existent,non_existent
35,aiohttp-cors,0.7.0,py_0,non_existent,non_existent
584,aioitertools,0.7.1,pyhd3eb1b0_0,non_existent,non_existent
589,aiosignal,1.2.0,pyhd3eb1b0_0,non_existent,non_existent
