In [None]:
from google.colab import drive
# Mount at /content/drive; a later cell pip-installs pywhy-llm from
# /content/drive/MyDrive, so this cell has to run first.
drive.mount('/content/drive')

In [None]:
pip install '/content/drive/MyDrive/pywhy-llm'

In [None]:
pip install guidance

In [None]:
pip install python-dotenv

In [None]:
import os
from getpass import getpass

from dotenv import load_dotenv
import guidance
from guidance import models

# Load variables from a local .env file (if one exists) into os.environ.
# By default python-dotenv leaves variables that are already set untouched.
load_dotenv()

# Do not hardcode the key in the notebook. Previously the key was assigned an
# empty string unconditionally, which discarded whatever load_dotenv() (or the
# surrounding environment) had provided. Only prompt when no key is available.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
from typing import Dict, Tuple, List

# Names of all variables in the sea-ice example. Every endpoint used in
# `sea_ice_relationships` below is one of these names.
sea_ice_variables = [
    "geopotential_heights", "relative_humidity", "sea_level_pressure",
    "zonal_wind_at_10_meters", "meridional_wind_at_10_meters",
    "sensible_plus_latent_heat_flux",
    "total_precipitation", "total_cloud_cover", "total_cloud_water_path",
    "surface_net_shortwave_flux", "surface_net_longwave_flux",
    "northern_hemisphere_sea_ice_extent",
]

# Treatment/outcome pair passed to the suggesters in the cells below.
treatment, outcome = (
    "surface_net_longwave_flux",
    "northern_hemisphere_sea_ice_extent",
)

# Ground-truth confounders of the treatment -> outcome relationship
# (surface_net_longwave_flux -> northern_hemisphere_sea_ice_extent).
sea_ice_confounders = ["total_precipitation"]

# Adjacency view of the edge list: each key is the first element of an edge and
# its list holds the second elements, in the order the edges are emitted.
# NOTE(review): edges read as (cause, effect) - consistent with the
# treatment -> outcome pair and the confounder declared above; confirm.
_sea_ice_effects_by_cause: Dict[str, List[str]] = {
    "surface_net_longwave_flux": ["northern_hemisphere_sea_ice_extent"],
    "geopotential_heights": [
        "surface_net_longwave_flux",
        "relative_humidity",
        "sea_level_pressure",
    ],
    "relative_humidity": [
        "total_cloud_cover",
        "total_cloud_water_path",
        "total_precipitation",
        "surface_net_longwave_flux",
    ],
    "sea_level_pressure": [
        "relative_humidity",
        "geopotential_heights",
        "zonal_wind_at_10_meters",
        "northern_hemisphere_sea_ice_extent",
        "sensible_plus_latent_heat_flux",
        "meridional_wind_at_10_meters",
    ],
    "zonal_wind_at_10_meters": [
        "northern_hemisphere_sea_ice_extent",
        "sensible_plus_latent_heat_flux",
    ],
    "meridional_wind_at_10_meters": [
        "northern_hemisphere_sea_ice_extent",
        "sensible_plus_latent_heat_flux",
    ],
    "sensible_plus_latent_heat_flux": [
        "northern_hemisphere_sea_ice_extent",
        "sea_level_pressure",
        "zonal_wind_at_10_meters",
        "meridional_wind_at_10_meters",
        "total_precipitation",
        "total_cloud_cover",
        "total_cloud_water_path",
    ],
    "total_precipitation": [
        "northern_hemisphere_sea_ice_extent",
        "relative_humidity",
        "sensible_plus_latent_heat_flux",
        "surface_net_longwave_flux",
        "total_cloud_cover",
        "total_cloud_water_path",
    ],
    "total_cloud_water_path": [
        "total_precipitation",
        "sensible_plus_latent_heat_flux",
        "relative_humidity",
        "surface_net_longwave_flux",
        "surface_net_shortwave_flux",
    ],
    "total_cloud_cover": [
        "total_precipitation",
        "sensible_plus_latent_heat_flux",
        "relative_humidity",
        "surface_net_longwave_flux",
        "surface_net_shortwave_flux",
    ],
    "surface_net_shortwave_flux": ["northern_hemisphere_sea_ice_extent"],
    "northern_hemisphere_sea_ice_extent": [
        "sea_level_pressure",
        "zonal_wind_at_10_meters",
        "meridional_wind_at_10_meters",
        "sensible_plus_latent_heat_flux",
        "surface_net_shortwave_flux",
        "surface_net_longwave_flux",
    ],
}

# Flatten the mapping into the list of 2-tuples; dict insertion order keeps the
# edges in exactly the order written above.
sea_ice_relationships: List[Tuple[str, str]] = [
    (cause, effect)
    for cause, effects in _sea_ice_effects_by_cause.items()
    for effect in effects
]

## Helpers

Model type - the type of LLM to use.
By default it is set to a completion model.

Relationship strategy - the type of request made to the LLM (request parents, children, or a pairwise relationship).

In [None]:
from pywhyllm import ModelType, RelationshipStrategy
# Selected model type and relationship strategy.
# NOTE(review): neither name is referenced again in the cells visible here;
# the later calls pass a RelationshipStrategy member explicitly.
model_type = ModelType.Completion
relationship_strategy = RelationshipStrategy.Parent

## Model

In [None]:
from pywhyllm.suggesters.model_suggester import ModelSuggester
# Suggester used by all "Model" cells below.
# NOTE(review): 'gpt-4' is presumably the LLM identifier - confirm against the installed pywhyllm.
m = ModelSuggester('gpt-4')



In [None]:
# Ask the model suggester for domain expertises for the sea-ice variables;
# the result is reused by most of the later suggester calls.
domain_expertises = m.suggest_domain_expertises(
    "causal mechanisms",
    sea_ice_variables,
)

In [None]:
# Show the suggested domain expertises.
domain_expertises

In [None]:
# Ask the model suggester for domain experts for the same variable set.
domain_experts = m.suggest_domain_experts(
    "causal mechanisms",
    sea_ice_variables,
)

In [None]:
# Show the suggested domain experts.
domain_experts

In [None]:
# Suggest parents of relative_humidity among the sea-ice variables.
# The domain string is sent to the LLM, so the "meterology" typo is corrected.
parents = m.suggest_parents(
    "causal mechanisms",
    "meteorology",
    "relative_humidity",
    sea_ice_variables,
)

In [None]:
# Show the suggested parents of relative_humidity.
parents

In [None]:
# Suggest children of relative_humidity among the sea-ice variables.
# The domain string is sent to the LLM, so the "meterology" typo is corrected.
children = m.suggest_children(
    "causal mechanisms",
    "meteorology",
    "relative_humidity",
    sea_ice_variables,
)

In [None]:
# Show the suggested children of relative_humidity.
children

In [None]:
# Ask for the relationship between total_precipitation and relative_humidity.
# The domain string is sent to the LLM, so the "meterology" typo is corrected.
pairwise_relationship = m.suggest_pairwise_relationship(
    "meteorology",
    "total_precipitation",
    "relative_humidity",
)

In [None]:
# Show the suggested pairwise relationship.
pairwise_relationship

In [None]:
# Suggest confounders of the treatment -> outcome relationship; the expected
# answer for this example is recorded in `sea_ice_confounders`.
confounders = m.suggest_confounders(
    treatment,
    outcome,
    sea_ice_variables,
    domain_expertises,
    "causal mechanisms",
)

In [None]:
# Show the suggested confounders.
confounders

In [None]:
# Returns a dictionary recording how many times each edge was suggested.
model_edges = m.suggest_relationships(
    treatment,
    outcome,
    sea_ice_variables,
    domain_expertises,
    "causal mechanisms",
    None,
    RelationshipStrategy.Pairwise,
)

In [None]:
# Show the suggested edges.
model_edges

## Identifier

In [None]:
from pywhyllm.suggesters.identification_suggester import IdentificationSuggester
# Suggester used by the "Identifier" cells below.
# NOTE(review): 'gpt-4' is presumably the LLM identifier - confirm against the installed pywhyllm.
i = IdentificationSuggester('gpt-4')

In [None]:
# Calls the modeler's suggest_confounders in the background.
backdoor = i.suggest_backdoor(
    treatment,
    outcome,
    sea_ice_variables,
    domain_expertises,
    "causal mechanisms",
)

In [None]:
# Show the suggested backdoor set.
backdoor

In [None]:
# Suggests instrumental variables.
ivs = i.suggest_ivs(
    treatment,
    outcome,
    sea_ice_variables,
    domain_expertises,
)

In [None]:
# Show the suggested instrumental variables.
ivs

## Validator

In [None]:
from pywhyllm.suggesters.validation_suggester import ValidationSuggester
# Suggester used by the "Validator" cells below.
# NOTE(review): 'gpt-4' is presumably the LLM identifier - confirm against the installed pywhyllm.
v = ValidationSuggester('gpt-4')

In [None]:
# Ask the validator for latent confounders of the treatment -> outcome pair.
# Unlike the other suggester calls, no variable list is passed here.
latent_confounders = v.suggest_latent_confounders(
    treatment,
    outcome,
    domain_expertises,
)

In [None]:
# Show the suggested latent confounders.
latent_confounders

In [None]:
# Ask the validator for negative controls among the sea-ice variables.
negative_controls = v.suggest_negative_controls(
    treatment,
    outcome,
    sea_ice_variables,
    domain_expertises,
)

In [None]:
# Show the suggested negative controls.
negative_controls

In [None]:
# Request a critique of the total_precipitation / relative_humidity pair using
# the first suggested expertise.
# NOTE(review): indexing assumes domain_expertises is a non-empty sequence - confirm.
critique = v.request_pairwise_critique(
    domain_expertises[0],
    "total_precipitation",
    "relative_humidity",
)

In [None]:
# Show the pairwise critique.
critique

In [None]:
# Short aliases for the RelationshipStrategy members.
parent = RelationshipStrategy.Parent
child = RelationshipStrategy.Child
pairwise = RelationshipStrategy.Pairwise

In [None]:
# Critique the edges suggested earlier (`model_edges`) with the pairwise strategy.
# This rebinds `critique`, replacing the pairwise critique from the earlier cell.
critique = v.critique_graph(
    sea_ice_variables,
    model_edges,
    domain_expertises,
    "causal mechanisms",
    None,
    pairwise,
)

In [None]:
# Show the graph critique.
critique