In [None]:
# Import necessary libraries
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import dowhy
from dowhy import CausalModel
import re
from collections import Counter



def load_data(filepath):
    """Read a CSV file whose first row holds the column names.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: One row per data record, with columns named after
        the file's header row, dtypes inferred by pandas, and the default
        ``RangeIndex``.
    """
    # Let pandas consume the header row itself. Reading with ``header=None``
    # and promoting row 0 afterwards parses the header text as data, which
    # forces every column to object (string) dtype and leaves numeric
    # columns unusable for arithmetic or model fitting.
    return pd.read_csv(filepath, header=0)



# Load the CSV into nashvilleDF via load_data().
# NOTE(review): absolute, machine-specific Windows path -- this cell only
# runs where the file exists at exactly this location.
file_path = r"C:\Users\nstep\TSU\SeniorProject\nashvilleDF.csv"
nashvilleDF = load_data(file_path)

# TODO: a lower-casing step was noted here, but no code implements it.

# Preview the first rows of the loaded frame (head() defaults to 5 rows).
nashvilleDF.head()

# Preview the first 5 values of the 'id' column. This does not test for the
# column's presence: indexing raises KeyError if 'id' is missing.
nashvilleDF['id'].head()

In [None]:
import numpy as np
import pandas as pd
import logging
import dowhy
from dowhy import CausalModel
from econml.metalearners import TLearner
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from joblib import Parallel, delayed

# New frame with every row that contains a missing value removed.
subsetdf = nashvilleDF.dropna()

# Name of the column used as the outcome variable.
outcome = 'price'

# Placeholder for the per-treatment results.
# NOTE: this name is rebound to the Parallel(...) output further down, so the
# empty list created here is never appended to.
results = []

# EconML T-learner configured with HistGradientBoostingRegressor as its model.
learner = TLearner(models=HistGradientBoostingRegressor())

def get_causal_estimate(treatment):
    """Estimate the effect of one column on the outcome with a T-learner.

    Builds a DoWhy ``CausalModel`` over the module-level ``subsetdf`` in which
    every column other than ``treatment`` and the module-level ``outcome`` is
    declared a common cause, then estimates the effect through DoWhy's
    EconML ``TLearner`` backdoor method.

    Args:
        treatment: Name of the ``subsetdf`` column to use as the treatment.

    Returns:
        dict: ``{"causal_estimate": <estimate value>, "treatment": treatment}``.
    """
    common_causes = [col for col in subsetdf.columns if col != treatment and col != outcome]

    # Create a causal model over subsetdf
    model = CausalModel(
        data=subsetdf,
        treatment=treatment,
        outcome=outcome,
        common_causes=common_causes,
        logging_level=logging.INFO
    )

    # Identify the causal effect
    identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)

    # DoWhy instantiates the EconML estimator itself from "init_params" and
    # fits it with "fit_params"; an already-built TLearner instance cannot be
    # handed over under an arbitrary key. A fresh regressor is created per
    # call so parallel workers never share model state.
    method_params = {
        "init_params": {"models": HistGradientBoostingRegressor()},
        "fit_params": {},
    }

    # NOTE(review): control_value=0 / treatment_value=1 presume a binary 0/1
    # treatment column -- confirm this holds for every column passed in.
    # NOTE(review): confirm that the EconML wrapper honours target_units="att".
    causal_estimate = model.estimate_effect(identified_estimand,
                                            method_name="backdoor.econml.metalearners.TLearner",
                                            control_value=0,
                                            treatment_value=1,
                                            target_units="att",
                                            method_params=method_params)

    return {
        "causal_estimate": causal_estimate.value,
        "treatment": treatment
    }

# Every column except the outcome is tried as a treatment in turn.
treatments = [name for name in subsetdf.columns if name != outcome]

# Dispatch one get_causal_estimate call per treatment through joblib
# (n_jobs=-1 requests all available CPUs).
results = Parallel(n_jobs=-1)(map(delayed(get_causal_estimate), treatments))

# Rank the estimates from largest to smallest.
sorted_results = sorted(
    results,
    key=lambda entry: entry['causal_estimate'],
    reverse=True,
)

# Report the ten highest-ranked treatments.
for result in sorted_results[:10]:
    print(f"Treatment: {result['treatment']}, Causal Estimate: {result['causal_estimate']}")
