# 04 Transfer Entropy

In [None]:
%load_ext autoreload
%autoreload 2
import sys

sys.path.append("../")

import jupyter_black

jupyter_black.load()

In [None]:
import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import src.cleansing as cleansing
from loguru import logger


# Reset loguru's handlers and route log messages through print(), which writes
# to standard output (presumably so they appear in the notebook cell output).
logger.remove()  # Remove any previous log handlers
# NOTE(review): level="INFO" should be the minimum severity this sink receives,
# per loguru's `logger.add` documentation - confirm if DEBUG output is wanted.
logger.add(lambda msg: print(msg), level="INFO")

In [None]:
# Load the raw weather dataset from ../data.
df = pd.read_csv("../data/Weather Data.csv")

# Clean the raw frame with the project helper. Per the original notes it is
# expected to (implementation lives in src.cleansing and is not shown here):
#   - rename columns
#   - parse the datetime column
df = cleansing.clean_weather_data(df)

In [None]:
# Preview the first rows of the frame returned by the cleaning step.
df.head()

In [None]:
# Transfer Entropy uses the conditional mutual information formulation
from src.transfer_entropy import get_transfer_entropy

## The lags

We observed that:
- `temp_c` and `dew_point_temp_c` have high positive correlation
- `visibility_km` and `real_hum_pct` have high negative correlation
- `dew_point_temp_c` and `date_time` also appear to have some correlation

Now we are going to test for causal relationships between them.

But first, we need to define the lags: based on our understanding of the data, which lags are appropriate for detecting the different causal relationships?

We have one sample per hour, so we can explore causality between variables with short lags (1h, 3h) and longer lags (12h, 24h, 48h).

In [None]:
def run_transfer_entropy_for_dataframe(
    df: pd.DataFrame, array_of_lags: list[int], **kwargs
) -> pd.DataFrame:
    """Compute pairwise transfer entropy between the numeric columns of ``df``.

    Every unordered pair of numeric columns is evaluated once per lag, in both
    directions, by calling ``get_transfer_entropy``.

    Args:
        df: Input data. Only columns with a numeric dtype are considered.
        array_of_lags: Lags to evaluate; each one is passed as the ``lag``
            argument of ``get_transfer_entropy``.
        **kwargs: Forwarded unchanged to ``get_transfer_entropy``.

    Returns:
        A DataFrame with one row per (column pair, lag) and the columns
        ``ColumnX1``, ``ColumnX2``, ``Lag``, ``TE_from_x2_to_x1`` and
        ``TE_from_x1_to_x2``. It is empty (but still has those columns) when
        there are fewer than two numeric columns or no lags.
    """
    from itertools import combinations

    result_columns = [
        "ColumnX1",
        "ColumnX2",
        "Lag",
        "TE_from_x2_to_x1",
        "TE_from_x1_to_x2",
    ]

    # Iterate over column *names*: select_dtypes returns a DataFrame, whose
    # len() is the row count and whose [] operator looks up column labels.
    numerical_columns = df.select_dtypes(include=[np.number]).columns

    results = []
    # Compare each 2 by 2; combinations() yields every unordered pair once.
    for col_x1, col_x2 in combinations(numerical_columns, 2):
        # The series do not depend on the lag, so convert them once per pair.
        x1 = df[col_x1].values.tolist()
        x2 = df[col_x2].values.tolist()
        for lag in array_of_lags:
            te_from_2_to_1 = get_transfer_entropy(
                target_column=x1, causal_column=x2, lag=lag, **kwargs
            )
            te_from_1_to_2 = get_transfer_entropy(
                target_column=x2, causal_column=x1, lag=lag, **kwargs
            )
            results.append(
                {
                    "ColumnX1": col_x1,
                    "ColumnX2": col_x2,
                    "Lag": lag,
                    "TE_from_x2_to_x1": te_from_2_to_1,
                    "TE_from_x1_to_x2": te_from_1_to_2,
                }
            )
    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(results, columns=result_columns)

In [None]:
# Run the pairwise analysis with a single lag of 1.
# NOTE(review): presumably one hour, given the hourly sampling described in the
# notes above - assumes the lag is counted in rows; confirm against
# get_transfer_entropy.
run_transfer_entropy_for_dataframe(df, [1])