In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests


In [None]:
# Load the four pollutant CSV files, one DataFrame per file.
csv_files = (
    "Daily_Avg_CO.csv",
    "Daily_Avg_Ozone.csv",
    "Daily_Avg_PM2.5.csv",
    "Daily_Avg_PM10.csv",
)
df_co, df_ozone, df_pm25, df_pm10 = (pd.read_csv(path) for path in csv_files)




In [None]:
# Show the column labels of every loaded frame, in load order.
for frame in (df_co, df_ozone, df_pm25, df_pm10):
    print(frame.columns)


Index(['Date', 'Daily Max 8-hour CO Concentration'], dtype='object')
Index(['Date', 'Daily Max 8-hour Ozone Concentration'], dtype='object')
Index(['Date', 'Daily Mean PM2.5 Concentration'], dtype='object')
Index(['Date', 'Daily Mean PM10 Concentration'], dtype='object')


In [None]:
# Parse each frame's 'Date' column to datetimes and make it the index.
# The frames are modified in place because the following cells keep using
# the names df_co, df_ozone, df_pm25 and df_pm10.
for df in [df_co, df_ozone, df_pm25, df_pm10]:
    # Guard so the cell can be re-run safely: once 'Date' has become the index
    # it is no longer a column, and an unguarded df['Date'] raises KeyError.
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)


In [None]:
# Place the four series side by side on their index (one column per pollutant),
# keep only rows where every series has a value, and sort by index so the rows
# are in chronological order: the lag-based Granger tests treat consecutive
# rows as consecutive observations.
combined_df = (
    pd.concat([df_co, df_ozone, df_pm25, df_pm10], axis=1)
    .dropna()  # Drop rows with missing values
    .sort_index()
)
# NOTE(review): dropping incomplete rows can leave date gaps, so "lag k" means
# k rows back rather than exactly k calendar days — confirm this is acceptable.


In [None]:
# Column labels of the four pollutant series, named once to avoid retyping
# the long strings.
CO_COL = 'Daily Max 8-hour CO Concentration'
OZONE_COL = 'Daily Max 8-hour Ozone Concentration'
PM25_COL = 'Daily Mean PM2.5 Concentration'
PM10_COL = 'Daily Mean PM10 Concentration'

# (x, y) pairs: each entry is tested as "does x Granger-cause y?".
variable_pairs = [
    (PM25_COL, CO_COL),
    (OZONE_COL, CO_COL),
    (PM25_COL, OZONE_COL),
    (PM25_COL, PM10_COL),
    (PM10_COL, CO_COL),
]


In [None]:
max_lag = 5  # Maximum number of lags to test

# Per-pair test output keyed by (x, y). `result` alone is rebound on every
# iteration, so without this dict only the last pair's statistics would
# remain available after the loop.
granger_results = {}

for x, y in variable_pairs:
    print(f"\nTesting if {x} Granger-causes {y}")
    # Column order matters: grangercausalitytests checks whether the SECOND
    # column helps predict the FIRST, hence [y, x] for "x Granger-causes y".
    # NOTE(review): recent statsmodels releases deprecate `verbose`; it is kept
    # so the printed report is unchanged — confirm against the installed version.
    result = grangercausalitytests(combined_df[[y, x]], max_lag, verbose=True)
    granger_results[(x, y)] = result



Testing if Daily Mean PM2.5 Concentration Granger-causes Daily Max 8-hour CO Concentration

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=26.0556 , p=0.0000  , df_denom=1030, df_num=1
ssr based chi2 test:   chi2=26.1315 , p=0.0000  , df=1
likelihood ratio test: chi2=25.8064 , p=0.0000  , df=1
parameter F test:         F=26.0556 , p=0.0000  , df_denom=1030, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=17.3505 , p=0.0000  , df_denom=1027, df_num=2
ssr based chi2 test:   chi2=34.8699 , p=0.0000  , df=2
likelihood ratio test: chi2=34.2938 , p=0.0000  , df=2
parameter F test:         F=17.3505 , p=0.0000  , df_denom=1027, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=11.1408 , p=0.0000  , df_denom=1024, df_num=3
ssr based chi2 test:   chi2=33.6509 , p=0.0000  , df=3
likelihood ratio test: chi2=33.1134 , p=0.0000  , df=3
parameter F test:         F=11.1408 , p=0.0000  , df_denom=1024, df_nu



ssr based F test:         F=2.2889  , p=0.0440  , df_denom=1018, df_num=5
ssr based chi2 test:   chi2=11.5680 , p=0.0412  , df=5
likelihood ratio test: chi2=11.5035 , p=0.0423  , df=5
parameter F test:         F=2.2889  , p=0.0440  , df_denom=1018, df_num=5

Testing if Daily Mean PM10 Concentration Granger-causes Daily Max 8-hour CO Concentration

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=2.0828  , p=0.1493  , df_denom=1030, df_num=1
ssr based chi2 test:   chi2=2.0888  , p=0.1484  , df=1
likelihood ratio test: chi2=2.0867  , p=0.1486  , df=1
parameter F test:         F=2.0828  , p=0.1493  , df_denom=1030, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=4.8165  , p=0.0083  , df_denom=1027, df_num=2
ssr based chi2 test:   chi2=9.6798  , p=0.0079  , df=2
likelihood ratio test: chi2=9.6347  , p=0.0081  , df=2
parameter F test:         F=4.8165  , p=0.0083  , df_denom=1027, df_num=2

Granger Causality
number of lags (no zero)



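The Granger tests suggest that past PM2.5 and Ozone values help predict CO concentrations. Note that Granger causality indicates predictive precedence, not proof of a direct causal influence.
PM2.5 also appears to have some effect on Ozone and PM10 concentrations, though the effect on PM10 appears more delayed.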