In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.stattools import adfuller
from arch.unitroot import PhillipsPerron
from scipy.stats import jarque_bera
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

In [2]:
# Read synthetic_data_deepecho.csv (path relative to the working directory)
# into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="synthetic_data_deepecho.csv")
data.head(n=5)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,No,Date/Time,Date/time end,Altitude [m],Size fraction,Mass v [µg/m**3],Na+ [µg/m**3],[NH4]+ [µg/m**3],...,Cu [ng/m**3],Zn [ng/m**3],Pb [ng/m**3],As [ng/m**3],Se [ng/m**3],Sr [ng/m**3],Rb [ng/m**3],Ba [ng/m**3],La [ng/m**3],Ce [ng/m**3]
0,6215526,8970884,965,2010-02-07 20:34:10,2011-04-14 13:28:20,42,3.5 - 10 µm,19.331,3.7476,0.0,...,0.571,1.106,0.5178,0.0,0.1511,0.0,0.1016,0.0,1.722,1.032
1,3467512,3143223,1220,2011-12-08 00:29:02,2010-06-21 16:47:54,42,3.5 - 10 µm,0.009,0.0,0.0539,...,0.01,0.224,0.5178,0.076,0.0048,18.8602,0.7628,6.2324,4.7471,0.0
2,4726824,10810119,1220,2010-04-09 09:32:58,2012-02-05 04:11:43,42,0.14 - 0.42 µm,19.594,0.1166,0.0058,...,0.688,1.801,0.0,0.0,0.0738,0.0,0.5086,8.053,3.1104,1.001
3,8274909,7748627,1120,2011-01-24 19:07:04,2011-11-21 03:37:56,42,3.5 - 10 µm,18.753,2.9514,0.0213,...,0.388,1.746,1.1215,0.076,0.0258,0.0,1.0539,0.0,6.774,0.0
4,12485358,915797,1220,2010-02-23 02:00:38,2010-11-28 13:43:23,42,0.14 - 0.42 µm,32.891,1.5483,0.0261,...,0.575,1.106,0.0,0.0,0.093,0.0,0.3516,9.777,0.0,0.0


In [3]:
# Remove the two unnamed leading columns (presumably saved-index artifacts --
# TODO confirm). errors='ignore' keeps this cell idempotent: because it
# overwrites `data`, re-running it would otherwise raise KeyError once the
# labels are already gone.
data = data.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore')

In [4]:
# Summarise the frame after the column drop: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   No                  1000 non-null   int64  
 1   Date/Time           1000 non-null   object 
 2   Date/time end       1000 non-null   object 
 3   Altitude [m]        1000 non-null   int64  
 4   Size fraction       1000 non-null   object 
 5   Mass v [µg/m**3]    1000 non-null   float64
 6   Na+ [µg/m**3]       1000 non-null   float64
 7   [NH4]+ [µg/m**3]    1000 non-null   float64
 8   K+ [µg/m**3]        1000 non-null   float64
 9   Mg2+ [µg/m**3]      1000 non-null   float64
 10  Ca2+ [µg/m**3]      1000 non-null   float64
 11  Cl- [µg/m**3]       1000 non-null   float64
 12  [NO3]- [µg/m**3]    1000 non-null   float64
 13  [SO4]2- [µg/m**3]   1000 non-null   float64
 14  [C2O4]2- [µg/m**3]  1000 non-null   float64
 15  Br- [µg/m**3]       1000 non-null   float64
 16  C org [

In [5]:
# Keep everything from the sixth column onwards for the statistical tests.
# In the info() listing above, the five skipped columns are No, Date/Time,
# Date/time end, Altitude [m] and Size fraction.
# NOTE(review): the unit-root tests below consume the rows in their stored
# order -- confirm that this order is the intended time order.
stats_data = data[data.columns[5:]]
stats_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Mass v [µg/m**3]    1000 non-null   float64
 1   Na+ [µg/m**3]       1000 non-null   float64
 2   [NH4]+ [µg/m**3]    1000 non-null   float64
 3   K+ [µg/m**3]        1000 non-null   float64
 4   Mg2+ [µg/m**3]      1000 non-null   float64
 5   Ca2+ [µg/m**3]      1000 non-null   float64
 6   Cl- [µg/m**3]       1000 non-null   float64
 7   [NO3]- [µg/m**3]    1000 non-null   float64
 8   [SO4]2- [µg/m**3]   1000 non-null   float64
 9   [C2O4]2- [µg/m**3]  1000 non-null   float64
 10  Br- [µg/m**3]       1000 non-null   float64
 11  C org [µg/m**3]     1000 non-null   float64
 12  EC [µg/m**3]        1000 non-null   float64
 13  TC [µg/m**3]        1000 non-null   float64
 14  Ca [ng/m**3]        1000 non-null   float64
 15  Ti [ng/m**3]        1000 non-null   float64
 16  V [ng/m

Performing Stationarity Tests

In [6]:
# Augmented Dickey-Fuller (ADF) test, run once per column of stats_data with
# adfuller's default settings. The null hypothesis is that the series has a
# unit root, so a small p-value is evidence against a unit root.
# NOTE(review): each column is tested in its stored row order -- confirm that
# this is the intended time order.
adf_results = {
    'Chemical': [],
    'ADF Statistic': [],
    'p-value': [],
    'Critical Value 1%': [],
    'Critical Value 5%': [],
    'Critical Value 10%': []
}

# Loop through each column in the DataFrame and perform the ADF test
for column in stats_data.columns:
    adf_result = adfuller(stats_data[column])
    # Positions used from the result tuple:
    # 0 = test statistic, 1 = p-value, 4 = dict of critical values.
    adf_results['Chemical'].append(column)
    adf_results['ADF Statistic'].append(adf_result[0])
    adf_results['p-value'].append(adf_result[1])
    for level in ('1%', '5%', '10%'):
        adf_results[f'Critical Value {level}'].append(adf_result[4][level])

# One row per chemical, labelled by its column name. Chaining set_index avoids
# mutating the frame in place.
adf_df = pd.DataFrame(adf_results).set_index('Chemical')

# Leave the frame as the cell's last expression so the notebook renders it as
# a single table; print() wraps wide frames into backslash-continued chunks.
adf_df

                    ADF Statistic       p-value  Critical Value 1%  \
Chemical                                                             
Mass v [µg/m**3]       -33.025266  0.000000e+00          -3.436913   
Na+ [µg/m**3]          -32.268711  0.000000e+00          -3.436913   
[NH4]+ [µg/m**3]        -6.822211  1.988670e-09          -3.437006   
K+ [µg/m**3]           -11.442001  6.146194e-21          -3.436946   
Mg2+ [µg/m**3]         -31.911196  0.000000e+00          -3.436913   
Ca2+ [µg/m**3]         -30.853576  0.000000e+00          -3.436913   
Cl- [µg/m**3]          -14.612499  4.002029e-27          -3.436933   
[NO3]- [µg/m**3]       -32.042382  0.000000e+00          -3.436913   
[SO4]2- [µg/m**3]      -32.677800  0.000000e+00          -3.436913   
[C2O4]2- [µg/m**3]     -33.143431  0.000000e+00          -3.436913   
Br- [µg/m**3]          -31.535730  0.000000e+00          -3.436913   
C org [µg/m**3]        -32.196714  0.000000e+00          -3.436913   
EC [µg/m**3]        

Phillips-Perron (PP) Test

In [11]:

# Phillips-Perron (PP) unit-root test, run once per column of stats_data with
# PhillipsPerron's default settings. As with the ADF test, the null hypothesis
# is that the series has a unit root.
# NOTE(review): each column is tested in its stored row order -- confirm that
# this is the intended time order.
pp_results = {
    'Chemical': [],
    'PP Statistic': [],
    'p-value': [],
    'Critical Value 1%': [],
    'Critical Value 5%': [],
    'Critical Value 10%': []
}

# Loop through each column in the DataFrame and perform the PP test
for column in stats_data.columns:
    series = stats_data[column]
    pp = PhillipsPerron(series)
    pp_results['Chemical'].append(column)
    pp_results['PP Statistic'].append(pp.stat)
    pp_results['p-value'].append(pp.pvalue)
    # critical_values is keyed by significance level ('1%', '5%', '10%').
    for level in ('1%', '5%', '10%'):
        pp_results[f'Critical Value {level}'].append(pp.critical_values[level])

# One row per chemical, labelled by its column name. Chaining set_index avoids
# mutating the frame in place.
pp_df = pd.DataFrame(pp_results).set_index('Chemical')

# Leave the frame as the cell's last expression so the notebook renders it as
# a single table; print() wraps wide frames into backslash-continued chunks.
pp_df

                    PP Statistic  p-value  Critical Value 1%  \
Chemical                                                       
Mass v [µg/m**3]      -33.013115      0.0          -3.436913   
Na+ [µg/m**3]         -32.324382      0.0          -3.436913   
[NH4]+ [µg/m**3]      -33.837618      0.0          -3.436913   
K+ [µg/m**3]          -32.510768      0.0          -3.436913   
Mg2+ [µg/m**3]        -31.909811      0.0          -3.436913   
Ca2+ [µg/m**3]        -30.890402      0.0          -3.436913   
Cl- [µg/m**3]         -32.876236      0.0          -3.436913   
[NO3]- [µg/m**3]      -32.058600      0.0          -3.436913   
[SO4]2- [µg/m**3]     -32.685677      0.0          -3.436913   
[C2O4]2- [µg/m**3]    -33.259472      0.0          -3.436913   
Br- [µg/m**3]         -31.646362      0.0          -3.436913   
C org [µg/m**3]       -32.459456      0.0          -3.436913   
EC [µg/m**3]          -30.377782      0.0          -3.436913   
TC [µg/m**3]          -31.016167      0.