In [6]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Stationarity check (Augmented Dickey-Fuller) on global daily new COVID-19
# cases, derived from the cumulative per-region "Confirmed" counts.

# Load the dataset
df = pd.read_csv("/content/time-series-19-covid-combined.csv")

# Strip any extra spaces from column names so lookups like "Date" are reliable
df.columns = df.columns.str.strip()

# Show column names to confirm structure
print("📌 Column names:", df.columns.tolist())

# Parse 'Date' and use it as a chronologically sorted index
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df = df.set_index("Date").sort_index()

# Aggregate all regions into one row per date.
# numeric_only=True is required here: the frame also carries text columns
# ('Country/Region', 'Province/State'). With pandas >= 2.0 a plain .sum()
# no longer drops them silently and instead tries to add the strings together
# (or fails on mixed str/NaN values). Restricting the sum to numeric columns
# matches what older pandas versions did implicitly.
daily_df = df.groupby(level=0).sum(numeric_only=True)

# 'Confirmed' is cumulative, so the first difference gives daily new cases
daily_df["daily_cases"] = daily_df["Confirmed"].diff()

# The first day has no predecessor, so its difference is NaN; drop it
ts = daily_df["daily_cases"].dropna()

# Perform ADF Test (null hypothesis: the series has a unit root)
adf_result = adfuller(ts)

# Name the tuple entries that get reported instead of using bare indices
adf_statistic, p_value, used_lags = adf_result[:3]
critical_values = adf_result[4]

# Display results
print("\n📊 ADF Test Results:")
print(f"ADF Statistic: {adf_statistic:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"Used lags: {used_lags}")
print("Critical Values:")
for level, threshold in critical_values.items():
    print(f"   {level}: {threshold:.4f}")

# Interpretation: reject the unit-root null at the 5% significance level
if p_value < 0.05:
    print("\n✅ The series is stationary.")
else:
    print("\n❌ The series is NOT stationary.")


📌 Column names: ['Date', 'Country/Region', 'Province/State', 'Lat', 'Long', 'Confirmed', 'Recovered', 'Deaths']

📊 ADF Test Results:
ADF Statistic: 0.3461
p-value: 0.9794
Used lags: 15
Critical Values:
   1%: -3.4565
   5%: -2.8730
   10%: -2.5729

❌ The series is NOT stationary.
