In [None]:
import pandas as pd

# Read the full CO₂ dataset from the fixed CSV path.
df = pd.read_csv("/content/owid-co2-data.csv")

# Countries compared throughout the rest of the notebook.
countries = ["China", "United States", "Germany", "Iceland", "New Zealand"]

# Row selection: one of the chosen countries, year in 2015..2024 (both ends inclusive).
in_selected_countries = df["country"].isin(countries)
in_study_window = df["year"].between(2015, 2024)

# Columns used by the later plots and statistical tests.
analysis_columns = ["country", "year", "co2", "co2_per_capita", "population"]

# Keep the selected rows/columns, then drop every row that has a missing value
# in any of the analysis columns.
filtered = df.loc[in_selected_countries & in_study_window, analysis_columns].dropna()

print(filtered.shape)
print(filtered.head())

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker

# Timeline of annual CO₂ emissions, one line per country.
# Drawn on an explicit figure/axes pair (12 x 6 inches).
fig, ax = plt.subplots(figsize=(12, 6))

# `hue` splits the data into one line per country; markers show each yearly
# observation, thicker lines and the 'bright' palette keep the series distinct.
line_style = dict(marker='o', linewidth=2.5, palette='bright')
sns.lineplot(data=filtered, x='year', y='co2', hue='country', ax=ax, **line_style)

# Title, axis labels and a light dashed grid.
ax.set_title('Timeline of CO₂ Emissions (2015–2024)', fontsize=16, fontweight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Annual CO₂ Emissions(Million Tonnes)', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.5)

# One major tick per integer year on the x-axis.
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))

# Anchor the legend outside the plotting area so it does not cover the lines.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', title='Country')

fig.tight_layout()
plt.show()

In [None]:
# Per-country descriptive statistics for the three numeric measures.
summary_columns = ["co2", "co2_per_capita", "population"]
by_country = filtered.groupby("country")
desc = by_country[summary_columns].describe()
print(desc)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of the annual CO₂ values in `filtered`, one box per country.
fig, ax = plt.subplots(figsize=(10, 6))

# NOTE(review): recent seaborn releases emit a FutureWarning when `palette` is
# passed without `hue`; the replacement spelling depends on the installed
# seaborn version, so the call is left unchanged here — confirm and update.
sns.boxplot(
    data=filtered,
    x='country',
    y='co2',
    palette='Set2',
    ax=ax,
)

ax.set_title('Distribution of CO₂ Emissions by Country (2015-2024)', fontsize=14)
ax.set_xlabel('Country', fontsize=12)
# Label typo fixed: the original read "MIllion Tonnes".
ax.set_ylabel('Annual CO₂ Emissions (Million Tonnes)', fontsize=12)

# Faint dashed grid to make the box positions easier to read.
ax.grid(True, linestyle='--', alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm

# Ordinary least squares fit of CO₂ on population, using all rows of `filtered`.
# The predictor is population scaled to millions, with a constant column
# prepended so the model estimates an intercept.
X = sm.add_constant(filtered["population"].div(1e6))
y = filtered["co2"]

# Fit the model and print the full regression summary table.
model = sm.OLS(y, X).fit()
print(model.summary())


In [58]:
from scipy.stats import shapiro

# Shapiro–Wilk normality test on the residuals of the fitted OLS model.
residuals = model.resid
shapiro_result = shapiro(residuals)
stat, p = shapiro_result  # (test statistic, p-value)

print("Shapiro–Wilk p-value:", p)


Shapiro–Wilk p-value: 3.777364762115118e-08


In [None]:
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Diagnostic plot 1: residuals against fitted values. A horizontal band around
# the dashed zero line is what a well-specified linear model would show.
plt.scatter(model.fittedvalues, residuals)
plt.axhline(0, linestyle="--")
plt.xlabel("Fitted values")
plt.ylabel("Residuals")
plt.title("Residuals vs Fitted")
plt.show()

# Diagnostic plot 2: normal Q–Q plot of the residuals.
# The residuals are in the units of `co2`, not standardized, so a bare 45-degree
# line (which assumes sample and theoretical quantiles share a scale) would be
# a meaningless reference. `fit=True` standardizes the sample with the fitted
# location/scale before the quantiles are formed, which makes the 45-degree
# line a valid reference for normality.
sm.qqplot(residuals, fit=True, line="45")
plt.title("Q–Q Plot of Residuals")
plt.show()


In [None]:
# Levene's Test (Equal Variances)
# `stats` is not imported by any visible cell (only `shapiro` is imported from
# scipy.stats), so import it here; otherwise this cell raises NameError on a
# fresh kernel.
from scipy import stats

# One series of CO₂ values per country, in the order given by `countries`.
groups = [filtered[filtered['country'] == c]['co2'] for c in countries]
stat, p_levene = stats.levene(*groups)
print(f"Levene's P-value: {p_levene:.4e}")

In [None]:
import pingouin as pg

# Welch's ANOVA of CO₂ across countries.
welch = pg.welch_anova(data=filtered, dv="co2", between="country")

print("\nWelch's ANOVA Results:")
print(welch)


In [None]:
# Games–Howell pairwise comparisons of CO₂ between every pair of countries.
games_howell = pg.pairwise_gameshowell(data=filtered, dv="co2", between="country")

print(games_howell)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Scatter of observed CO₂ against population with the fitted OLS line overlaid.
X_vals = filtered["population"].div(1e6)  # population in millions, as used when fitting `model`
y_vals = filtered["co2"]

# Rebuild the fitted line by hand from the coefficients stored on `model`.
intercept, slope = model.params['const'], model.params['population']
y_pred = intercept + slope * X_vals

# Observed points in blue, regression line in red, on an explicit axes object.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X_vals, y_vals, color='blue', label='Observed CO₂')
ax.plot(X_vals, y_pred, color='red', linewidth=2, label='Regression Line')
ax.set_xlabel('Population (millions)')
ax.set_ylabel('CO₂ Emissions')
ax.set_title('CO₂ Emissions vs Population')
ax.legend()
ax.grid(True)
plt.show()
