In [9]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [3]:
# Load the two raw tables from CSV files resolved relative to the working directory.
# The spelling "carbon_emmission.csv" is the path this notebook reads; keep it
# in sync with the file on disk rather than "correcting" it here.
CO2_CSV, TEMPERATURE_CSV = "carbon_emmission.csv", "temperature.csv"
carbon_emissions = pd.read_csv(CO2_CSV)
temperature = pd.read_csv(TEMPERATURE_CSV)

In [6]:
# Quick look at the CO2 table: first five rows, then the total row count.
print(carbon_emissions.head())
print(len(carbon_emissions))

   ObjectId Country     Date   Value
0         1   World  1958M03  315.70
1         2   World  1958M04  317.45
2         3   World  1958M05  317.51
3         4   World  1958M06  317.24
4         5   World  1958M07  315.86
1570


In [7]:
# Quick look at the temperature table: first five rows, then the total row count.
print(temperature.head())
print(len(temperature))

   ObjectId                       Country ISO2 ISO3  F1961  F1962  F1963  \
0         1  Afghanistan, Islamic Rep. of   AF  AFG -0.113 -0.164  0.847   
1         2                       Albania   AL  ALB  0.627  0.326  0.075   
2         3                       Algeria   DZ  DZA  0.164  0.114  0.077   
3         4                American Samoa   AS  ASM  0.079 -0.042  0.169   
4         5      Andorra, Principality of   AD  AND  0.736  0.112 -0.752   

   F1964  F1965  F1966  ...  F2013  F2014  F2015  F2016  F2017  F2018  F2019  \
0 -0.764 -0.244  0.226  ...  1.281  0.456  1.093  1.555  1.540  1.544  0.910   
1 -0.166 -0.388  0.559  ...  1.333  1.198  1.569  1.464  1.121  2.028  1.675   
2  0.250 -0.100  0.433  ...  1.192  1.690  1.121  1.757  1.512  1.210  1.115   
3 -0.140 -0.562  0.181  ...  1.257  1.170  1.009  1.539  1.435  1.189  1.539   
4  0.308 -0.490  0.415  ...  0.831  1.946  1.690  1.990  1.925  1.919  1.964   

   F2020  F2021  F2022  
0  0.498  1.327  2.012  
1  1.498  1.

In [8]:
# Descriptive statistics for both datasets, computed by one small helper so
# that the two summaries are guaranteed to use the same definitions.
def _summarize(values):
    """Return mean, median and variance (pandas default, ddof=1) of a Series."""
    return {
        "Mean": values.mean(),
        "Median": values.median(),
        "Variance": values.var()
    }


# Temperature: pool every column whose name starts with "F" (the year columns)
# into one long Series so the statistics cover all rows and all years at once.
temperature_values = temperature.filter(regex='^F').stack()
temperature_stats = _summarize(temperature_values)

# CO2: statistics over the raw "Value" column.
# NOTE(review): the recorded output shows a mean (~180.7) far below the median
# (~313.8) while the previewed values are all ~315+; the column may mix
# different kinds of measurements — confirm before interpreting these numbers.
co2_values = carbon_emissions["Value"]
co2_stats = _summarize(co2_values)

temperature_stats, co2_stats

({'Mean': 0.5377713483146068, 'Median': 0.47, 'Variance': 0.4294524831504378},
 {'Mean': 180.71615286624203,
  'Median': 313.835,
  'Variance': 32600.00200469294})

In [11]:
# --- Time-Series Plot with Enhanced Visuals ---
def create_timeseries_plot(temperature, carbon_emissions):
    """Plot yearly mean temperature change and yearly mean CO₂ against time.

    Temperature change is drawn on the left y-axis and CO₂ concentration on a
    secondary right y-axis: the two series use different units and differ by
    orders of magnitude, so a single shared axis would flatten the
    temperature trace into a near-horizontal line.

    Args:
        temperature: DataFrame whose year columns are named ``F<year>``
            (e.g. ``F1961``); every such column is averaged over all rows.
        carbon_emissions: DataFrame with a string ``Date`` column whose first
            four characters are the year, and a numeric ``Value`` column.
            The frame is only read; no column is added to it.

    Returns:
        None. The figure is rendered via ``fig.show()``.
    """
    # Temperature: average across countries for each year, then turn the
    # column labels ("F1961", ...) into integer years.
    temperature_years = temperature.filter(regex='^F').mean(axis=0)
    temperature_years.index = (
        temperature_years.index.str.replace('F', '', regex=False).astype(int)
    )

    # CO2: derive the year as a standalone Series instead of writing a 'Year'
    # column into the caller's DataFrame (a plotting helper should not mutate
    # its inputs), then average the monthly values per year.
    co2_year = carbon_emissions['Date'].str[:4].astype(int).rename('Year')
    co2_yearly = carbon_emissions.groupby(co2_year)['Value'].mean()

    fig = go.Figure()

    # Temperature trace (left axis)
    fig.add_trace(go.Scatter(
        x=temperature_years.index, y=temperature_years.values,
        mode='lines+markers', name="Temperature Change (°C)",
        line=dict(color='#FF6347', width=2.5),  # Tomato color for temperature
        marker=dict(size=8, color='#FF6347')
    ))

    # CO2 trace (right axis, bound through yaxis="y2")
    fig.add_trace(go.Scatter(
        x=co2_yearly.index, y=co2_yearly.values,
        mode='lines+markers', name="CO₂ Concentration (ppm)",
        line=dict(color='#4682B4', width=2, dash='dash'),  # SteelBlue color for CO2
        marker=dict(size=6, color='#4682B4'),
        yaxis="y2"
    ))

    fig.update_layout(
        title="Time-series of Temperature Change and CO₂ Concentrations",
        xaxis_title="Year",
        yaxis=dict(title=dict(text="Temperature Change (°C)")),
        yaxis2=dict(
            title=dict(text="CO₂ Concentration (ppm)"),
            overlaying="y",   # draw on top of the primary axis, sharing x
            side="right",
            showgrid=False    # avoid two misaligned grids
        ),
        template="plotly_white",
        # Horizontal legend above the plot so it does not cover the right axis.
        legend=dict(
            title=dict(text="Metrics"),
            orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
        ),
        hovermode="x unified",  # one hover box listing both series per year
        margin=dict(l=50, r=50, t=80, b=50),
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
        paper_bgcolor='rgba(0,0,0,0)'  # transparent paper background
    )

    fig.update_xaxes(showline=True, linewidth=1, linecolor='black', gridcolor='lightgray')
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='lightgray')

    fig.show()

create_timeseries_plot(temperature, carbon_emissions)

# --- Correlation Heatmap with Enhanced Color Scheme ---
def create_correlation_heatmap(temperature, carbon_emissions):
    """Show the correlation matrix of yearly temperature change and yearly CO₂.

    Both inputs are reduced to one value per year, joined on the year, and
    rows with a missing value in either series are dropped before the
    correlation matrix (``DataFrame.corr()``) is computed.

    Args:
        temperature: DataFrame whose year columns are named ``F<year>``.
        carbon_emissions: DataFrame with a string ``Date`` column starting
            with the four-digit year and a numeric ``Value`` column. The
            frame is only read; no column is added to it.

    Returns:
        None. The figure is rendered via ``heatmap_fig.show()``.
    """
    temperature_years = temperature.filter(regex='^F').mean(axis=0)
    temperature_years.index = (
        temperature_years.index.str.replace('F', '', regex=False).astype(int)
    )

    # Year is kept in a local Series so the caller's DataFrame is not mutated.
    co2_year = carbon_emissions['Date'].str[:4].astype(int).rename('Year')
    co2_yearly = carbon_emissions.groupby(co2_year)['Value'].mean()

    # Align both yearly series on the year index; keep only years present in both.
    merged_data = pd.DataFrame({
        "Temperature Change": temperature_years,
        "CO₂ Concentration": co2_yearly
    }).dropna()

    heatmap_fig = px.imshow(
        merged_data.corr(),
        text_auto=".2f",
        color_continuous_scale=px.colors.diverging.RdBu,
        # Pin the colour range to the full correlation range so the diverging
        # scale is centred on 0; otherwise colours are auto-ranged to the data
        # and a strong positive correlation can be painted as "negative".
        zmin=-1, zmax=1,
        title="Correlation Heatmap",
        labels=dict(x="Metrics", y="Metrics", color="Correlation"),
    )

    heatmap_fig.update_layout(
        template="plotly_white",
        margin=dict(l=50, r=50, t=80, b=50)
    )

    heatmap_fig.show()

create_correlation_heatmap(temperature, carbon_emissions)

# --- Scatter Plot with Enhanced Styling ---
def create_scatter_plot(temperature, carbon_emissions):
    """Scatter yearly temperature change against yearly CO₂ with an OLS trendline.

    Args:
        temperature: DataFrame whose year columns are named ``F<year>``.
        carbon_emissions: DataFrame with a string ``Date`` column starting
            with the four-digit year and a numeric ``Value`` column. The
            frame is only read; no column is added to it.

    Returns:
        None. The figure is rendered via ``scatter_fig.show()``.
    """
    temperature_years = temperature.filter(regex='^F').mean(axis=0)
    temperature_years.index = (
        temperature_years.index.str.replace('F', '', regex=False).astype(int)
    )

    # Year is kept in a local Series so the caller's DataFrame is not mutated.
    co2_year = carbon_emissions['Date'].str[:4].astype(int).rename('Year')
    co2_yearly = carbon_emissions.groupby(co2_year)['Value'].mean()

    # One row per year that has both a temperature and a CO2 value.
    merged_data = pd.DataFrame({
        "Temperature Change": temperature_years,
        "CO₂ Concentration": co2_yearly
    }).dropna()

    scatter_fig = px.scatter(
        merged_data,
        x="CO₂ Concentration", y="Temperature Change",
        labels={"CO₂ Concentration": "CO₂ Concentration (ppm)", "Temperature Change": "Temperature Change (°C)"},
        title="Temperature Change vs CO₂ Concentration",
        template="plotly_white",
        trendline="ols"  # ordinary least-squares fit drawn as an extra trace
    )

    # Style only the point trace; the trendline is a separate 'lines' trace
    # and should not receive marker settings.
    scatter_fig.update_traces(
        marker=dict(size=10, opacity=0.8, line=dict(width=1, color='DarkSlateGray')),
        selector=dict(mode='markers')
    )

    scatter_fig.update_layout(
        margin=dict(l=50, r=50, t=80, b=50)
    )

    scatter_fig.show()

create_scatter_plot(temperature, carbon_emissions)

In [12]:
from scipy.stats import linregress

# Yearly series for the trend analysis. They are built here at cell level
# because the plotting functions defined earlier only create them as local
# variables, so on a fresh kernel these names would otherwise be undefined.
temperature_years = temperature.filter(regex='^F').mean(axis=0)
temperature_years.index = (
    temperature_years.index.str.replace('F', '', regex=False).astype(int)
)

co2_year = carbon_emissions['Date'].str[:4].astype(int).rename('Year')
co2_yearly = carbon_emissions.groupby(co2_year)['Value'].mean()

# temperature trend: least-squares line of yearly mean temperature change vs. year
temp_trend = linregress(temperature_years.index, temperature_years.values)
temp_trend_line = temp_trend.slope * temperature_years.index + temp_trend.intercept

# CO2 trend: least-squares line of yearly mean CO2 vs. year
co2_trend = linregress(co2_yearly.index, co2_yearly.values)
co2_trend_line = co2_trend.slope * co2_yearly.index + co2_trend.intercept

fig_trends = go.Figure()

# Temperature series and its trend use the primary (left) y-axis.
fig_trends.add_trace(go.Scatter(
    x=temperature_years.index, y=temperature_years.values,
    mode='lines+markers', name="Temperature Change (°C)"
))
fig_trends.add_trace(go.Scatter(
    x=temperature_years.index, y=temp_trend_line,
    mode='lines', name=f"Temperature Trend (Slope: {temp_trend.slope:.2f})", line=dict(dash='dash')
))
# CO2 series and its trend use a secondary (right) y-axis: the two quantities
# have different units and scales, so sharing one axis would flatten the
# temperature curves.
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index, y=co2_yearly.values,
    mode='lines+markers', name="CO₂ Concentration (ppm)", yaxis="y2"
))
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index, y=co2_trend_line,
    mode='lines', name=f"CO₂ Trend (Slope: {co2_trend.slope:.2f})", line=dict(dash='dash'),
    yaxis="y2"
))

fig_trends.update_layout(
    title="Trends in Temperature Change and CO₂ Concentrations",
    xaxis_title="Year",
    yaxis=dict(title=dict(text="Temperature Change (°C)")),
    yaxis2=dict(
        title=dict(text="CO₂ Concentration (ppm)"),
        overlaying="y", side="right", showgrid=False
    ),
    template="plotly_white",
    # Horizontal legend above the plot so it does not cover the right axis.
    legend=dict(
        title=dict(text="Metrics"),
        orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
    )
)
fig_trends.show()

# seasonal variations in CO2 concentrations: mean value per calendar month,
# where the month is read from the last two characters of 'Date'
carbon_emissions['Month'] = carbon_emissions['Date'].str[-2:].astype(int)
co2_monthly = carbon_emissions.groupby('Month')['Value'].mean()

# Hand plotly a tidy two-column frame and refer to columns by name.
fig_seasonal = px.line(
    co2_monthly.reset_index(),
    x="Month",
    y="Value",
    labels={"Month": "Month", "Value": "CO₂ Concentration (ppm)"},
    title="Seasonal Variations in CO₂ Concentrations",
    markers=True
)
fig_seasonal.update_layout(
    xaxis=dict(tickmode="array", tickvals=list(range(1, 13))),  # one tick per month
    template="plotly_white"
)
fig_seasonal.show()

In [13]:
from scipy.stats import pearsonr, spearmanr
from statsmodels.tsa.stattools import grangercausalitytests

# Year-aligned table used by the tests below. It is built here at cell level
# because the plotting functions defined earlier only create `merged_data` as
# a local variable, so on a fresh kernel the name would otherwise be undefined.
temperature_years = temperature.filter(regex='^F').mean(axis=0)
temperature_years.index = (
    temperature_years.index.str.replace('F', '', regex=False).astype(int)
)
co2_year = carbon_emissions['Date'].str[:4].astype(int).rename('Year')
co2_yearly = carbon_emissions.groupby(co2_year)['Value'].mean()

merged_data = pd.DataFrame({
    "Temperature Change": temperature_years,
    "CO₂ Concentration": co2_yearly
}).dropna()  # keep only years present in both series

# pearson and spearman correlation coefficients
# (p-values are kept under explicit names instead of `_`, which IPython uses
# for the previous cell output)
pearson_corr, pearson_p_value = pearsonr(merged_data["CO₂ Concentration"], merged_data["Temperature Change"])
spearman_corr, spearman_p_value = spearmanr(merged_data["CO₂ Concentration"], merged_data["Temperature Change"])

# granger causality test
# grangercausalitytests checks whether the SECOND column helps predict the
# FIRST, so the column order is spelled out: does CO₂ Granger-cause temperature?
# First differences remove the trend; stationarity is assumed, not tested here.
granger_data = merged_data[["Temperature Change", "CO₂ Concentration"]].diff().dropna()
granger_results = grangercausalitytests(granger_data, maxlag=3, verbose=False)

# extracting p-values for causality (chi-square test on the SSR, one per lag)
granger_p_values = {f"Lag {lag}": round(results[0]['ssr_chi2test'][1], 4)
                    for lag, results in granger_results.items()}

pearson_corr, spearman_corr, granger_p_values

(0.9554282559257313,
 0.9379013371609882,
 {'Lag 1': 0.0617, 'Lag 2': 0.6754, 'Lag 3': 0.2994})

In [14]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# preparing the data for clustering
# `merged_data` (with "Temperature Change" and "CO₂ Concentration" columns)
# is expected to exist already; it is not created in this cell.
clustering_data = merged_data[["Temperature Change", "CO₂ Concentration"]].dropna()

# Standardise both features so neither dominates the Euclidean distance.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(clustering_data)

# applying K-Means clustering
# n_init is set explicitly: its default differs between scikit-learn versions,
# and leaving it implicit triggers a FutureWarning on some of them.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # assuming 3 clusters for simplicity
clustering_data['Cluster'] = kmeans.fit_predict(scaled_data)

# adding labels for periods with similar climate patterns
# K-Means cluster ids are arbitrary, so the names are assigned by ranking the
# clusters on their centroid level (mean of the standardised temperature and
# CO₂ coordinates) rather than by hard-coding which id means "High".
ordered_labels = ['Low Temp & CO₂', 'Moderate Temp & CO₂', 'High Temp & CO₂']
centroid_level = kmeans.cluster_centers_.mean(axis=1)
cluster_labels = {
    int(cluster_id): label
    for cluster_id, label in zip(np.argsort(centroid_level), ordered_labels)
}
clustering_data['Label'] = clustering_data['Cluster'].map(cluster_labels)

fig_clusters = px.scatter(
    clustering_data,
    x="CO₂ Concentration",
    y="Temperature Change",
    color="Label",
    color_discrete_sequence=px.colors.qualitative.Set2,
    category_orders={"Label": ordered_labels},  # legend reads Low → Moderate → High
    labels={
        "CO₂ Concentration": "CO₂ Concentration (ppm)",
        "Temperature Change": "Temperature Change (°C)",
        "Label": "Climate Pattern"
    },
    title="Clustering of Years Based on Climate Patterns"
)

fig_clusters.update_layout(
    template="plotly_white",
    legend_title="Climate Pattern"
)

fig_clusters.show()


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.





In [15]:
# setting up a simple predictive model using linear regression
from sklearn.linear_model import LinearRegression

# Preparing data
# `merged_data` is expected to exist already; it is not created in this cell.
X = merged_data[["CO₂ Concentration"]].values  # 2-D feature matrix (one column: CO₂)
y = merged_data["Temperature Change"].values   # 1-D target vector

# Least-squares fit of temperature change on CO₂ concentration.
model = LinearRegression().fit(X, y)


def simulate_temperature_change(co2_percentage_change):
    """Predict temperature change after scaling the mean CO₂ level.

    The baseline is the mean of ``merged_data["CO₂ Concentration"]`` (the
    average over all rows, not the most recent value). Reads the cell-level
    ``model`` and ``merged_data``.

    Args:
        co2_percentage_change: Percentage applied to the baseline, e.g. ``10``
            for +10 % or ``-20`` for -20 %.

    Returns:
        The model's predicted temperature change for the scaled CO₂ level
        (first element of the array returned by ``model.predict``).
    """
    baseline_co2 = merged_data["CO₂ Concentration"].mean()
    scenario_co2 = baseline_co2 * (1 + co2_percentage_change / 100)

    # predict() expects a 2-D input: one sample with one feature
    return model.predict([[scenario_co2]])[0]


# simulating scenarios: display label -> percentage change applied to the baseline
scenario_changes = {
    "Increase CO₂ by 10%": 10,
    "Decrease CO₂ by 10%": -10,
    "Increase CO₂ by 20%": 20,
    "Decrease CO₂ by 20%": -20,
}
scenarios = {
    label: simulate_temperature_change(change)
    for label, change in scenario_changes.items()
}

scenarios

{'Increase CO₂ by 10%': 1.0866445037958163,
 'Decrease CO₂ by 10%': -0.059993041237237144,
 'Increase CO₂ by 20%': 1.6599632763123422,
 'Decrease CO₂ by 20%': -0.6333118137537621}