<a href="https://colab.research.google.com/github/swaransheel/Carbon-Emissions-Impact-Analysis-with-Python/blob/main/Analyzing_the_Impact_of_Carbon_Emissions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
# Read both input tables from the /content directory.
temperature_data = pd.read_csv("/content/temperature.csv")
carbon_data = pd.read_csv("/content/carbon_emmission.csv")

# Plain-text preview of the first rows of each table, temperature first.
print(temperature_data.head(), carbon_data.head(), sep="\n")

   ObjectId                       Country ISO2 ISO3  F1961  F1962  F1963  \
0         1  Afghanistan, Islamic Rep. of   AF  AFG -0.113 -0.164  0.847   
1         2                       Albania   AL  ALB  0.627  0.326  0.075   
2         3                       Algeria   DZ  DZA  0.164  0.114  0.077   
3         4                American Samoa   AS  ASM  0.079 -0.042  0.169   
4         5      Andorra, Principality of   AD  AND  0.736  0.112 -0.752   

   F1964  F1965  F1966  ...  F2013  F2014  F2015  F2016  F2017  F2018  F2019  \
0 -0.764 -0.244  0.226  ...  1.281  0.456  1.093  1.555  1.540  1.544  0.910   
1 -0.166 -0.388  0.559  ...  1.333  1.198  1.569  1.464  1.121  2.028  1.675   
2  0.250 -0.100  0.433  ...  1.192  1.690  1.121  1.757  1.512  1.210  1.115   
3 -0.140 -0.562  0.181  ...  1.257  1.170  1.009  1.539  1.435  1.189  1.539   
4  0.308 -0.490  0.415  ...  0.831  1.946  1.690  1.990  1.925  1.919  1.964   

   F2020  F2021  F2022  
0  0.498  1.327  2.012  
1  1.498  1.

In [15]:
def _summary_stats(values):
    """Return the mean, median, standard deviation, minimum and maximum of ``values``."""
    return {
        "Mean": values.mean(),
        "Median": values.median(),
        "Standard Deviation": values.std(),
        "Minimum": values.min(),
        "Maximum": values.max(),
    }


# Pool every column whose name starts with "F" into a single long Series.
temperature_values = temperature_data.filter(regex='^F').stack()
temperature_stats = _summary_stats(temperature_values)

# NOTE(review): the recorded output for this column shows a minimum of -0.1 and
# a mean (~180.7) far below the median (~313.8). That pattern suggests "Value"
# may mix more than one kind of measurement -- confirm against the CSV schema
# before interpreting these statistics.
co2_values = carbon_data["Value"]
co2_stats = _summary_stats(co2_values)

# Show both summaries as the cell result.
temperature_stats, co2_stats

({'Mean': np.float64(0.5377713483146068),
  'Median': 0.47,
  'Standard Deviation': 0.6553262417685086,
  'Minimum': -2.062,
  'Maximum': 3.691},
 {'Mean': np.float64(180.71615286624203),
  'Median': 313.835,
  'Standard Deviation': 180.55470640416144,
  'Minimum': -0.1,
  'Maximum': 424.0})

In [16]:

import plotly.graph_objects as go
import plotly.express as px

# Average each "F<year>" column over all rows to get one value per year,
# then turn the column labels ("F1961", ...) into integer years.
temperature_years = temperature_data.filter(regex='^F').mean(axis=0)
temperature_years.index = temperature_years.index.str.replace('F', '').astype(int)

# The first four characters of Date are the year; average Value per year.
carbon_data['Year'] = carbon_data['Date'].str[:4].astype(int)
co2_yearly = carbon_data.groupby('Year')['Value'].mean()

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=temperature_years.index,
    y=temperature_years.values,
    mode='lines+markers',
    name='Temperature change(°C)'
))
# The two series differ by orders of magnitude, so CO2 gets its own y-axis on
# the right; on a shared axis the temperature trace is flattened to a line.
fig.add_trace(go.Scatter(
    x=co2_yearly.index,
    y=co2_yearly.values,
    mode='lines+markers',
    name='CO2 Concentrations(ppm)',
    line=dict(dash="dash"),
    yaxis="y2"
))
fig.update_layout(
    title="Time-series of Temperature Change and CO2 Concentrations",
    xaxis_title="Year",
    yaxis=dict(title="Temperature change (°C)"),
    yaxis2=dict(
        title="CO2 Concentrations (ppm)",
        overlaying="y",
        side="right",
        showgrid=False
    ),
    template="plotly_white",
    # Shift the legend right so it does not sit on top of the secondary axis.
    legend=dict(title=dict(text="Metrics"), x=1.08)
)
fig.show()


In [17]:
# Align the two yearly series on their index and keep only the years for
# which both a temperature value and a CO2 value exist.
merged_data = pd.DataFrame(
    {"Temperature Change": temperature_years, "CO2 Concentrations": co2_yearly}
).dropna()

# Pairwise correlation matrix rendered as an annotated heatmap.
heatmap_fig = px.imshow(
    merged_data.corr(),
    title="Correlation Heatmap",
    color_continuous_scale="RdBu",
    text_auto=".2f",
)
heatmap_fig.update_layout(template="plotly_white")
heatmap_fig.show()

In [18]:
# Scatter of yearly temperature change against yearly CO2.
# The keys of `labels` must match the DataFrame column names exactly; the
# previous key "Co2 concentrations" matched no column, so the x-axis silently
# kept the raw column name.
scatter_fig = px.scatter(
    merged_data,
    x="CO2 Concentrations",
    y="Temperature Change",
    labels={"CO2 Concentrations": "Co2 concentrations(ppm)",
            "Temperature Change": "Temperature Change(°C)"},
    title="Temperature Change and CO2 Concentrations",
    template="plotly_white"
)
scatter_fig.update_traces(marker=dict(size=10, opacity=0.7))
scatter_fig.show()

In [19]:
from scipy.stats import linregress
# Least-squares linear trend of each yearly series against the year.
temp_trend = linregress(temperature_years.index, temperature_years.values)
temp_trend_line = temp_trend.slope * temperature_years.index + temp_trend.intercept

co2_trend = linregress(co2_yearly.index, co2_yearly.values)
co2_trend_line = co2_trend.slope * co2_yearly.index + co2_trend.intercept

fig_trends = go.Figure()
fig_trends.add_trace(go.Scatter(
    x=temperature_years.index,
    y=temperature_years.values,
    mode='lines+markers',
    name="Temperature Change (°C)"
))
fig_trends.add_trace(go.Scatter(
    x=temperature_years.index,
    y=temp_trend_line,
    mode='lines',
    name=f"Temperature Trend (Slope: {temp_trend.slope:.2f})",
    line=dict(dash='dash')
))
# CO2 and its trend line go on a secondary (right-hand) axis: the two series
# differ by orders of magnitude, and on a shared axis the temperature series
# and its slope are visually indistinguishable from a flat line.
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index,
    y=co2_yearly.values,
    mode='lines+markers',
    name="CO2 Concentrations (ppm)",
    yaxis="y2"
))
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index,
    y=co2_trend_line,
    mode='lines',
    name=f"CO2 Trend (Slope: {co2_trend.slope:.2f})",
    line=dict(dash='dash'),
    yaxis="y2"
))
fig_trends.update_layout(
    title="Temperature and CO2 Trends",
    xaxis_title="Year",
    yaxis=dict(title="Temperature Change (°C)"),
    yaxis2=dict(
        title="CO2 Concentrations (ppm)",
        overlaying="y",
        side="right",
        showgrid=False
    ),
    template="plotly_white",
    # Shift the legend right so it does not sit on top of the secondary axis.
    legend=dict(title=dict(text="Metrics"), x=1.08)
)
fig_trends.show()

In [7]:
# The last two characters of Date are taken as the month number.
carbon_data['Month'] = carbon_data['Date'].str[-2:].astype(int)

# Mean Value for each month number across all years.
co2_monthly = carbon_data.groupby('Month')['Value'].mean()

fig_seasonal = px.line(
    co2_monthly,
    x=co2_monthly.index,
    y=co2_monthly.values,
    labels={"x": "Month", "y": "CO2Concentration(ppm)"},
    title="Seasonal Variations in CO₂ Concentrations",
    markers=True,
)
# Force one x tick per month (1..12).
fig_seasonal.update_layout(
    template="plotly_white",
    xaxis=dict(tickvals=list(range(1, 13)), tickmode="array"),
)
fig_seasonal.show()

In [8]:
from scipy.stats import pearsonr,spearmanr
from statsmodels.tsa.stattools import grangercausalitytests

# Linear (Pearson) and rank (Spearman) correlation between the yearly series;
# the accompanying p-values are discarded.
pearson_corr, _ = pearsonr(merged_data["CO2 Concentrations"], merged_data["Temperature Change"])
spearman_corr, _ = spearmanr(merged_data["CO2 Concentrations"], merged_data["Temperature Change"])

# grangercausalitytests expects exactly two columns and tests whether the
# SECOND column Granger-causes the FIRST. Select the pair explicitly so the
# direction (CO2 -> temperature) is fixed and the call keeps working even if
# merged_data has gained extra columns. First differences are used instead of
# the raw levels.
granger_data = merged_data[["Temperature Change", "CO2 Concentrations"]].diff().dropna()

# `verbose` is deprecated in statsmodels and raises a FutureWarning when
# passed; recent releases are silent by default. (Older releases print the
# test tables when the argument is omitted.)
granger_results = grangercausalitytests(granger_data, maxlag=3)

# p-value of the SSR-based chi-square test for each lag, rounded to 4 places.
granger_p_values = {
    f"Lag{lag}": round(results[0]['ssr_chi2test'][1], 4)
    for lag, results in granger_results.items()
}
print(pearson_corr)
print(spearman_corr)
print(granger_p_values)


0.9554282559257312
0.9379013371609882
{'Lag1': np.float64(0.0617), 'Lag2': np.float64(0.6754), 'Lag3': np.float64(0.2994)}



verbose is deprecated since functions should not print results



In [9]:
import statsmodels.api as sm

# Build the lagged design frame as a NEW DataFrame instead of adding columns
# to merged_data in place: the in-place version leaked NaN-bearing lag columns
# into every other cell that reads merged_data, which made results depend on
# cell execution order.
lagged_data = (
    merged_data[["Temperature Change", "CO2 Concentrations"]]
    .assign(**{
        f"CO2 Lag {lag}": merged_data["CO2 Concentrations"].shift(lag)
        for lag in (1, 2, 3)
    })
    .dropna()  # the first three rows have no complete lag history
)

# Regress temperature change on current CO2 and its three lags, with intercept.
X = sm.add_constant(
    lagged_data[['CO2 Concentrations', 'CO2 Lag 1', 'CO2 Lag 2', 'CO2 Lag 3']]
)
y = lagged_data['Temperature Change']
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:     Temperature Change   R-squared:                       0.949
Model:                            OLS   Adj. R-squared:                  0.945
Method:                 Least Squares   F-statistic:                     252.5
Date:                Wed, 30 Jul 2025   Prob (F-statistic):           2.97e-34
Time:                        19:29:40   Log-Likelihood:                 45.098
No. Observations:                  59   AIC:                            -80.20
Df Residuals:                      54   BIC:                            -69.81
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 -4.7980      0

In [10]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# Cluster years on standardised (temperature change, CO2) pairs.
clustering_data = merged_data[["Temperature Change", "CO2 Concentrations"]].dropna()
scaler = StandardScaler()
scaled_data = scaler.fit_transform(clustering_data)

# n_init is pinned because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clustering_data['Cluster'] = kmeans.fit_predict(scaled_data)

# KMeans cluster ids are arbitrary, so a fixed id -> text mapping can attach
# the wrong description to a cluster. Derive each label from where the
# cluster's centroid ranks (lowest / middle / highest) on each feature.
# Centroid columns follow the feature order used for fitting:
# column 0 = temperature change, column 1 = CO2.
level_names = np.array(["Low", "Moderate", "High"])
centroids = kmeans.cluster_centers_
temp_levels = level_names[centroids[:, 0].argsort().argsort()]
co2_levels = level_names[centroids[:, 1].argsort().argsort()]
cluster_labels = {
    cluster_id: (
        f"{temp_level} temp & CO2"
        if temp_level == co2_level
        else f"{temp_level} temp & {co2_level} CO2"
    )
    for cluster_id, (temp_level, co2_level) in enumerate(zip(temp_levels, co2_levels))
}
clustering_data['Label'] = clustering_data['Cluster'].map(cluster_labels)
import plotly.express as px
# Scatter of the clustered years, coloured by cluster label.
# `labels` keys must equal the column names; the previous key
# "CO2 Concentration" (missing the trailing "s") matched no column, so the
# x-axis title was never overridden.
fig_clusters = px.scatter(
    clustering_data,
    x="CO2 Concentrations",
    y="Temperature Change",
    color="Label",
    color_discrete_sequence=px.colors.qualitative.Set2,
    labels={"CO2 Concentrations": "CO2 Concentration(ppm)",
            "Temperature Change": "Temperature Change(°C)",
            "Label": "Climate pattern"},
    title="Clustering of Years Based on Climate Patterns",
)
fig_clusters.update_layout(
    template="plotly_white",
    legend_title="Climate Patterns"
)
fig_clusters.show()

In [12]:
from sklearn.linear_model import LinearRegression
# Single-feature design matrix (2-D, one column) and 1-D target vector.
X = merged_data[["CO2 Concentrations"]].to_numpy()
y = merged_data["Temperature Change"].to_numpy()

# Ordinary least-squares fit of temperature change on CO2.
# Note: this rebinds the global name `model`.
model = LinearRegression().fit(X, y)
def simulate_temperature_change(co2_percentage_change, baseline_co2=None, regressor=None):
    """Predict the temperature change after scaling a baseline CO2 level.

    Parameters
    ----------
    co2_percentage_change : float
        Percentage by which to change the baseline CO2 value
        (10 means +10 %, -20 means -20 %).
    baseline_co2 : float, optional
        CO2 level the percentage is applied to. Defaults to the mean of
        ``merged_data["CO2 Concentrations"]`` (the original behaviour).
    regressor : object with ``predict``, optional
        Fitted model whose ``predict`` accepts a 2-D ``[[co2]]`` input.
        Defaults to the module-level ``model`` (the original behaviour).

    Returns
    -------
    The first element of ``regressor.predict([[new_co2]])``.
    """
    # The globals are only looked up when no explicit value is given, so the
    # function can be used (and checked) independently of notebook state.
    if baseline_co2 is None:
        baseline_co2 = merged_data["CO2 Concentrations"].mean()
    if regressor is None:
        regressor = model

    new_co2 = baseline_co2 * (1 + co2_percentage_change / 100)
    predicted_temp = regressor.predict([[new_co2]])
    return predicted_temp[0]
# Predicted temperature change for +/-10 % and +/-20 % CO2, keyed by a
# human-readable description and kept in the order listed.
scenarios = {
    f"{'Increase' if pct > 0 else 'Decrease'} CO2 by {abs(pct)}%": simulate_temperature_change(pct)
    for pct in (10, -10, 20, -20)
}
scenarios


{'Increase CO2 by 10%': np.float64(1.0866445037958163),
 'Decrease CO2 by 10%': np.float64(-0.059993041237237144),
 'Increase CO2 by 20%': np.float64(1.6599632763123422),
 'Decrease CO2 by 20%': np.float64(-0.6333118137537621)}