In [1]:
import pandas as pd

  from pandas.core import (


In [6]:
# load the raw inputs; both CSV files are read from the current working directory
temperature_data = pd.read_csv("temperature.csv")
co2_data = pd.read_csv("carbon_emmission.csv")

In [8]:
# keep the first rows of each dataset (DataFrame.head() default) for a quick visual check
temperature_data_preview = temperature_data.head()
co2_data_preview = co2_data.head()

In [9]:
# show both previews together as the cell's output (a 2-tuple of DataFrames)
(temperature_data_preview, co2_data_preview)

(   ObjectId                       Country ISO2 ISO3  F1961  F1962  F1963  \
 0         1  Afghanistan, Islamic Rep. of   AF  AFG -0.113 -0.164  0.847   
 1         2                       Albania   AL  ALB  0.627  0.326  0.075   
 2         3                       Algeria   DZ  DZA  0.164  0.114  0.077   
 3         4                American Samoa   AS  ASM  0.079 -0.042  0.169   
 4         5      Andorra, Principality of   AD  AND  0.736  0.112 -0.752   
 
    F1964  F1965  F1966  ...  F2013  F2014  F2015  F2016  F2017  F2018  F2019  \
 0 -0.764 -0.244  0.226  ...  1.281  0.456  1.093  1.555  1.540  1.544  0.910   
 1 -0.166 -0.388  0.559  ...  1.333  1.198  1.569  1.464  1.121  2.028  1.675   
 2  0.250 -0.100  0.433  ...  1.192  1.690  1.121  1.757  1.512  1.210  1.115   
 3 -0.140 -0.562  0.181  ...  1.257  1.170  1.009  1.539  1.435  1.189  1.539   
 4  0.308 -0.490  0.415  ...  0.831  1.946  1.690  1.990  1.925  1.919  1.964   
 
    F2020  F2021  F2022  
 0  0.498  1.327  2.01

In [14]:
def _summary_stats(values):
    """Return the mean, median and variance of a pandas Series as a dict."""
    return {
        "Mean": values.mean(),
        "Median": values.median(),
        "Variance": values.var(),
    }


# temperature: every column whose name starts with "F", flattened into one
# long Series so the statistics pool all of those columns and all rows together
temperature_values = temperature_data.filter(regex='^F').stack()
temperature_stats = _summary_stats(temperature_values)

# CO2: statistics over the raw "Value" column
# NOTE(review): in the recorded output the CO2 mean (~181) sits far below the
# median (~314), which suggests "Value" may mix series on different scales --
# TODO confirm against the CSV before interpreting these numbers.
co2_values = co2_data["Value"]
co2_stats = _summary_stats(co2_values)

In [15]:
# show both summary dictionaries together as the cell's output
(temperature_stats, co2_stats)

({'Mean': 0.5377713483146068, 'Median': 0.47, 'Variance': 0.4294524831504413},
 {'Mean': 180.71615286624203, 'Median': 313.835, 'Variance': 32600.002004693})

In [23]:
import plotly.graph_objects as go
import plotly.express as px

# extracting time-series data for plotting
# temperature: averaging across rows (countries) for each "F<year>" column
temperature_years = temperature_data.filter(regex='^F').mean(axis=0)
# turn the "F1961"-style column labels into integer years for the x-axis
temperature_years.index = temperature_years.index.str.replace("F", '').astype(int)

# CO2: the first four characters of "Date" are parsed as the year, then all
# records belonging to the same year are averaged
co2_data["Year"] = co2_data['Date'].str[:4].astype(int)
co2_yearly = co2_data.groupby("Year")["Value"].mean()

# time-series plot for temperature and CO2 levels
# The two series live on very different scales (temperature change is around
# one unit, CO2 values are in the hundreds), so each gets its own y-axis;
# sharing a single axis would flatten the temperature curve into a line.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=temperature_years.index, y=temperature_years.values,
    mode="lines+markers", name="Temperature Change(°C)",
))

fig.add_trace(go.Scatter(
    x=co2_yearly.index, y=co2_yearly.values,
    mode='lines+markers', name="CO₂ Concentration (ppm)",
    line=dict(dash='dash'),
    yaxis="y2",  # plotted against the secondary (right-hand) axis
))

fig.update_layout(
    title="Time-series of Temperature Change and CO₂ Concentrations",
    xaxis_title="Year",
    yaxis=dict(title="Temperature Change (°C)"),
    yaxis2=dict(
        title="CO₂ Concentration (ppm)",
        overlaying="y",
        side="right",
    ),
    template="plotly_dark",
    # horizontal legend above the plot so it does not collide with the right axis
    legend=dict(
        title=dict(text="Metrics"),
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)
fig.show()

In [27]:
# correlation heatmap
# align the two yearly series on their shared year index; years missing from
# either series are dropped so both columns cover the same years
merged_data = pd.DataFrame({
    "Temperature Change": temperature_years,
    "CO₂ Concentration": co2_yearly
}).dropna()

heatmap_fig = px.imshow(
    merged_data.corr(),
    text_auto=".2f",
    color_continuous_scale="RdBu",
    # pin the colour range to the full correlation range so the diverging
    # scale is centred on 0 instead of being stretched to the observed values
    zmin=-1,
    zmax=1,
    title="Correlation heatmap",
)
heatmap_fig.update_layout(
    template="plotly_dark"
)

heatmap_fig.show()

In [31]:
# scatter of yearly temperature change against yearly CO2 value
axis_labels = {
    "CO₂ Concentration": "CO₂ Concentration(ppm)",
    "Temperature Change": "Temperature Change (°C)",
}
scatter_fig = px.scatter(
    merged_data,
    x="CO₂ Concentration",
    y="Temperature Change",
    labels=axis_labels,
    title="Temperature Change vs CO₂ Concentration",
    template="plotly_dark",
)
# larger, slightly transparent markers so overlapping years stay visible
scatter_fig.update_traces(marker={"size": 10, "opacity": 0.8})
scatter_fig.show()

In [34]:
from scipy.stats import linregress

# least-squares linear trend of each yearly series against the year
temp_trend = linregress(temperature_years.index, temperature_years.values)
temp_trend_line = temp_trend.slope * temperature_years.index + temp_trend.intercept

co2_trend = linregress(co2_yearly.index, co2_yearly.values)
co2_trend_line = co2_trend.slope * co2_yearly.index + co2_trend.intercept

# Temperature (and its trend) use the left axis; CO2 (and its trend) use a
# secondary right-hand axis. The series differ by orders of magnitude, so a
# shared axis would make the temperature curve and its slope unreadable.
fig_trends = go.Figure()
fig_trends.add_trace(go.Scatter(
    x=temperature_years.index, y=temperature_years.values,
    mode="lines+markers", name="Temperature Change(°C)",
))

fig_trends.add_trace(go.Scatter(
    x=temperature_years.index, y=temp_trend_line,
    mode="lines", name=f"Temperature Trend(Slope: {temp_trend.slope:.2f})",
    line=dict(dash='dash'),
))
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index, y=co2_yearly.values,
    mode='lines+markers', name="CO₂ Concentration (ppm)",
    yaxis="y2",
))
fig_trends.add_trace(go.Scatter(
    x=co2_yearly.index, y=co2_trend_line,
    mode='lines', name=f"CO₂ Trend (Slope: {co2_trend.slope:.2f})",
    line=dict(dash='dash'),
    yaxis="y2",
))

fig_trends.update_layout(
    title="Trends in Temperature Change and CO₂ Concentrations",
    xaxis_title="Year",
    yaxis=dict(title="Temperature Change (°C)"),
    yaxis2=dict(
        title="CO₂ Concentration (ppm)",
        overlaying="y",
        side="right",
    ),
    template="plotly_dark",
    # horizontal legend above the plot so it does not collide with the right axis
    legend=dict(
        title=dict(text="Metrics"),
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)
fig_trends.show()

In [39]:
# seasonal variations in CO2 concentrations
# the last two characters of "Date" are parsed as the month number
month_codes = co2_data['Date'].str[-2:]
co2_data['Month'] = month_codes.astype(int)

# average of all records that share a calendar month, across every year
co2_monthly = co2_data.groupby('Month')['Value'].mean()

fig_seasonal = px.line(
    co2_monthly,
    x=co2_monthly.index,
    y=co2_monthly.values,
    labels={"x": "Month", "y": "CO₂ Concentration (ppm)"},
    title="Seasonal Variations in CO₂ Concentrations",
    markers=True,
)

# one tick per month (1..12) on the x-axis
month_ticks = list(range(1, 13))
fig_seasonal.update_layout(
    template="plotly_dark",
    xaxis={"tickvals": month_ticks},
)
fig_seasonal.show()

In [48]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# preparing the data for clustering: one row per year with both metrics present
clustering_data = merged_data[["Temperature Change", "CO₂ Concentration"]].dropna()

# put both features on a comparable scale before clustering
scaler = StandardScaler()
scaled_data = scaler.fit_transform(clustering_data)

# applying K-Means clustering
# n_init is set explicitly so the number of restarts does not depend on the
# installed scikit-learn version's default
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # assuming 3 clusters for simplicity
clustering_data['Cluster'] = kmeans.fit_predict(scaled_data)

# adding labels for periods with similar climate patterns
# K-Means cluster ids are arbitrary (id 0 is not guaranteed to be the
# "moderate" group), so the clusters are ranked by their centroid -- the mean
# of the two standardized features -- and labelled from lowest to highest.
ordered_labels = ['Low Temp & CO₂', 'Moderate Temp & CO₂', 'High Temp & CO₂']
centroid_order = np.argsort(kmeans.cluster_centers_.mean(axis=1))
cluster_labels = {
    int(cluster_id): ordered_labels[rank]
    for rank, cluster_id in enumerate(centroid_order)
}
clustering_data['Label'] = clustering_data['Cluster'].map(cluster_labels)

import plotly.express as px

fig_clusters = px.scatter(
    clustering_data,
    x="CO₂ Concentration",
    y="Temperature Change",
    color="Label",
    color_discrete_sequence=px.colors.qualitative.Set2,
    # keep the legend (and colour assignment) in low -> high order
    category_orders={"Label": ordered_labels},
    labels={
        "CO₂ Concentration": "CO₂ Concentration (ppm)",
        "Temperature Change": "Temperature Change (°C)",
    },
    title="Clustering of Years Based on Climate Patterns"
)

fig_clusters.update_layout(
    template="plotly_dark",
    legend_title="Climate Pattern"
)

fig_clusters.show()




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.





In [64]:
from sklearn.linear_model import LinearRegression

# Preparing data
# X keeps a 2-D shape (one feature column) because of the double brackets;
# y is the 1-D target array
X = merged_data[["CO₂ Concentration"]].values  # CO₂ concentration as input
y = merged_data["Temperature Change"].values   # temperature change as target

# fit an ordinary least-squares line: temperature change as a function of CO₂
model = LinearRegression()
model.fit(X, y)

# function to simulate "what-if" scenarios
def simulate_temperature_change(co2_percentage_change):
    """Predict the temperature change after scaling the mean CO₂ value.

    The mean of ``merged_data["CO₂ Concentration"]`` is shifted by
    ``co2_percentage_change`` percent (negative values lower it) and the
    result is passed to the fitted ``model``.

    Parameters
    ----------
    co2_percentage_change : number
        Percentage by which to change the mean CO₂ value, e.g. ``10`` or ``-20``.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the shifted CO₂ value.
    """
    baseline_co2 = merged_data["CO₂ Concentration"].mean()
    scenario_co2 = baseline_co2 * (1 + co2_percentage_change / 100)

    # the model expects a 2-D input: one sample with one feature
    prediction = model.predict([[scenario_co2]])
    return prediction[0]

# simulating scenarios: label -> percentage change applied to the mean CO₂ value
scenario_changes = {
    "Increase CO₂ by 10%": 10,
    "Decrease CO₂ by 10%": -10,
    "Increase CO₂ by 20%": 20,
    "Decrease CO₂ by 20%": -20,
}
scenarios = {
    label: simulate_temperature_change(percent)
    for label, percent in scenario_changes.items()
}
scenarios

{'Increase CO₂ by 10%': 1.0866445037958163,
 'Decrease CO₂ by 10%': -0.059993041237237144,
 'Increase CO₂ by 20%': 1.6599632763123422,
 'Decrease CO₂ by 20%': -0.6333118137537621}