In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Raw inputs, read from the notebook's working directory: the temperature
# records first, then the CO₂ emission records.
temperature_csv = "GlobalTemperatures.csv"
emissions_csv = "co2_emission.csv"
temp = pd.read_csv(temperature_csv)
co2 = pd.read_csv(emissions_csv)


In [None]:
# Keep a temperature row whenever the two fields this notebook actually reads
# (`dt` and `LandAverageTemperature`) are present. A blanket dropna() would
# also discard rows whose only gap is in some other, unused column.
# The frame is rebound rather than mutated in place, so re-running the cell
# always starts from whatever `temp` currently holds and yields the same result.
temp = (
    temp
    .dropna(subset=['dt', 'LandAverageTemperature'])
    .assign(dt=lambda frame: pd.to_datetime(frame['dt']))
)

# The emissions table keeps the original "drop any incomplete row" rule.
# Year is cast to int only afterwards, once no missing values remain.
co2 = (
    co2
    .dropna()
    .assign(Year=lambda frame: frame['Year'].astype(int))
)


In [None]:
# Tag every reading with its calendar year, then collapse each year's
# readings into a single mean land temperature (one row per year).
temp['year'] = temp['dt'].dt.year
yearly_readings = temp.groupby('year')['LandAverageTemperature']
avg_temp = yearly_readings.mean().reset_index()


In [None]:
# Build the global emissions total for each year.
# NOTE(review): assumes an 'Entity' value of 'World', when present, is a
# pre-aggregated global total — confirm against the CSV. Summing such rows
# together with the per-country rows would count every tonne twice, so when
# they exist they are used on their own; otherwise every entity is summed,
# exactly as before.
world_rows = co2[co2['Entity'] == 'World']
global_source = world_rows if not world_rows.empty else co2
co2_global = (
    global_source
    .groupby('Year')['Annual CO₂ emissions (tonnes )']
    .sum()
    .reset_index()
)

In [None]:
# Inner-join the yearly temperature means with the yearly emission totals.
# The key columns are named differently ('year' vs 'Year'), so both of them
# are present in the merged frame.
climate = avg_temp.merge(co2_global, how='inner', left_on='year', right_on='Year')


In [None]:
# Temperature and emissions are drawn against separate y-axes that share one
# year axis.
fig, ax1 = plt.subplots(figsize=(10, 6))
sns.lineplot(data=climate, x='year', y='LandAverageTemperature', ax=ax1,
             color='orange', label='Temperature', legend=False)
ax2 = ax1.twinx()
sns.lineplot(data=climate, x='year', y='Annual CO₂ emissions (tonnes )', ax=ax2,
             color='green', label='CO2 Emissions', legend=False)

ax1.set_title('Global Temperature vs CO₂ Emissions Over Time')
ax1.set_xlabel('Year')
ax1.set_ylabel('Average Temperature (°C)')
ax2.set_ylabel('CO₂ Emissions (tonnes)')

# Left to themselves, the two lineplot calls each draw a legend on their own
# axes and the boxes can land on top of each other. Gather both entries into
# one legend, placed on ax2 because it is the axes drawn last.
handles_1, labels_1 = ax1.get_legend_handles_labels()
handles_2, labels_2 = ax2.get_legend_handles_labels()
ax2.legend(handles_1 + handles_2, labels_1 + labels_2, loc='upper left')
plt.show()

In [None]:
# Correlation (pandas default method) between the yearly temperature means
# and the yearly emission totals, reported to two decimals.
temperature_series = climate['LandAverageTemperature']
emission_series = climate['Annual CO₂ emissions (tonnes )']
corr = temperature_series.corr(emission_series)
print("Correlation between Temperature & CO₂:", round(corr, 2))

In [None]:
# Emissions on x, temperature on y: one red point per row of `climate`.
scatter_ax = plt.figure(figsize=(8, 6)).gca()
sns.scatterplot(
    data=climate,
    x='Annual CO₂ emissions (tonnes )',
    y='LandAverageTemperature',
    color='red',
    ax=scatter_ax,
)
scatter_ax.set_title('CO₂ Emissions vs Global Temperature')
scatter_ax.set_xlabel('CO₂ Emissions (tonnes)')
scatter_ax.set_ylabel('Average Temperature (°C)')
plt.show()

In [None]:
# Rank emitters by their emissions summed over all years and keep the ten
# largest.
# NOTE(review): assumes an 'Entity' value of 'World', when present, is a
# global aggregate rather than a country — confirm against the CSV. It is
# excluded so it cannot take a slot in a chart titled "Countries"; when no
# such row exists the filter changes nothing.
country_rows = co2[co2['Entity'] != 'World']
co2_country = (
    country_rows
    .groupby('Entity')['Annual CO₂ emissions (tonnes )']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(10,5))
sns.barplot(x=co2_country.values, y=co2_country.index, palette='Reds_r')
plt.title('Top 10 Countries by Total CO₂ Emissions')
plt.xlabel('Total Emissions (tonnes)')
plt.ylabel('Country')
plt.show()

In [None]:
# Bucket each year into its decade (e.g. 1987 -> 1980) and average both
# series within every decade.
climate['decade'] = (climate['year'] // 10) * 10
decade_summary = climate.groupby('decade')[['LandAverageTemperature', 'Annual CO₂ emissions (tonnes )']].mean().reset_index()

# Temperature is in °C and emissions are in tonnes. On a single shared y-axis
# the emissions scale dominates and the temperature curve is squashed flat, so
# each series gets its own y-axis over the shared decade axis.
decade_fig, decade_temp_ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x='decade', y='LandAverageTemperature', data=decade_summary,
             color='orange', label='Temp', ax=decade_temp_ax, legend=False)
decade_co2_ax = decade_temp_ax.twinx()
sns.lineplot(x='decade', y='Annual CO₂ emissions (tonnes )', data=decade_summary,
             color='green', label='CO₂', ax=decade_co2_ax, legend=False)
decade_temp_ax.set_title('Average Temperature & CO₂ Emissions by Decade')

# One legend holding both entries, placed on the axes drawn last.
temp_handles, temp_labels = decade_temp_ax.get_legend_handles_labels()
co2_handles, co2_labels = decade_co2_ax.get_legend_handles_labels()
decade_co2_ax.legend(temp_handles + co2_handles, temp_labels + co2_labels, loc='upper left')
plt.show()

In [None]:
# Pairwise correlations between the columns currently in `climate`, drawn as
# an annotated heatmap.
corr_matrix = climate.corr()
heatmap_ax = plt.figure(figsize=(6, 4)).gca()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()


In [None]:
# Row-to-row differences of both series. The first row has no predecessor,
# so its change is NaN.
climate['Temp_Change'] = climate['LandAverageTemperature'].diff()
climate['CO2_Change'] = climate['Annual CO₂ emissions (tonnes )'].diff()

# Temperature changes are in °C and emission changes are in tonnes. On one
# shared y-axis the emissions scale dominates and the temperature line is
# squashed flat, so each series gets its own y-axis over the shared year axis.
change_fig, temp_change_ax = plt.subplots(figsize=(12, 5))
sns.lineplot(x='year', y='Temp_Change', data=climate, color='orange',
             label='Δ Temperature', ax=temp_change_ax, legend=False)
co2_change_ax = temp_change_ax.twinx()
sns.lineplot(x='year', y='CO2_Change', data=climate, color='green',
             label='Δ CO₂', ax=co2_change_ax, legend=False)
temp_change_ax.set_title('Yearly Change in Temperature and CO₂ Emissions')

# One legend holding both entries, placed on the axes drawn last.
temp_handles, temp_labels = temp_change_ax.get_legend_handles_labels()
co2_handles, co2_labels = co2_change_ax.get_legend_handles_labels()
co2_change_ax.legend(temp_handles + co2_handles, temp_labels + co2_labels, loc='upper left')
plt.show()

In [None]:
# Inspection only: show the emission table's column labels as the cell output.
co2.columns


In [None]:
# Row-to-row differences of both series (recomputed here so the cell does not
# depend on an earlier one), then one point per row: emission change on x,
# temperature change on y.
climate['Temp_Change'] = climate['LandAverageTemperature'].diff(periods=1)
climate['CO2_Change'] = climate['Annual CO₂ emissions (tonnes )'].diff(periods=1)

change_scatter_ax = plt.gca()
sns.scatterplot(data=climate, x='CO2_Change', y='Temp_Change', color='teal', ax=change_scatter_ax)
change_scatter_ax.set_title('Yearly Change: CO₂ vs Temperature')
change_scatter_ax.set_xlabel('Δ CO₂ Emissions')
change_scatter_ax.set_ylabel('Δ Temperature (°C)')
plt.show()

In [None]:
import numpy as np

# Correlate temperature with emissions shifted down by 1..5 rows.
# NOTE(review): a shift of `lag` rows equals `lag` years only if `climate`
# holds exactly one row per consecutive year — confirm there are no gaps.
temperature_series = climate['LandAverageTemperature']
emission_series = climate['Annual CO₂ emissions (tonnes )']
for lag in range(1, 6):
    shifted = emission_series.shift(periods=lag)
    corr = temperature_series.corr(shifted)
    print(f"Correlation with CO₂ lagged by {lag} years: {round(corr, 2)}")

In [None]:
# Restrict the emission records to four named entities and draw one
# emissions-over-time line per entity.
industrial_names = ['China', 'United States', 'Germany', 'India']
industrial = co2.loc[co2['Entity'].isin(industrial_names)]
industrial_ax = plt.gca()
sns.lineplot(
    data=industrial,
    x='Year',
    y='Annual CO₂ emissions (tonnes )',
    hue='Entity',
    ax=industrial_ax,
)
industrial_ax.set_title('Industrial Nations CO₂ Trends')
plt.show()

In [None]:
import plotly.express as px
# Interactive line chart of the yearly mean temperature. The x-axis uses the
# 'Year' column that the merge carried over from the emissions side.
fig = px.line(
    data_frame=climate,
    x='Year',
    y='LandAverageTemperature',
    title='Temperature Change Over Years (Interactive)',
)
fig.show()

In [None]:
import statsmodels.api as sm
# Ordinary least squares: temperature regressed on emissions, with a constant
# column added to the design matrix so the fit includes an intercept.
y = climate['LandAverageTemperature']
X = sm.add_constant(climate['Annual CO₂ emissions (tonnes )'])
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())


In [None]:
# Bucket each year into its decade (e.g. 1987 -> 1980) and average both
# series within every decade.
climate['decade'] = (climate['year'] // 10) * 10
decade = climate.groupby('decade')[['LandAverageTemperature','Annual CO₂ emissions (tonnes )']].mean().reset_index()

# Temperature is in °C and emissions are in tonnes. On a single shared y-axis
# the emissions scale dominates and the temperature curve is squashed flat, so
# each series gets its own y-axis over the shared decade axis.
trend_fig, trend_temp_ax = plt.subplots()
sns.lineplot(data=decade, x='decade', y='LandAverageTemperature',
             label='Avg Temp', ax=trend_temp_ax, legend=False)
trend_co2_ax = trend_temp_ax.twinx()
# On its own axes the second line would restart the colour cycle and match the
# first one, so it is given an explicit, different colour.
sns.lineplot(data=decade, x='decade', y='Annual CO₂ emissions (tonnes )',
             label='CO2', color='green', ax=trend_co2_ax, legend=False)
trend_temp_ax.set_title('Decadal Trends of CO₂ and Temperature')

# One legend holding both entries, placed on the axes drawn last.
temp_handles, temp_labels = trend_temp_ax.get_legend_handles_labels()
co2_handles, co2_labels = trend_co2_ax.get_legend_handles_labels()
trend_co2_ax.legend(temp_handles + co2_handles, temp_labels + co2_labels, loc='upper left')
plt.show()


In [None]:
# Keep only the rows for four named entities, then plot each entity's
# emissions over time as its own coloured line.
is_selected_entity = co2['Entity'].isin(['China', 'India', 'United States', 'Germany'])
selected = co2[is_selected_entity]
selected_ax = plt.gca()
sns.lineplot(
    data=selected,
    x='Year',
    y='Annual CO₂ emissions (tonnes )',
    hue='Entity',
    ax=selected_ax,
)
selected_ax.set_title('CO₂ Emissions by Major Industrial Countries')
plt.show()


In [None]:
# Correlation between temperature and emissions shifted down by 0..5 rows,
# collected into a small table and shown as one bar per lag.
# NOTE(review): a shift of `lag` rows equals `lag` years only if `climate`
# holds exactly one row per consecutive year — confirm there are no gaps.
temperature_series = climate['LandAverageTemperature']
emission_series = climate['Annual CO₂ emissions (tonnes )']
lag_corrs = []
for lag in range(6):
    corr = temperature_series.corr(emission_series.shift(lag))
    lag_corrs.append(dict(Lag=lag, Correlation=corr))
lag_df = pd.DataFrame(lag_corrs)

lag_ax = plt.gca()
sns.barplot(data=lag_df, x='Lag', y='Correlation', palette='coolwarm', ax=lag_ax)
lag_ax.set_title('Correlation between CO₂ and Temperature with Lag')
plt.show()


In [None]:
from sklearn.linear_model import LinearRegression
# Fit temperature as a linear function of emissions with scikit-learn, then
# predict the temperature for three hypothetical emission levels.
# Note: this rebinds `X`, `y` and `model`, which the statsmodels cell also
# assigns.
X = climate[['Annual CO₂ emissions (tonnes )']]
y = climate['LandAverageTemperature']
model = LinearRegression().fit(X, y)

emission_scenarios = [1e10, 1.5e10, 2e10]
future = pd.DataFrame({'Annual CO₂ emissions (tonnes )': emission_scenarios})
preds = model.predict(future)
print(preds)


In [None]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE(review): this is the last cell, yet the CSVs are read near the top of
# the notebook via relative paths — confirm whether the mount is still needed
# and, if it is, whether it should run before the data is loaded.
from google.colab import drive
drive.mount('/content/drive')