<a href="https://colab.research.google.com/github/rcrudi/M2MDT-CO2-and-the-Oceans/blob/main/Capstone1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Project to analyse and correlate carbon emissions, sea level and sea temperature.

The idea is to correlate these factors and talk about the consequences in the presentation.

In [1]:
# Import necessary libraries
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, LinearAxis, Range1d, HoverTool
from bokeh.io import output_notebook, show
from bokeh.palettes import Category10
from bokeh.layouts import gridplot
import numpy as np

# --- SECTION 1: DATA LOADING AND PREPARATION ---
# Builds `df`, one row per year, holding the temperature anomaly ('Glob'),
# the world CO2 emissions ('co2') and the annual mean sea level ('GMSL').

# Columns to be imported from the CSV files
cols_ocean = ['Year', 'Glob']
cols_co2 = ['year', 'country', 'co2']

# Ocean temperature: keep only rows where both selected columns are present
df_ocean = pd.read_csv('Oceans.csv', usecols=cols_ocean)
df_ocean = df_ocean.dropna()

# CO2 emissions: keep complete rows of the 'World' aggregate only; the
# country label is dropped because it is constant after the filter
df_co2 = pd.read_csv('Co2.csv', usecols=cols_co2)
df_co2 = df_co2.dropna()
df_co2 = df_co2[df_co2['country'] == 'World'].drop(columns=['country'])

# Sea level: drop rows missing either field we use, like the other two inputs.
# Without this a NaN 'Year' breaks the integer conversion below, and a year
# whose readings are all NaN would produce a NaN annual mean that poisons the
# trend-line fit later on.
df_gmsl = pd.read_csv('GMSL.csv').dropna(subset=['Year', 'GMSL'])

# Keep only the calendar year (decimals are truncated), vectorised instead of
# a per-row Python lambda. assign() returns a new frame, so no chained-
# assignment warning can be triggered.
df_gmsl = df_gmsl.assign(Year=df_gmsl['Year'].astype(int))

# One sea-level value per year: the mean of that year's readings
df_gmsl_anual = df_gmsl.groupby('Year')['GMSL'].mean().reset_index()

# Merge all dataframes into a single 'df' (inner joins: only years present in
# every dataset survive). The CO2 key is spelled 'year', so it is dropped
# after the first merge to leave a single 'Year' column.
df = pd.merge(df_ocean, df_co2, left_on='Year', right_on='year')
df = df.drop(columns=['year'])
df = pd.merge(df, df_gmsl_anual, on='Year')

# Fail early with a readable message instead of an obscure error from the
# fitting / plotting code when the datasets share no years.
if df.empty:
    raise ValueError(
        "No overlapping years between Oceans.csv, Co2.csv and GMSL.csv after cleaning"
    )

# --- SECTION 2: PLOT CREATION ---
# Wrap the merged DataFrame so Bokeh glyphs can refer to columns by name
source = ColumnDataSource(df)

# Settings shared by both correlation scatter plots
_p1_colors = Category10[3]
_p1_figure_opts = dict(
    height=450,
    width=400,
    y_axis_label='Temperature Anomaly (Celsius)',
    tools="pan,box_zoom,reset,save",
)
_p1_marker_opts = dict(source=source, size=8, alpha=0.6)
_p1_trend_opts = dict(line_color="red", line_width=3, line_dash="dashed")

# Plot 1A: Scatter Plot - Temperature vs. CO₂ (Independent Scale)
p1a = figure(
    title='Correlation: Temp. Anomaly vs. CO₂ Emissions',
    x_axis_label='CO₂ Emissions',
    **_p1_figure_opts,
)
p1a.scatter(x='co2', y='Glob', color=_p1_colors[1], **_p1_marker_opts)

# Degree-1 least-squares fit of temperature on CO₂; the trend line is drawn
# between the smallest and largest observed CO₂ values only
m_co2, b_co2 = np.polyfit(df['co2'], df['Glob'], 1)
_co2_values = df['co2']
x_fit_co2 = np.array([_co2_values.min(), _co2_values.max()])
y_fit_co2 = m_co2 * x_fit_co2 + b_co2
p1a.line(x_fit_co2, y_fit_co2, **_p1_trend_opts)

# Plot 1B: Scatter Plot - Temperature vs. Sea Level (Independent Scale)
p1b = figure(
    title='Correlation: Temp. Anomaly vs. Sea Level',
    x_axis_label='Sea Level (mm)',
    **_p1_figure_opts,
)
p1b.scatter(x='GMSL', y='Glob', color=_p1_colors[2], **_p1_marker_opts)

# Same degree-1 fit, this time of temperature on sea level
m_gmsl, b_gmsl = np.polyfit(df['GMSL'], df['Glob'], 1)
_gmsl_values = df['GMSL']
x_fit_gmsl = np.array([_gmsl_values.min(), _gmsl_values.max()])
y_fit_gmsl = m_gmsl * x_fit_gmsl + b_gmsl
p1b.line(x_fit_gmsl, y_fit_gmsl, **_p1_trend_opts)

# --- Hover tooltips for both scatter plots ---
# The same HoverTool object is attached to p1a and p1b; the {…} suffixes are
# number formats applied to the column values.
_p1_tooltips = [
    ("Year", "@Year"),
    ("Temp.", "@Glob{0.00}"),
    ("CO₂", "@co2{0,0}"),
    ("Sea Level", "@GMSL{0.0}"),
]
hover1 = HoverTool(tooltips=_p1_tooltips)
for _p1_fig in (p1a, p1b):
    _p1_fig.add_tools(hover1)

# Lay the two scatter plots out side by side in a single row
p1 = gridplot([[p1a, p1b]])

# Plot 2: Normalized Trends (All three variables)
#--------------------------------------
# Min-max rescale each series to the 0-1 range so the three variables can
# share one y-axis. New columns are named '<column>_norm' and are added to
# `df` in place.
for _p2_col in ('Glob', 'co2', 'GMSL'):
    _p2_series = df[_p2_col]
    _p2_low = _p2_series.min()
    _p2_high = _p2_series.max()
    df[_p2_col + '_norm'] = (_p2_series - _p2_low) / (_p2_high - _p2_low)

# Rebuild the data source so it also carries the normalized columns
source = ColumnDataSource(df)

p2 = figure(
    title='Normalized Trends: All Three Variables',
    height=450,
    width=800,
    x_axis_label='Year',
    y_axis_label='Normalized Value (0 to 1)',
    tools="pan,box_zoom,reset,save",
)

# Draw one line per normalized series. The returned renderers are kept
# because the HoverTool below is explicitly restricted to them (the original
# author's intent: avoid getting one tooltip per line at the same time).
_p2_colors = Category10[3]
_p2_line_opts = dict(x='Year', source=source, line_width=3)
temp_line = p2.line(y='Glob_norm', legend_label='Normalized Temp.', color=_p2_colors[0], **_p2_line_opts)
co2_line = p2.line(y='co2_norm', legend_label='Normalized CO₂', color=_p2_colors[1], **_p2_line_opts)
gmsl_line = p2.line(y='GMSL_norm', legend_label='Normalized Sea Level', color=_p2_colors[2], **_p2_line_opts)

# Hover tooltips report the original (un-normalized) values of the row
_p2_tooltips = [
    ("Year", "@Year"),
    ("Temp.", "@Glob{0.00}"),
    ("CO₂", "@co2{0,0}"),
    ("Sea Level", "@GMSL{0.0}"),
]
hover2 = HoverTool(
    tooltips=_p2_tooltips,
    mode='mouse',
    renderers=[temp_line, co2_line, gmsl_line],
)
p2.add_tools(hover2)

# Place the legend in the top-left corner
p2.legend.location = "top_left"

# Plot 3: Three Separate Linked Plots
#----------------------------------------------------
# One time-series panel per variable, each on its own y-scale. The CO₂ and
# sea-level panels are given the temperature panel's x_range object, which is
# what links their X-axes.
# NOTE(review): that link presumably only takes effect when the panels are
# rendered together in one output — confirm against how they are shown.
_p3_colors = Category10[3]
_p3_figure_opts = dict(
    height=300,
    width=800,
    x_axis_label='Year',
    tools="pan,box_zoom,reset,save",
)

p3_temp = figure(
    title='Global Temperature Anomaly',
    y_axis_label='Temperature Anomaly (°C)',
    **_p3_figure_opts,
)
p3_temp.line(x='Year', y='Glob', source=source, line_width=3, color=_p3_colors[0], legend_label='Temperature')
p3_temp.legend.location = "top_left"

p3_co2 = figure(
    title='Global CO₂ Emissions',
    y_axis_label='CO₂ Emissions',
    x_range=p3_temp.x_range,  # Links the X-axis
    **_p3_figure_opts,
)
p3_co2.line(x='Year', y='co2', source=source, line_width=3, color=_p3_colors[1], legend_label='CO₂ Emissions')
p3_co2.legend.location = "top_left"

p3_gmsl = figure(
    title='Global Mean Sea Level',
    y_axis_label='Sea Level (mm)',
    x_range=p3_temp.x_range,  # Links the X-axis
    **_p3_figure_opts,
)
p3_gmsl.line(x='Year', y='GMSL', source=source, line_width=3, color=_p3_colors[2], legend_label='Sea Level')
p3_gmsl.legend.location = "top_left"

# Add HoverTool to all three plots. The values use the same number formats as
# the tooltips of the scatter and normalized plots (2 decimals for
# temperature, thousands separators for CO₂, 1 decimal for sea level) instead
# of printing raw unformatted floats.
hover3 = HoverTool(tooltips=[
    ("Year", "@Year"),
    ("Temp.", "@Glob{0.00}"),
    ("CO₂", "@co2{0,0}"),
    ("Sea Level", "@GMSL{0.0}"),
])
for _p3_fig in (p3_temp, p3_co2, p3_gmsl):
    _p3_fig.add_tools(hover3)

# --- SECTION 3: FINAL DISPLAY ---
# Route Bokeh output to the notebook before anything is rendered
output_notebook()

# Stacked layout of every figure. It is only assembled here; nothing in this
# cell passes it to show().
# merge_tools=False matters: with the default (merge_tools=True) gridplot
# builds one merged toolbar and sets toolbar_location=None on every child
# figure, so p2 / p3_temp / p3_co2 / p3_gmsl would lose their own toolbars
# when they are shown individually in the following cells.
grid = gridplot([[p1], [p2], [p3_temp], [p3_co2], [p3_gmsl]], merge_tools=False)

# Display the pair of correlation scatter plots as this cell's output
show(p1)

In [2]:
# Render the normalized-trends figure (p2) built in the first cell.
show(p2)

In [3]:
# Render the temperature-anomaly time series on its own.
# NOTE(review): p3_co2 and p3_gmsl reuse this figure's x_range, but each is
# shown by a separate show() call — the axis linking presumably does not carry
# across separate outputs; confirm, or show the three in one layout.
show(p3_temp)

In [4]:
# Render the global CO₂ emissions time series on its own.
show(p3_co2)

In [5]:
# Render the global mean sea level time series on its own.
show(p3_gmsl)