# A Python workbook for the analyses for the Positive Tipping (PosTip) EDITS Fast-Track project

In [1]:
# Imports and reading

import os
import re

import matplotlib.pyplot as plt
import nbformat
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

# Version tag of the adjusted-datasets export to analyse.
VERSION_FOR_DATA = "v25"

# Directory that holds the input CSV (the per-innovation correlation PDF is
# written there as well). Set the POSTIP_DATA_DIR environment variable to point
# the notebook at another location without editing it; when the variable is
# unset, the original author's directory is used.
PATH = os.environ.get(
    "POSTIP_DATA_DIR",
    "/mnt/c/Users/simon.destercke/Documents/misc/iiasa/DoSI",
)
fn_data = f"{PATH}/adjusted_datasets_{VERSION_FOR_DATA}.csv"

In [2]:
# Load the export and clean it in one pass. "Indicator Number" is read as text
# so that codes such as "1.1" are compared as strings, not floats.
dosi_df = (
    pd.read_csv(fn_data, converters={"Indicator Number": str})
    # Entries in "Value" that cannot be parsed as numbers become NaN ...
    .assign(Value=lambda frame: pd.to_numeric(frame["Value"], errors="coerce"))
    # ... and the rows carrying them are discarded.
    .dropna(subset=["Value"])
    # Correct for trailing spaces in the data
    .assign(**{
        "Spatial Scale": lambda frame: frame["Spatial Scale"].str.rstrip(),
        "Innovation Name": lambda frame: frame["Innovation Name"].str.rstrip(),
    })
)

In [3]:
# Overview of the cleaned frame: column dtypes, non-null counts and memory use.
dosi_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 53860 entries, 0 to 54011
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Year              53860 non-null  int64  
 1   Value             53860 non-null  float64
 2   Innovation Name   53860 non-null  object 
 3   Indicator Number  53860 non-null  object 
 4   Indicator Name    53860 non-null  object 
 5   Description       53860 non-null  object 
 6   Metric            53860 non-null  object 
 7   Data Source       53447 non-null  object 
 8   Comments          44297 non-null  object 
 9   Spatial Scale     53860 non-null  object 
 10  File              53860 non-null  object 
 11  Sheet             53860 non-null  object 
dtypes: float64(1), int64(1), object(10)
memory usage: 5.3+ MB


In [4]:
# Columns that, taken together, identify one time series.
grouping_columns = ["Innovation Name", "Description", "Metric", "Spatial Scale"]

# Keep only the rows of indicator "1.1", restricted to Year, Value and the
# grouping columns. Rows and columns are selected in a single .loc call and the
# result is copied explicitly, so adoptions_df is an independent frame: adding
# a column to it later does not raise pandas' SettingWithCopyWarning.
adoptions_df = dosi_df.loc[
    dosi_df["Indicator Number"] == "1.1",
    ["Year", "Value", *grouping_columns],
].copy()

In [5]:
# One label per row: the grouping columns, cast to str and joined with "-".
# This label is what distinguishes the individual time series from here on.
adoptions_df["combined_name"] = adoptions_df[grouping_columns].astype(str).agg('-'.join, axis=1)

In [6]:
# Wide layout: one row per Year, one column per combined_name, cells hold Value.
adoptions_pivot = adoptions_df.pivot(index='Year', columns='combined_name', values='Value')

In [7]:
# Diagnostic: rows whose (Year, combined_name) pair occurs more than once;
# keep=False retains every member of each duplicated group.
# NOTE(review): this check runs after the pivot cell, and DataFrame.pivot
# presumably already fails on such duplicates — confirm the intended order.
dupes = adoptions_df[adoptions_df.duplicated(subset=['Year', 'combined_name'], keep=False)]

In [8]:
# Number of distinct series labels (missing values, if any, count as one label).
print(adoptions_df["combined_name"].nunique(dropna=False))

359


In [9]:
# Pairwise correlation between the series columns of the pivot, using the
# DataFrame.corr defaults.
correlation_matrix = adoptions_pivot.corr()

In [10]:
# Size the canvas from the matrix: a fixed number of inches per row/column,
# truncated to whole inches. With roughly 360 series this is a very large figure.
figsize_per_cell = 0.25  # raise this if the cells are still too tight
size = int(correlation_matrix.shape[0] * figsize_per_cell)

heatmap_fig, heatmap_ax = plt.subplots(figsize=(size, size))
sns.heatmap(
    correlation_matrix,
    ax=heatmap_ax,
    cmap='coolwarm',
    center=0,
    square=True,
    xticklabels=True,
    yticklabels=True,
)

# Tick labels: vertical on x, horizontal on y, in a small font so that every
# series name is drawn
plt.setp(heatmap_ax.get_xticklabels(), rotation=90, fontsize=4)
plt.setp(heatmap_ax.get_yticklabels(), rotation=0, fontsize=4)
heatmap_fig.tight_layout()

# Write the figure to a PDF in the working directory, then release it
heatmap_fig.savefig("time_series_correlation_heatmap.pdf", format='pdf', dpi=300)
plt.close(heatmap_fig)

In [13]:
# Long-format copy of the correlation matrix: one row per
# (combined_name, compared_to) pair with its correlation value
corr_long = pd.melt(
    correlation_matrix.reset_index(),
    id_vars='combined_name',
    var_name='compared_to',
    value_name='correlation',
)

# Interactive heatmap, built as one chain: draw the matrix, attach the hover
# text, then enlarge the canvas so a matrix of this size stays legible
fig = (
    px.imshow(
        correlation_matrix.values,
        x=correlation_matrix.columns,
        y=correlation_matrix.index,
        color_continuous_scale='RdBu',
        zmin=-1,
        zmax=1,
        labels={'color': 'Correlation'},
        text_auto='.2f',
    )
    .update_traces(
        hovertemplate="<b>%{y}</b> vs <b>%{x}</b><br>Correlation: %{z:.2f}<extra></extra>",
    )
    .update_layout(
        width=6000,
        height=6000,
        title="Interactive Time Series Correlation Heatmap",
        xaxis_tickangle=90,
    )
)

# Saved as a standalone HTML page in the working directory; call fig.show()
# instead to render it inline
fig.write_html("interactive_correlation_heatmap.html")

In [None]:
# Now just look for each innovation: one correlation heatmap per
# (innovation, region) pair, collected as the pages of a single PDF under PATH.

# NOTE(review): this cell rebinds the notebook-level `grouping_columns` and,
# inside the loop, `correlation_matrix`, both of which earlier cells also
# define. Re-running an earlier cell that reads them after this one picks up
# the values set here. The names are left unchanged in case later cells rely
# on them — consider dedicated names once that is confirmed.
grouping_columns = ["Indicator Name","Description", "Metric"]

# The context manager closes the PDF even if a pivot or plotting call raises
# part-way through the loop.
with PdfPages(f"{PATH}/correlations_within_innovation.pdf") as pdf_postip_within_innovation:
    for innovation in dosi_df["Innovation Name"].unique():
        # Filter by innovation once and reuse the result for each of its regions
        rows_for_innovation = dosi_df[dosi_df["Innovation Name"] == innovation]
        for region in rows_for_innovation["Spatial Scale"].unique():
            # Explicit copy: a column is added to this frame just below
            innovation_df = rows_for_innovation.loc[
                rows_for_innovation["Spatial Scale"] == region,
                ["Year", "Value", *grouping_columns],
            ].copy()
            innovation_df["combined_name"] = innovation_df[grouping_columns].astype(str).agg('-'.join, axis=1)
            title = innovation + " in " + region
            print(title)

            # A single indicator has nothing to be correlated with: no page
            number_of_indicators = innovation_df["combined_name"].nunique(dropna=False)
            if number_of_indicators <= 1:
                continue

            innovation_pivot = innovation_df.pivot(index='Year', columns='combined_name', values='Value')
            correlation_matrix = innovation_pivot.corr()

            page_fig, page_ax = plt.subplots(figsize=(4, 4))
            sns.heatmap(
                correlation_matrix,
                ax=page_ax,
                cmap='coolwarm',
                center=0,
                square=True,
                xticklabels=True,
                yticklabels=True,
            )

            # Rotate axis labels for readability
            plt.setp(page_ax.get_xticklabels(), rotation=90, fontsize=4)
            plt.setp(page_ax.get_yticklabels(), rotation=0, fontsize=4)
            # Use the full "<innovation> in <region>" label so that pages for
            # different regions of the same innovation can be told apart
            page_ax.set_title(title)
            page_fig.tight_layout()

            # Append the page to the PDF, then free the figure
            pdf_postip_within_innovation.savefig(page_fig)
            plt.close(page_fig)
    

active mobility in Amsterdam
active mobility in Beijing
active mobility in China
active mobility in Copenhagen
active mobility in Denmark
active mobility in Global
active mobility in The Netherlands
climate protest in Bangladesh
climate protest in Germany
climate protest in Global
climate protest in India
climate protest in Sweden
climate protest in UK
climate protest in US
co-housing in Denmark
co-housing in Germany
co-housing in Global
co-housing in Switzerland
co-housing in US
co-housing in Canton de Vaud (Switzerland)
car ownership in Berlin
car ownership in Global
car ownership in Hamburg
car ownership in Heidelberg
car sharing in Germany
car sharing in Global
mobesity in France
mobesity in Germany
mobesity in Global
digital skills in Denmark
digital skills in Global
digital skills in Italy
digital skills in Norway
digital skills in Poland
digital skills in Portugal
digital skills in Sweden
downsizing in Global
downsizing in Switzerland
drivers licence in Global
drivers licence in