# A Python workbook for the analyses for the Postitive Tipping (PosTip) EDITS Fast-Track project

In [137]:
# Imports and reading

import pandas as pd
import numpy as np

import re
from sklearn.linear_model import LinearRegression
from scipy.stats import linregress
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import nbformat

import plotly.express as px
import plotly.graph_objects as go

VERSION_FOR_DATA = "v25"
VERSION_FOR_FITPARAMETERS = "v26"
VERSION_FOR_METADATA = "v25"
YEAR_PADDING_FOR_PLOTTING = 10

PATH = "/mnt/c/Users/simon.destercke/Documents/misc/iiasa/DoSI"
fn_data = f"{PATH}/adjusted_datasets_{VERSION_FOR_DATA}.csv"
fn_summary = f"""{PATH}/summary_table_{VERSION_FOR_FITPARAMETERS}.csv"""
fn_clusters = f"""{PATH}/PosTip_Clusters.csv""" # Summary file by Charlie
fn_early = f"""{PATH}/EarlyAdopterRegions_perInnovation_21March.csv""" # Early Adopting regions
fn_metadata = f"{PATH}/metadata_master_{VERSION_FOR_METADATA}.xlsx"

dosi_df = pd.read_csv(fn_data, converters={"Indicator Number": str})
dosi_df["Value"] = pd.to_numeric(dosi_df["Value"], errors="coerce")
dosi_df = dosi_df.dropna(subset=["Value"])

# Correct for trailing spaces in the data
dosi_df["Spatial Scale"] = dosi_df["Spatial Scale"].str.rstrip()
dosi_df["Innovation Name"] = dosi_df["Innovation Name"].str.rstrip()

summary_df = pd.read_csv(fn_summary, converters={"Indicator Number": str})

clusters_df = pd.read_csv(fn_clusters, skiprows=15, nrows=28, usecols=[8, 35, 36, 37, 38, 39], encoding='ISO-8859-1', header=0)
clusters_df.rename(columns={clusters_df.columns[0]: 'innovation code'}, inplace=True) # If there is an error here, then there may be a column reference error, e.g. the first column of the csv file is empty and pd.red_csv skips it
clusters_dict = {
    col: clusters_df.loc[~clusters_df[col].isna(), "innovation code"].tolist()
    for col in clusters_df.columns[1:]
}

early_df = pd.read_csv(fn_early, usecols = [0,1])
early_dict = dict(zip(early_df.iloc[:, 0], early_df.iloc[:, 1]))


In [70]:
# Metadata / codes

def convert_to_three_digit_notation(s):
    return re.sub(r"([a-zA-Z])(\d+)", lambda m: f"{m.group(1)}{int(m.group(2)):03}", s)


def read_metadata_table(fn, columns):
    df = pd.read_excel(fn, usecols=columns, dtype=str).dropna().reset_index(drop=True)
    df.iloc[:, 1] = df.iloc[:, 1].apply(convert_to_three_digit_notation)
    return df.set_index(df.columns[0])[df.columns[1]].to_dict()

metadata = dict()
metadata["Innovation Name"] = read_metadata_table(fn_metadata, "A,D")
metadata["Spatial Scale"] = read_metadata_table(fn_metadata, "G,I")
metadata["Indicator Number"] = read_metadata_table(
    fn_metadata, "L,O"
)  # Column M is the indicator name. Superfluous because maps 1-1 on indicator number
metadata["Description"] = read_metadata_table(fn_metadata, "R,S")
metadata["Metric"] = read_metadata_table(fn_metadata, "V,W")

for key, nested_dict in metadata.items():
    if isinstance(nested_dict, dict):  # Ensure the value is a dictionary
        metadata[key] = {
            k.lower() if isinstance(k, str) else k: v for k, v in nested_dict.items()
        }

In [154]:
# Attach codes to data file

dosi_df["Innovation Code"] = dosi_df["Innovation Name"].str.lower().map(metadata["Innovation Name"])
dosi_df["Region Code"] = dosi_df["Spatial Scale"].str.lower().map(metadata["Spatial Scale"])
dosi_df["Early Adopter Code"] = dosi_df["Innovation Code"].map(early_dict)
dosi_df["Indicator Code"] = dosi_df["Indicator Number"].str.lower().map(metadata["Indicator Number"])
dosi_df["Description Code"] = dosi_df["Description"].str.lower().map(metadata["Description"])
dosi_df["Metric Code"] = dosi_df["Metric"].str.lower().map(metadata["Metric"])
dosi_df["Code"] = dosi_df[['Innovation Code', 'Region Code', 'Indicator Code', 'Description Code', 'Metric Code']].agg('_'.join, axis=1)

In [145]:
def FPLogValue_with_scaling(x, t0, Dt, s):
    """
    Logistic function with vertical scaling.|
    """
    return s / (1 + np.exp(-np.log(81) * (x - t0) / Dt))

In [None]:
# Set default iterator values to experiment/test

innovation = "crs"
code = "act_net_1.1Ado_d167_m096"
cluster = "sufficiency"

In [229]:
region = early_dict[innovation] # Early adopting region

innovation_df = dosi_df[(dosi_df["Innovation Code"] == innovation) & (dosi_df["Region Code"] == region)].copy()
innovation_summary_df = summary_df[(summary_df["Code"].str.split('_').str[0] == innovation) & (summary_df["Code"].str.split('_').str[1] == region)]


In [230]:
year_min = innovation_df["Year"].min() - YEAR_PADDING_FOR_PLOTTING
year_max = innovation_df["Year"].max() + YEAR_PADDING_FOR_PLOTTING

years_for_plotting = np.linspace(year_min,year_max, 100)

# Generate a color palette using Plotly (or you can use matplotlib or another method)
colors = px.colors.qualitative.Set1  # Set1 is a predefined color palette

figures = []

In [231]:
fig = go.Figure()

for i, code in enumerate(innovation_summary_df["Code"]):
    t0 = innovation_summary_df[innovation_summary_df["Code"] == code]["log_t0"].iloc[0]
    Dt = innovation_summary_df[innovation_summary_df["Code"] == code]["log_Dt"].iloc[0]
    K = innovation_summary_df[innovation_summary_df["Code"] == code]["log_K"].iloc[0]

    # Assign color from the color cycle
    color = colors[i % len(colors)]  # Cycle through the colors if more codes than colors

    # Add the points trace (same color as line)
    fig.add_trace(
        go.Scatter(
            x=dosi_df[dosi_df["Code"] == code]["Year"],
            y=dosi_df[dosi_df["Code"] == code]["Value"] / K,
            mode='markers',
            name=f'{code} K-normalized data',  # This can be the same name to link with the line in the legend
            hovertemplate=f'{code} Point<br>x=%{{x}}<br>y=%{{y}}<extra></extra>',  # Custom tooltip
            marker=dict(size=8, color=color)  # Same color for points as the line
        )
    )

    fig.add_trace(
        go.Scatter(
            x=years_for_plotting,
            y=FPLogValue_with_scaling(years_for_plotting, t0, Dt, K) / K,
            mode='lines',
            name=code,               # Legend label
            showlegend = False,
            line=dict(color=color, width=2),
            hovertemplate=f'{code}<br>x=%{{x:.2f}}<br>y=%{{y:.2f}}<extra></extra>',  # Custom tooltip
        )
    )

    fig.update_layout(
    title='Multiple Series Curve Plot',
    xaxis_title='X Axis',
    yaxis_title='Y Axis',
    #hovermode='x unified'
    yaxis=dict(
        range=[0, 1.2]  # Set the y-axis limits to [0, 5]
    )
)

#fig.show()
fig.write_html('logs.html')


overflow encountered in exp



In [232]:
EARLY_ADOPTING_REGIONS_ONLY = True

# Now within clusters, only adoption

cluster_innovations_df = dosi_df[(dosi_df["Innovation Code"].isin(clusters_dict[cluster])) & (dosi_df["Indicator Number"] == '1.1')].copy()
cluster_innovations_summary_df = summary_df[(summary_df["Code"].str.split('_').str[0].isin(clusters_dict[cluster])) & (summary_df["Indicator Number"] == "1.1")]

if EARLY_ADOPTING_REGIONS_ONLY:
    # Only the early adopting regions?
    cluster_innovations_df = dosi_df[(dosi_df["Innovation Code"].isin(clusters_dict[cluster])) & (dosi_df["Indicator Number"] == '1.1') &
                                    (dosi_df["Region Code"] == dosi_df["Early Adopter Code"])].copy()
    cluster_innovations_summary_df = summary_df[(summary_df["Code"].str.split('_').str[0].isin(clusters_dict[cluster])) & (summary_df["Code"].str.split('_').str[1] == summary_df["Code"].str.split('_').str[0].map(early_dict)) & (summary_df["Indicator Number"] == "1.1")]

    # Only the early adopting regions and market shares?
    cluster_innovations_df = dosi_df[(dosi_df["Innovation Code"].isin(clusters_dict[cluster])) & (dosi_df["Indicator Number"] == '1.1') &
                                    (dosi_df["Region Code"] == dosi_df["Early Adopter Code"]) &
                                    (dosi_df["Metric"] == "market share")].copy()
    cluster_innovations_summary_df = summary_df[(summary_df["Code"].str.split('_').str[0].isin(clusters_dict[cluster])) & (summary_df["Code"].str.split('_').str[1] == summary_df["Code"].str.split('_').str[0].map(early_dict)) &
                                                (summary_df["Indicator Number"] == "1.1") &
                                                (summary_df["Metric"] == "market share")]


year_min = cluster_innovations_df["Year"].min() - YEAR_PADDING_FOR_PLOTTING
year_max = cluster_innovations_df["Year"].max() + YEAR_PADDING_FOR_PLOTTING

years_for_plotting = np.linspace(year_min,year_max, 100)

# Generate a color palette using Plotly (or you can use matplotlib or another method)
colors = px.colors.qualitative.Set1  # Set1 is a predefined color palette

figures = []

In [233]:
cluster_innovations_summary_df

Unnamed: 0,Code,Innovation Name,Spatial Scale,Indicator Number,Indicator Name,Description,Metric,Category,slope_log,slope_exp,...,select_non1.1s_allregions_AGREE?,FINAL AGREED,use_logfit_FIN,use_linfit_FIN,select_1.1_earegion_FIN,select_1.1_allregions_FIN,select_non1.1s_earegion_FIN,select_non1.1s_allregions_FIN,Delete from working file,Unnamed: 89
22,act_net_1.1Ado_d332_m185,active mobility,The Netherlands,1.1,Adoption over time,% trips by walking and biking,market share,b7,0.070443,0.026144,...,,,1.0,1.0,1.0,1.0,0.0,0.0,,
264,eat_ger_1.1Ado_d326_m185,eating less meat,Germany,1.1,Adoption over time,red meat as a share of meat consumption,market share,b1,-0.013897,0.000752,...,,,1.0,1.0,1.0,1.0,0.0,0.0,,
499,ebi_net_1.1Ado_d333_m185,e-bikes,The Netherlands,1.1,Adoption over time,e-bikes as a share of bikes sold,market share,b8,0.19154,0.1091,...,,,1.0,1.0,1.0,1.0,0.0,0.0,,
1031,org_den_1.1Ado_d327_m185,organic food consumption,Denmark,1.1,Adoption over time,organic as a share of retail sales,market share,b2,0.13792,0.001524,...,,,1.0,1.0,1.0,1.0,0.0,0.0,,
1263,qui_uki_1.1Ado_d348_m185,quitting smoking,UK,1.1,Adoption over Time,share of payments that are non-cash,market share,s7,-0.061392,-0.045612,...,,,1.0,1.0,1.0,1.0,0.0,0.0,,


In [234]:
fig = go.Figure()

for i, code in enumerate(cluster_innovations_summary_df["Code"]):
    t0 = cluster_innovations_summary_df[cluster_innovations_summary_df["Code"] == code]["log_t0"].iloc[0]
    Dt = cluster_innovations_summary_df[cluster_innovations_summary_df["Code"] == code]["log_Dt"].iloc[0]
    K = cluster_innovations_summary_df[cluster_innovations_summary_df["Code"] == code]["log_K"].iloc[0]

    innovation_name = cluster_innovations_summary_df[cluster_innovations_summary_df["Code"] == code]["Innovation Name"].iloc[0]
    region_name = cluster_innovations_summary_df[cluster_innovations_summary_df["Code"] == code]["Spatial Scale"].iloc[0]

    # Assign color from the color cycle
    color = colors[i % len(colors)]  # Cycle through the colors if more codes than colors

    # Add the points trace (same color as line)
    fig.add_trace(
        go.Scatter(
            x=dosi_df[dosi_df["Code"] == code]["Year"],
            y=dosi_df[dosi_df["Code"] == code]["Value"] / K,
            mode='markers',
            name=f'{innovation_name} K-normalized data ({region_name})',  # This can be the same name to link with the line in the legend
            hovertemplate=f'{code} Point<br>x=%{{x}}<br>y=%{{y}}<extra></extra>',  # Custom tooltip
            marker=dict(size=8, color=color)  # Same color for points as the line
        )
    )

    fig.add_trace(
        go.Scatter(
            x=years_for_plotting,
            y=FPLogValue_with_scaling(years_for_plotting, t0, Dt, K) / K,
            mode='lines',
            name=code,               # Legend label
            showlegend = False,
            line=dict(color=color, width=2),
            hovertemplate=f'{code}<br>x=%{{x:.2f}}<br>y=%{{y:.2f}}<extra></extra>',  # Custom tooltip
        )
    )

    fig.update_layout(
    title="Cluster " + cluster + (" Early Adopting Regions Only" if EARLY_ADOPTING_REGIONS_ONLY else ""),
    xaxis_title='X Axis',
    yaxis_title='Y Axis',
    #hovermode='x unified'
    yaxis=dict(
        range=[0, 1.2]  # Set the y-axis limits to [0, 5]
    )
)

#fig.show()
fig.write_html('cluster.html')

In [207]:
# Prepare to save to a PDF using matplotlib's PdfPages
output_pdf_path = "output_graphs.pdf"
with PdfPages(output_pdf_path) as pdf:
    for i, fig in enumerate(figures):
        # Save the Plotly figure as a PNG image
        image_path = f"temp_image_{i}.png"
        fig.write_image(image_path)  # Save the current figure as a PNG file
        
        # Read the image and add it to the PDF
        img = plt.imread(image_path)  # Use Matplotlib to read the image
        fig, ax = plt.subplots(figsize=(8, 6))  # Set figure size
        ax.imshow(img)
        ax.axis('off')  # Turn off axis
        
        # Save the image to a new page in the PDF
        pdf.savefig(fig)  # Save current figure to PDF
        plt.close(fig)  # Close the matplotlib figure to free memory
        
        # Optional: Delete the temporary PNG file
        os.remove(image_path)

print(f"PDF saved to {output_pdf_path}")

PDF saved to output_graphs.pdf
