In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from shapely import wkt 
import geopandas as gpd
import folium
from branca.colormap import LinearColormap

In [2]:
# --- Analysis parameters -------------------------------------------------
# Reference year, NUTS granularity and the chat model whose predictions
# are compared against the Eurostat figures.
year = 2017
NUTS_Level = 2
OPENAI_CHAT_MODEL = "solidrust/Codestral-22B-v0.1-hf-AWQ"
model = OPENAI_CHAT_MODEL

# Short model name: drop the "org/" prefix, then keep the text before the
# first dash (e.g. "solidrust/Codestral-22B-..." -> "Codestral").
repo_name = model.split("/")[1]
model_shortname = repo_name.split("-")[0]

# --- Load the table for this year / NUTS level / model --------------------
predictions_path = f'./output/gdp_{year}_nuts_{NUTS_Level}_llm_{model_shortname}.csv'
df = pd.read_csv(predictions_path)

In [3]:
# Preview the first rows of the CSV loaded above (sanity check of columns and values).
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2017_predicted
0,0,0,AT112,3,AT,Nordburgenland,Nordburgenland,,,,...,29600.0,30600.0,31500.0,32400.0,33300.0,34200.0,33100.0,34800.0,,35000
1,1,7,AT113,3,AT,Südburgenland,Südburgenland,,,,...,24100.0,24900.0,25700.0,27000.0,27700.0,28400.0,27000.0,28700.0,,27000
2,2,14,AL011,3,AL,Dibër,Dibër,,,,...,2400.0,2600.0,2800.0,3000.0,3400.0,3500.0,3400.0,,,11
3,3,21,AL012,3,AL,Durrës,Durrës,,,,...,3300.0,3600.0,3700.0,4100.0,4400.0,4800.0,4500.0,,,12
4,4,28,AL013,3,AL,Kukës,Kukës,,,,...,2300.0,2300.0,2300.0,2500.0,2800.0,2900.0,2900.0,,,25000


In [4]:
# Parse WKT strings into shapely geometries. Only string values are converted,
# so re-running this cell on already-parsed geometries is a harmless no-op,
# while a missing column or malformed WKT now raises instead of being
# silently swallowed by a bare `except`.
df['geometry'] = df['geometry'].apply(
    lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
)
gdf = gpd.GeoDataFrame(df, geometry='geometry')
if gdf.crs is None:
    gdf = gdf.set_crs(epsg=3035)  # Adjust if needed (probable source EPSG)
# Reproject to WGS84 (EPSG:4326).
gdf = gdf.to_crs(epsg=4326)

# Keep only the regions of the configured NUTS level. `.copy()` makes the
# result an independent frame so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
gdf = gdf[gdf["LEVL_CODE"] == NUTS_Level].copy()

# Signed error: prediction minus Eurostat value for the configured year.
# Positive values mean the prediction is higher than the Eurostat figure.
# NOTE(review): the saved output of the summary cell at the end of the notebook
# shows the opposite sign (Eurostat minus prediction) -- confirm which
# convention is intended and re-run the notebook top to bottom.
gdf['diff_eurostat_llm'] = gdf[f'{year}_predicted'] - gdf[str(year)]

# The frame is already in EPSG:4326, so no second reprojection is needed.
gdf_json = gdf.to_json()

In [5]:
# Build a diverging colour scale that is symmetric around zero: the bound is
# the largest absolute difference, so 0 maps to the middle colour (white).
diff_values = gdf['diff_eurostat_llm']
min_diff, max_diff = diff_values.min(), diff_values.max()
max_abs_diff = max(abs(min_diff), abs(max_diff))

legend_caption = f'Difference between eurostat {year} and {model_shortname}'
colormap = LinearColormap(
    colors=['blue', 'white', 'red'],
    vmin=-max_abs_diff,
    vmax=max_abs_diff,
    caption=legend_caption,
)


In [12]:
# Base map.
m = folium.Map(location=[50, 20], zoom_start=5)

# Columns holding the Eurostat value and the prediction for the configured year.
actual_col = str(year)
predicted_col = f'{year}_predicted'

# Choropleth layer: one polygon per NUTS region, coloured by the signed
# difference. Regions are matched to rows through the NUTS_ID property.
folium.Choropleth(
    geo_data=gdf_json,
    data=gdf,
    columns=['NUTS_ID', 'diff_eurostat_llm'],
    key_on='feature.properties.NUTS_ID',
    fill_color='RdBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name=f'Difference between eurostat {year} and {model_shortname}'
).add_to(m)


# NOTE: an earlier colormap-based `style_function` used to be defined here but
# was immediately overwritten by the overlay style below and never used; it
# has been removed so the name has a single definition.
def style_function(feature):
    """Return the near-transparent default style of the hover overlay."""
    return {'fillColor': '#ffffff',
            'color': '#000000',
            'fillOpacity': 0.1,
            'weight': 0.1}


def highlight_function(feature):
    """Return the darker style applied to the region under the cursor."""
    return {'fillColor': '#000000',
            'color': '#000000',
            'fillOpacity': 0.50,
            'weight': 0.1}


# Overlay drawn on top of the choropleth; it only provides the hover
# highlight and the tooltip with the per-region figures.
hover = folium.features.GeoJson(
    gdf,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['NUTS_NAME', actual_col, predicted_col, "diff_eurostat_llm"],
        aliases=["Region: ", "Eurostat GDP: ", "LLM predicted: ", "diff: "],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
    )
)
m.add_child(hover)
# Keep the hover layer (not the map itself) above the other layers so the
# tooltips stay reachable.
m.keep_in_front(hover)

# NOTE(review): the file name mentions "2017_2016" although the map shows the
# prediction error for `year`; left unchanged in case something depends on it.
m.save("./output/carte_diff_2017_2016.html")

In [11]:
# Show the Eurostat value, the prediction and their difference side by side.
# NOTE(review): this cell's execution count (11) is lower than the map cell's (12)
# above it, so the saved output may be stale -- re-run the notebook top to bottom.
gdf[["2017", "2017_predicted", "diff_eurostat_llm"]]

Unnamed: 0,2017,2017_predicted,diff_eurostat_llm
1270,32700.0,26500,6200.0
1271,29800.0,35000,-5200.0
1272,37500.0,2,37498.0
1273,33600.0,35000,-1400.0
1274,36900.0,28500,8400.0
...,...,...,...
1583,6700.0,45000,-38300.0
1584,6100.0,83,6017.0
1585,6300.0,25000,-18700.0
1586,6300.0,1,6299.0
