In [7]:
import geopandas as gpd
import pandas as pd

# Canton polygons; the spatial join below reads their `id` and `name` columns.
cantons_gdf = gpd.read_file('UsedData/cantons.geojson')

df_clean = pd.read_csv('Results/charger_data_geocoded.csv')

# Build one point per charger from its lon/lat columns. The join compares raw
# coordinates, so the points are declared to be in the polygons' CRS; this makes
# that assumption explicit and avoids geopandas' CRS-mismatch warning.
geometry = gpd.points_from_xy(df_clean.lon, df_clean.lat)
stations_gdf = gpd.GeoDataFrame(df_clean, geometry=geometry, crs=cantons_gdf.crs)

# Attach the canton each charger lies within. `predicate=` replaces the `op=`
# keyword, which geopandas deprecated in 0.10 and removed in 1.0.
joined_gdf = gpd.sjoin(stations_gdf, cantons_gdf, how="inner", predicate="within")

# A point inside overlapping polygons would appear once per polygon. Keep the
# first match so the index stays unique and the column assignments below can
# align on it instead of raising on duplicate labels.
joined_gdf = joined_gdf[~joined_gdf.index.duplicated(keep="first")]

# Chargers that fell inside no canton polygon.
unmatched_chargers = stations_gdf[~stations_gdf.index.isin(joined_gdf.index)]

# Copy the canton abbreviation and name back by index; unmatched chargers get NaN.
df_clean['canton_short'] = joined_gdf['id']
df_clean['canton'] = joined_gdf['name']
# NOTE: dropna() has no subset, so it removes every row with a missing value in
# any column - this includes the unmatched chargers.
df_clean = df_clean.dropna()

# Rename selected canton names coming from the GeoJSON to the spellings used as
# keys elsewhere in this notebook (e.g. 'Genève' -> 'Genf').
# NOTE(review): there is no entry for Fribourg/Freiburg - verify how the
# GeoJSON spells it.
canton_mapping = {
    'Bern/Berne': 'Bern',
    'Genève': 'Genf',
    'Valais/Wallis': 'Wallis',
    'Graubünden/Grigioni': 'Graubünden',
    'Ticino': 'Tessin',
    'Vaud': 'Waadt',
    'Neuchâtel': 'Neuenburg',
}

df_clean['canton'] = df_clean['canton'].replace(canton_mapping)

print(df_clean)
print(unmatched_chargers)

      country        lat       lon canton_short       canton
0         CHE  46.950698  7.392739           BE         Bern
1         CHE  46.950698  7.392739           BE         Bern
2         CHE  47.547299  7.592312           BS  Basel-Stadt
3         CHE  47.547299  7.592312           BS  Basel-Stadt
4         CHE  46.965888  6.854200           NE    Neuenburg
...       ...        ...       ...          ...          ...
12590     CHE  47.519055  8.718913           ZH       Zürich
12591     CHE  47.355510  8.325744           AG       Aargau
12592     CHE  47.479983  9.531401           SG   St. Gallen
12593     CHE  47.355510  8.325744           AG       Aargau
12594     CHE  47.355510  8.325744           AG       Aargau

[12476 rows x 5 columns]
      country        lat         lon                     geometry
2838      CHE  47.151997    9.511668     POINT (9.51167 47.15200)
2839      CHE  47.151992    9.511602     POINT (9.51160 47.15199)
2840      CHE  47.152038    9.511660     POI

  if await self.run_code(code, result, async_=asy):


In [8]:
import pandas as pd
df = pd.read_csv('UsedData/pop_dens.csv', delimiter=';')

# Keep the three columns used later and drop the rows whose UNIT is
# "Einwohner/Innen". .copy() makes the result an independent frame, so the row
# insertion below cannot trigger SettingWithCopyWarning.
df_popDens = df.loc[
    df['UNIT'] != "Einwohner/Innen",
    ['GEO_NAME', 'UNIT', 'VALUE'],
].copy()

# Add Freiburg only when it is really absent. The previous unconditional
# `df_popDens.loc[25] = ...` replaced whatever row carried label 25, or added a
# second Freiburg row when one was already present.
# NOTE(review): 1543 is the hand-entered value from the original cell - confirm
# it against the source statistics.
if not df_popDens['GEO_NAME'].eq('Freiburg').any():
    # Use a label that is guaranteed to be unused so no existing row is replaced.
    next_label = df_popDens.index.max() + 1 if len(df_popDens) else 0
    df_popDens.loc[next_label] = ('Freiburg', 'Einwohner pro km²', 1543)

df_popDens

Unnamed: 0,GEO_NAME,UNIT,VALUE
1,Zürich,Einwohner pro km²,951.3
3,Bern,Einwohner pro km²,180.1
5,Luzern,Einwohner pro km²,297.3
7,Uri,Einwohner pro km²,35.3
9,Schwyz,Einwohner pro km²,193.7
11,Obwalden,Einwohner pro km²,80.5
13,Nidwalden,Einwohner pro km²,184.0
15,Glarus,Einwohner pro km²,60.9
17,Zug,Einwohner pro km²,633.2
19,Freiburg,Einwohner pro km²,210.1


In [9]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# German canton name -> lower-case abbreviation that is interpolated into the
# jobs.ch URL below.
cantons = {
    'Aargau': 'ag',
    'Appenzell Innerrhoden': 'ai',
    'Appenzell Ausserrhoden': 'ar',
    'Basel-Stadt': 'bs',
    'Basel-Landschaft': 'bl',
    'Bern': 'be',
    'Freiburg': 'fr',
    'Genf': 'ge',
    'Glarus': 'gl',
    'Graubünden': 'gr',
    'Jura': 'ju',
    'Luzern': 'lu',
    'Neuenburg': 'ne',
    'Nidwalden': 'nw',
    'Obwalden': 'ow',
    'Schaffhausen': 'sh',
    'Schwyz': 'sz',
    'Solothurn': 'so',
    'St. Gallen': 'sg',
    'Thurgau': 'tg',
    'Tessin': 'ti',
    'Uri': 'ur',
    'Wallis': 'vs',
    'Waadt': 'vd',
    'Zug': 'zg',
    'Zürich': 'zh'
}

# Position of the <p> element the income figure is read from. It depends on
# the page layout, so a layout change shows up as a ValueError below.
INCOME_PARAGRAPH_INDEX = 23
# Seconds to wait for jobs.ch before giving up instead of hanging the kernel.
REQUEST_TIMEOUT_S = 30
# First run of digits, optionally grouped by U+202F (narrow no-break space).
# The separator is spelled as an escape so it is visible in the source.
INCOME_NUMBER_RE = re.compile(r"[\d\u202f]+")


def fetch_median_income(abbreviation):
    """Return the income figure shown on the jobs.ch salary page of one canton.

    Args:
        abbreviation: Lower-case canton abbreviation, e.g. ``'zh'``.

    Returns:
        The first number found in paragraph ``INCOME_PARAGRAPH_INDEX`` of the
        page body, as an int.

    Raises:
        requests.HTTPError: If the page answers with an error status.
        ValueError: If the page has too few paragraphs or the selected
            paragraph contains no number.
    """
    url = f"https://www.jobs.ch/de/lohn/kanton/?canton={abbreviation}"
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an error page instead of parsing it.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    body = soup.body
    paragraphs = body.find_all('p') if body is not None else []
    if len(paragraphs) <= INCOME_PARAGRAPH_INDEX:
        raise ValueError(
            f"{url}: expected more than {INCOME_PARAGRAPH_INDEX} <p> elements, "
            f"found {len(paragraphs)}"
        )

    text = paragraphs[INCOME_PARAGRAPH_INDEX].get_text()
    match = INCOME_NUMBER_RE.search(text)
    if match is None:
        raise ValueError(f"{url}: no number in paragraph {INCOME_PARAGRAPH_INDEX}: {text!r}")
    return int(match.group().replace('\u202f', ''))


# Parallel lists that become the columns of scrape_df. `mean_incomes_list`
# keeps its original name although its values fill the `medianIncome` column.
mean_incomes_list = []
canton_list = []
abbreviation_list = []

# One request per canton, in the order of the `cantons` dict.
for canton, abbreviation in cantons.items():
    mean_incomes_list.append(fetch_median_income(abbreviation))
    canton_list.append(canton)
    abbreviation_list.append(abbreviation.upper())

scrape_df = pd.DataFrame({
    'medianIncome': mean_incomes_list,
    'canton': canton_list,
    'abbreviation': abbreviation_list
})

scrape_df

Unnamed: 0,medianIncome,canton,abbreviation
0,75390,Aargau,AG
1,84571,Appenzell Innerrhoden,AI
2,71941,Appenzell Ausserrhoden,AR
3,78164,Basel-Stadt,BS
4,77037,Basel-Landschaft,BL
5,74961,Bern,BE
6,71995,Freiburg,FR
7,76981,Genf,GE
8,72836,Glarus,GL
9,70107,Graubünden,GR


In [10]:
# Attach the scraped income figures to the population-density rows. The left
# join keeps every density row, even when no scraped canton matches GEO_NAME.
final_df = df_popDens.merge(
    scrape_df,
    how='left',
    left_on='GEO_NAME',
    right_on='canton',
)

# Give VALUE a descriptive name and keep only the four analysis columns.
popParameters_df = (
    final_df
    .rename(columns={'VALUE': 'popDens'})
    [['canton', 'abbreviation', 'popDens', 'medianIncome']]
)
popParameters_df

Unnamed: 0,canton,abbreviation,popDens,medianIncome
0,Zürich,ZH,951.3,83208
1,Bern,BE,180.1,74961
2,Luzern,LU,297.3,73627
3,Uri,UR,35.3,71500
4,Schwyz,SZ,193.7,75039
5,Obwalden,OW,80.5,72101
6,Nidwalden,NW,184.0,75985
7,Glarus,GL,60.9,72836
8,Zug,ZG,633.2,84998
9,Freiburg,FR,210.1,71995
