In [None]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from IPython.display import display
from shapely.geometry import Point
from sklearn.preprocessing import MinMaxScaler

Personal Vehicle Data Source: https://www.insee.fr/fr/statistiques/2012694#graphique-TCRD_001_tab1_regions2016

In [None]:
# Input workbooks, all located in ../data:
#   personal_vehicles_file - read below for the household car-equipment rates
#   excel_file             - read below into `statistics` (COM -> DEP lookup)
#   domicile_file          - read below for the at-home dependent-elderly share
personal_vehicles_file, excel_file, domicile_file = (
    Path("../data") / workbook
    for workbook in (
        "TCRD_001.xlsx",
        "base-ic-evol-struct-pop-2019.xlsx",
        "ac_ina_125.xlsx",
    )
)

In [None]:
# Personal vehicles: first sheet, three leading rows skipped. The unnamed
# first column is relabelled "DEP" (presumably the department identifier -
# confirm against the workbook).
personal_vehicles = pd.read_excel(
    personal_vehicles_file, sheet_name=0, skiprows=3
).rename(columns={"Unnamed: 0": "DEP"})
# DEP -> household equipment rate ("Taux d’équipement des ménages").
dep_to_car_rate_mapping = {
    dep: rate
    for dep, rate in zip(
        personal_vehicles["DEP"],
        personal_vehicles["Taux d’équipement des ménages"],
    )
}

# Dependent elderly people living in a household (2015, in %), keyed by the
# "Département" column of the third sheet (two leading rows skipped).
domicile_rates = pd.read_excel(domicile_file, sheet_name=2, skiprows=2)
dep_to_domicile_rates_mapping = {
    dep: share
    for dep, share in zip(
        domicile_rates["Département"],
        domicile_rates[
            "Part des personnes âgées dépendantes vivant en ménage en 2015 (%)"
        ],
    )
}

# Population statistics workbook; only used here to look up the DEP value of
# each COM value. Note the mapping name says "reg" but the values come from
# the DEP column.
statistics = pd.read_excel(excel_file, sheet_name=0, skiprows=5)
com_to_reg_mapping = {
    com: dep for com, dep in zip(statistics["COM"], statistics["DEP"])
}

In [None]:
# Commune-level table prepared for question 1.
# COM is read as text: letting pandas infer an integer dtype strips leading
# zeros (e.g. "08105" -> 8105), after which the code no longer matches the
# string keys used by the COM -> DEP lookup or the "code" field of the
# communes GeoJSON. Codes are left-padded back to five characters in case the
# CSV was itself written from integers (assumes 5-character INSEE commune
# codes - TODO confirm for this dataset).
df = pd.read_csv("../data/df_question_1.csv", dtype={"COM": str})
df = df.drop(columns=["geometry", "nom", "code"])
df["COM"] = df["COM"].str.zfill(5)

# Attach the department of each commune, then the two department-level rates.
# Both are divided by 100 to turn percentages into fractions (the at-home
# column is labelled "(%)"; the vehicle rate is presumably a percentage too).
df["DEP"] = df["COM"].map(com_to_reg_mapping)
df["personal_vehicle_rate"] = df["DEP"].map(dep_to_car_rate_mapping) / 100
df["at_home_rate"] = df["DEP"].map(dep_to_domicile_rates_mapping) / 100
df.head(5)

## Question 2: Based on web/public data concerning local players, estimate the competitive intensity (number of competitors, estimated turnover & number of employees, etc.) in each of the main cities.

### Availability of personal cars

In [None]:
# Commune polygons joined to the analysis table on the commune code
# (default inner merge: only communes present in both sides are kept).
geojson_file = Path("../data/communes.geojson")
gdf = gpd.read_file(geojson_file).merge(df, left_on="code", right_on="COM")

In [None]:
# Choropleth of the personal vehicle rate per commune.
ax = gdf.plot(
    column="personal_vehicle_rate",
    cmap="Greens",
    legend=True,
    figsize=(10, 5),
)
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()

### Competitive Analysis: Indirect Competitors - Infirmier and Hébergement des personnes âgées

In [None]:
# Nurse counts on a log(count + 1) scale; the +1 keeps communes with a zero
# count finite (log 1 = 0).
nurse_counts = gdf["Infirmier (en nombre) 2021"]
gdf["log_Infirmier (en nombre) 2021"] = np.log(nurse_counts + 1)

choropleth_options = dict(cmap="cividis", legend=True, figsize=(10, 5))
gdf.plot(column="log_Infirmier (en nombre) 2021", **choropleth_options)
plt.show()

In [None]:
# Elderly-residence counts on a log(count + 1) scale; the +1 keeps communes
# with a zero count finite (log 1 = 0).
residence_counts = gdf["Hébergement des personnes âgées (en nombre) 2021"]
gdf["log_Hébergement des personnes âgées (en nombre) 2021"] = np.log(
    residence_counts + 1
)

choropleth_options = dict(cmap="cividis", legend=True, figsize=(10, 5))
gdf.plot(
    column="log_Hébergement des personnes âgées (en nombre) 2021",
    **choropleth_options,
)
plt.show()

### Home Care Services: Competitive Analysis

Data source: https://www.data.gouv.fr/fr/datasets/services-daide-et-de-soins-a-domicile/

In [None]:
# Home-care services workbook: first sheet, no rows skipped.
soins_a_domicile_file = Path("../data/base-soins-a-domicile.xlsx")
soins_a_domicile = pd.read_excel(soins_a_domicile_file, sheet_name=0)
soins_a_domicile.head(3)

In [None]:
# Keep only the services that have both coordinates; a marker cannot be
# placed without them.
coordinate_columns = ["coordinates.latitude", "coordinates.longitude"]
soins_a_domicile = soins_a_domicile.dropna(subset=coordinate_columns)

# Interactive map with one clustered marker per service; the popup shows the
# service title.
m = folium.Map(location=[48.9131, 5.4426], zoom_start=7)
marker_cluster = MarkerCluster()
marker_cluster.add_to(m)

for latitude, longitude, title in zip(
    soins_a_domicile["coordinates.latitude"],
    soins_a_domicile["coordinates.longitude"],
    soins_a_domicile["title"],
):
    marker = folium.Marker(location=[latitude, longitude], popup=title)
    marker.add_to(marker_cluster)

display(m)

In [None]:
# Fresh copy of the commune polygons, restricted to the communes that appear
# in the analysis table.
geojson_file = Path("../data/communes.geojson")
gdf = gpd.read_file(geojson_file).loc[
    lambda communes: communes["code"].isin(df["COM"].astype(str))
]

In [None]:
# One point per home-care service, built from its longitude/latitude pair.
soins_a_domicile_geometry = [
    Point(xy)
    for xy in zip(
        soins_a_domicile["coordinates.longitude"],
        soins_a_domicile["coordinates.latitude"],
    )
]
# The coordinates are plain longitude/latitude, so declare WGS84 explicitly
# (a layer without CRS triggers a mismatch warning in the join) and align it
# with the commune layer.
soins_a_domicile_gdf = gpd.GeoDataFrame(
    soins_a_domicile, geometry=soins_a_domicile_geometry, crs="EPSG:4326"
)
if gdf.crs is not None:
    soins_a_domicile_gdf = soins_a_domicile_gdf.to_crs(gdf.crs)

# Spatial join: one row per (commune, service located inside it).
# - how="inner": a left join keeps one placeholder row for every commune
#   WITHOUT any service, and groupby().size() would count that row as 1.
# - predicate=: replaces the `op=` keyword, deprecated in geopandas 0.10 and
#   removed in 1.0.
result = gpd.sjoin(
    gdf, soins_a_domicile_gdf, how="inner", predicate="contains"
)
count_per_location = (
    result.groupby("code").size().reset_index(name="count_soins_a_domicile")
)

# Attach the counts to every commune. Communes absent from the join have no
# service and get 0, so any ratio that later divides by this count must
# tolerate zero. A stale count column is dropped first so that re-running
# the cell does not produce _x/_y suffixed duplicates.
gdf = gdf.drop(columns="count_soins_a_domicile", errors="ignore").merge(
    count_per_location, on="code", how="left"
)
gdf["count_soins_a_domicile"] = (
    gdf["count_soins_a_domicile"].fillna(0).astype(int)
)

In [None]:
# Commune code -> number of home-care services, copied onto the analysis
# table as "house_services".
code_to_house_service_mapping = {
    code: count
    for code, count in zip(gdf["code"], gdf["count_soins_a_domicile"])
}
df["house_services"] = df["COM"].map(code_to_house_service_mapping)

In [None]:
# Home-care service counts on a log(count + 1) scale; the +1 keeps communes
# with a zero count finite (log 1 = 0).
service_counts = gdf["count_soins_a_domicile"]
gdf["log_count_soins_a_domicile"] = np.log(service_counts + 1)

ax = gdf.plot(
    column="log_count_soins_a_domicile",
    cmap="cividis",
    legend=True,
    figsize=(10, 5),
)
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()

## Question 3: Based on web/public data concerning local players, estimate the competitive intensity (number of competitors, estimated turnover & number of employees, etc.) in each of the main cities.

In [None]:
# --- Model assumptions (currency and time period are not stated here -
# --- TODO confirm the units of price and salary match) ---

# Revenue side: price applied per client when computing expected_turnover.
AVG_PRICE = 300
# Share of the target population assumed to use the service.
PROB_SERVICE_USAGE = 0.3

# Cost side: expenses are built from workforce-per-client times salary.
AVG_SALARY = 2000
AVG_WORKFORCE_PER_CLIENT = 0.2

# Indirect competitors are added to the home-care service count with these
# weights: nurses and elderly residences respectively.
NURSES_WEIGHT = 0.05
RESIDENCE_WEIGHT = 0.5

In [None]:
# Rescale solvency and density to [0, 1]. Each fit_transform call refits the
# scaler on its own column; the (n, 1) result is flattened to a plain column.
scaler = MinMaxScaler()
df["solvency_norm"] = scaler.fit_transform(df[["solvency"]]).ravel()
df["density_norm"] = scaler.fit_transform(df[["density"]]).ravel()

# Competitive pressure: direct home-care services count fully, nurses and
# elderly residences are indirect competitors with reduced weights.
df["competitors"] = (
    df["house_services"]
    + NURSES_WEIGHT * df["Infirmier (en nombre) 2021"]
    + RESIDENCE_WEIGHT * df["Hébergement des personnes âgées (en nombre) 2021"]
)

# Expected clients = addressable demand shared between the competitors.
# The divisor is floored at 1: with zero competitors the division would give
# inf (or NaN), and with a weighted count below 1 it would award more than
# the whole addressable market. At most 100% of the demand can be captured.
market_share_divisor = df["competitors"].clip(lower=1)
df["clients"] = (
    df["target_population"]
    * PROB_SERVICE_USAGE
    * df["at_home_rate"]
    / market_share_divisor
)

# Price uplift between x1 (least solvent commune) and x2 (most solvent).
solvency_rate = 1 + df["solvency_norm"]
df["expected_turnover"] = df["clients"] * solvency_rate * AVG_PRICE

In [None]:
# Cost multipliers applied to the base staffing cost:
# - between x1 and x1.8 depending on the personal vehicle rate,
# - between x1 and x0.8 depending on the normalised density.
# NOTE(review): both are MULTIPLIED into expenses, so a higher vehicle rate
# raises costs although the name says "productivity increase" - confirm the
# intended direction.
productivity_increase_coef = 1 + 0.8 * df["personal_vehicle_rate"]
productivity_decrease_coef = 1 - df["density_norm"] * 0.2

# Total staffing cost per commune. AVG_WORKFORCE_PER_CLIENT * AVG_SALARY is a
# cost PER CLIENT, so it must be scaled by the expected number of clients to
# be comparable with expected_turnover, which is a total over all clients.
df["expenses"] = (
    df["clients"]
    * AVG_WORKFORCE_PER_CLIENT
    * productivity_increase_coef
    * productivity_decrease_coef
    * AVG_SALARY
)

In [None]:
# Profit = turnover minus expenses; show the ten most profitable communes.
df["expected_profit"] = df["expected_turnover"].sub(df["expenses"])
df.sort_values("expected_profit", ascending=False).iloc[:10]

In [None]:
# Cities by expected profit: reload the commune polygons, attach the final
# analysis table and draw the profit choropleth without axes.
gdf = gpd.read_file(geojson_file).merge(df, left_on="code", right_on="COM")

fig, ax = plt.subplots(figsize=(15, 5))
profit_map_options = dict(
    cmap="Greens",
    linewidth=0.8,
    edgecolor="0.8",
    legend=True,
)
gdf.plot(column="expected_profit", ax=ax, **profit_map_options)

ax.set_axis_off()
plt.show()