# 💊 PharmaX Analysis - Medical Prescription & Reimbursement (France, 2023)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mtick
import os

In [None]:
# Load and prepare data.
# The source file is read as semicolon-delimited, ISO-8859-1 encoded text.
file_path = 'data/open_medic_2023.csv'

# Original column name -> analysis-friendly name. `age` already carries the
# name used downstream, so it needs no entry here.
column_names = {
    'l_cip13': 'medication',
    'BOITES': 'boxes',
    'REM': 'reimbursement',
    'sexe': 'gender',
    'BEN_REG': 'region',
}
analysis_columns = ['medication', 'boxes', 'reimbursement', 'age', 'gender', 'region']

# Build `df` in a single non-mutating chain (no inplace=True). The trailing
# .copy() makes `df` an independent frame, so the column assignments done in
# later cells do not raise SettingWithCopyWarning.
df = (
    pd.read_csv(file_path, encoding='ISO-8859-1', sep=';')
    .rename(columns=column_names)
    .loc[:, analysis_columns]  # KeyError here means an expected column is missing
    .dropna()                  # drop rows with a missing value in any kept column
    .copy()
)

## Data Loading & Preparation
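The cell above loads the dataset, renames the key columns for clarity, keeps only the columns used in this analysis, and drops rows with missing values. The cell below then adjusts data types: reimbursement amounts use a decimal comma, which is replaced by a dot so the column can be converted to numbers.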

In [None]:
# Convert reimbursement to float. The raw values use a decimal comma, so it is
# swapped for a dot before parsing. regex=False makes the replacement a plain
# literal substitution regardless of the pandas version's default.
reimbursement_text = df['reimbursement'].astype(str).str.replace(',', '.', regex=False)
reimbursement_values = pd.to_numeric(reimbursement_text, errors='coerce')

# errors='coerce' turns every unparseable value into NaN, and .sum() skips NaN,
# so parse failures would otherwise vanish from the totals without a trace.
# Count values that were present before conversion but are NaN afterwards.
unparsed_count = int((reimbursement_values.isna() & df['reimbursement'].notna()).sum())
df['reimbursement'] = reimbursement_values

if unparsed_count:
    print(f"Warning: {unparsed_count} reimbursement values could not be parsed and were set to NaN")
print("Sample reimbursement values:", df['reimbursement'].head())
print("Total reimbursement sum:", df['reimbursement'].sum())

## Regional Analysis - Prescriptions and Reimbursements
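To analyze the distribution of prescriptions and reimbursements by region across France, we first map the numeric region codes from the `BEN_REG` field (renamed to `region` during loading) to readable names. Codes that have no entry in the mapping receive a missing `region_name` and are therefore left out of the regional totals.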

In [None]:
# Map region codes to names
region_map = {
    11: 'Île-de-France', 24: 'Centre-Val de Loire', 27: 'Bourgogne-Franche-Comté',
    28: 'Normandie', 32: 'Hauts-de-France', 44: 'Grand Est',
    52: 'Pays de la Loire', 53: 'Bretagne', 75: 'Nouvelle-Aquitaine',
    76: 'Occitanie', 84: 'Auvergne-Rhône-Alpes', 93: 'PACA', 94: 'Corse'
}

# The dict keys are ints, so codes that were read as strings (e.g. '11') would
# match nothing and every region_name would be NaN. Coercing to numeric first
# makes the lookup work for int, float and string-typed code columns alike.
region_codes = pd.to_numeric(df['region'], errors='coerce')
df['region_name'] = region_codes.map(region_map)

# Codes absent from region_map map to NaN, and groupby('region_name') skips NaN
# keys by default, so those rows are excluded from the regional totals. Report
# them so the exclusion is visible rather than silent.
unmapped_codes = df.loc[df['region_name'].isna(), 'region']
if not unmapped_codes.empty:
    print(
        f"{len(unmapped_codes)} rows have a region code missing from region_map:",
        sorted(unmapped_codes.astype(str).unique().tolist()),
    )

## Top Reimbursed Medications
We group data by medication name and calculate total reimbursement. The top 10 medications that cost the most in reimbursements are visualized.

In [None]:
# Top 10 reimbursed medications: total reimbursement per medication, largest first.
top_reimbursed = (
    df.groupby('medication')['reimbursement']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bars: amounts on x, medication names on y.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=top_reimbursed.values, y=top_reimbursed.index, palette='crest', ax=ax)
ax.set_title('Top 10 Reimbursed Medications (France, 2023)')
ax.set_xlabel('Total Reimbursement (€)')
ax.set_ylabel('Medication')
fig.tight_layout()

# savefig does not create missing directories; make sure the output folder
# exists so the cell also runs on a fresh checkout.
os.makedirs('assets', exist_ok=True)
fig.savefig('assets/top_10_reimbursed_meds.png')
plt.show()

## Regional Analysis - Prescriptions and Reimbursements
We analyze the distribution of prescriptions and reimbursements by region across France. We use the `BEN_REG` field to map regional codes to readable names.
- **Prescriptions by Region**: Shows where the highest volume of medications is prescribed.
- **Reimbursements by Region**: Displays where the most reimbursement money is allocated.

In [None]:
# Reimbursement by region: total reimbursement per region name, largest first.
# Rows whose region_name is missing are not part of any group.
reimbursement_summary = (
    df.groupby('region_name')['reimbursement']
    .sum()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=reimbursement_summary.values, y=reimbursement_summary.index, palette='rocket', ax=ax)
# Show tick values in millions of euros with one decimal (e.g. 2.5M€).
ax.xaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: f'{x/1e6:.1f}M€'))
ax.set_title('Total Reimbursement by Region (France, 2023)')
ax.set_xlabel('Reimbursement Amount (in millions €)')
ax.set_ylabel('Region')
fig.tight_layout()

# savefig does not create missing directories; make sure the output folder
# exists so the cell also runs on a fresh checkout.
os.makedirs('assets', exist_ok=True)
fig.savefig('assets/reimbursement_by_region.png')
plt.show()

In [None]:
# Prescriptions by region: total boxes (and reimbursement) per region name,
# ordered by number of boxes, largest first.
region_summary = (
    df.groupby('region_name')[['boxes', 'reimbursement']]
    .sum()
    .sort_values(by='boxes', ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=region_summary['boxes'], y=region_summary.index, palette='mako', ax=ax)
# Format ticks in millions with one decimal. Truncating with int() would label
# 0.5M as "0M" and 2.5M as "2M", producing duplicate or misleading tick labels.
ax.xaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: f'{x/1e6:.1f}M'))
ax.set_title('Total Prescriptions by Region (France, 2023)')
ax.set_xlabel('Number of Boxes (in millions)')
ax.set_ylabel('Region')
fig.tight_layout()

# savefig does not create missing directories; make sure the output folder
# exists so the cell also runs on a fresh checkout.
os.makedirs('assets', exist_ok=True)
fig.savefig('assets/prescriptions_by_region.png')
plt.show()

## Most Prescribed Medications
Which drugs are prescribed the most? This section ranks medications by the number of boxes distributed.

In [None]:
# Top 10 most prescribed medications: total boxes per medication, largest first.
top_meds = (
    df.groupby('medication')['boxes']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bars: box counts on x, medication names on y.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=top_meds.values, y=top_meds.index, palette='crest', ax=ax)
ax.set_title('Top 10 Most Prescribed Medications (France, 2023)')
ax.set_xlabel('Number of Boxes')
ax.set_ylabel('Medication')
fig.tight_layout()

# savefig does not create missing directories; make sure the output folder
# exists so the cell also runs on a fresh checkout.
os.makedirs('assets', exist_ok=True)
fig.savefig('assets/top_10_meds.png')
plt.show()

## Summary
- Dataset: `open_medic_2023.csv`
- Analysis Scope: France, 2023
- Fields analyzed: `region`, `medication`, `reimbursement`, `boxes`
- Outputs: Graphs highlighting regional disparities and top drugs (by usage and cost)

This notebook offers valuable insights into the landscape of prescription drugs and healthcare reimbursement in France for 2023.