In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import warnings
warnings.filterwarnings('ignore')

# Plot defaults: stock matplotlib style, seaborn "husl" palette, 12x8 figures.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

# Report banner: title, then the analysis scope, framed by 80-character rules.
_rule = "=" * 80
print(
    _rule,
    "GREEN BIKES TOURS - ANALYSE DES DONN√âES DU TOUR NOCTURNE",
    _rule,
    "P√©riode d'analyse: 23 mai 2022 - 19 juin 2022",
    "Focus: Tour nocturne √† v√©lo (18h15 - 23h00)",
    _rule,
    sep="\n",
)

# =============================================================================
# 1. DATA LOADING AND INITIAL AUDIT
# =============================================================================
# Step heading followed by a 50-character underline.
print("\nüìÅ √âTAPE 1: CHARGEMENT DES DONN√âES", "-" * 50, sep="\n")

# Registry of every DataFrame loaded so far, keyed by dataset name.
data_files = {}
# Messages describing the files that could not be loaded.
load_errors = []

# Fonction pour charger les fichiers avec gestion d'erreurs
def load_file_safe(file_path, file_name, sep=';', encoding='utf-8'):
    """Load a tabular file into a DataFrame, returning None on failure.

    Files whose extension is ``.xlsx`` or ``.xls`` (case-insensitive) are
    read with ``pd.read_excel``; every other path is read as delimited text
    with ``pd.read_csv``. When the text cannot be decoded with ``encoding``,
    one further attempt is made with cp1252 before giving up.

    Args:
        file_path: Path of the file to read.
        file_name: Label used in the printed status line and error record.
        sep: Field separator passed to ``pd.read_csv``.
        encoding: Text encoding tried first for delimited files.

    Returns:
        The loaded DataFrame, or None when loading failed. On failure the
        error is printed and appended to the module-level ``load_errors``.
    """
    # Second-chance encoding for text files that are not valid in `encoding`.
    fallback_encoding = 'cp1252'

    try:
        # Compare the extension case-insensitively so "X.XLSX" is not
        # mistakenly handed to the CSV parser.
        if file_path.lower().endswith(('.xlsx', '.xls')):
            df = pd.read_excel(file_path)
        else:
            try:
                df = pd.read_csv(file_path, sep=sep, encoding=encoding)
            except UnicodeDecodeError:
                if encoding == fallback_encoding:
                    # Nothing different left to try; report the failure.
                    raise
                df = pd.read_csv(file_path, sep=sep, encoding=fallback_encoding)

        print(f"‚úÖ {file_name}: {df.shape[0]} lignes, {df.shape[1]} colonnes")
        return df
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole loading pass, so record the problem and carry on.
        print(f"‚ùå Erreur {file_name}: {e}")
        load_errors.append(f"{file_name}: {e}")
        return None

# Load the main datasets. Every dataset that loads is registered in
# data_files under its key; when a load fails, the matching *_df name is None
# and no entry is added to data_files.
print("\nüîÑ Chargement des fichiers principaux...")

# (data_files key, file path, label shown in the status line)
_main_sources = (
    ('employees', 'data/Employees/employees_table.csv', 'Employ√©s'),
    ('bookings', 'data/Bookings/Bookings.xlsx', 'R√©servations'),
    ('customers', 'data/Customers/customers_extract.xlsx', 'Clients'),
    ('employee_logs', 'data/Employee Logs/Employee_Night_Logs.xlsx', 'Logs employ√©s'),
    ('office_schedule', 'data/Office Schedule/Employee_Evening_Schedule.xlsx', 'Planning bureau'),
    ('reviews', 'data/Reviews/tour_reviews.csv', 'Avis clients'),
    ('weather', 'data/Weather/weather.csv', 'M√©t√©o'),
)

# Files are read in the order listed above.
_loaded = {key: load_file_safe(path, label) for key, path, label in _main_sources}
data_files.update({key: frame for key, frame in _loaded.items() if frame is not None})

employees_df = _loaded['employees']
bookings_df = _loaded['bookings']
customers_df = _loaded['customers']
employee_logs_df = _loaded['employee_logs']
office_schedule_df = _loaded['office_schedule']
reviews_df = _loaded['reviews']
weather_df = _loaded['weather']

# 8. Load the log files from every sub-folder of data/Logs.
print("\nüîÑ Chargement des fichiers de logs...")
logs_folders = [
    'Count_Send_Logs',
    'Launch_Logs',
    'Launchpad_1_Logs',
    'Launchpad_2_Logs',
    'Meeting_Point_Logs',
    'Office_Logs',
    'Return_Logs',
]

# Maps each folder that exists to the list of DataFrames loaded from it.
# Folders missing on disk get no key at all.
logs_data = {}
for folder in logs_folders:
    folder_path = f'data/Logs/{folder}'
    # isdir rather than exists: a stray file with the folder's name would
    # otherwise make os.listdir raise.
    if not os.path.isdir(folder_path):
        continue

    logs_data[folder] = []
    # os.listdir returns entries in arbitrary order; sorting makes the load
    # order reproducible. The order is lexicographic by file name, which is
    # not necessarily chronological.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(('.csv', '.xlsx')):
            continue
        file_path = os.path.join(folder_path, file)
        df = load_file_safe(file_path, f'{folder}/{file}')
        if df is None:
            continue
        # Tag every row with its origin so frames can be combined later
        # without losing track of the source file and log type.
        df['source_file'] = file
        df['log_type'] = folder
        logs_data[folder].append(df)

# 9. Load the office sales files (Office Sales folder).
print("\nüîÑ Chargement des fichiers de ventes...")
# One DataFrame per sales file that loaded successfully.
office_sales_data = []
sales_folder = 'data/Office Sales'
# isdir rather than exists: a stray file with this name would otherwise make
# os.listdir raise.
if os.path.isdir(sales_folder):
    # os.listdir returns entries in arbitrary order; sorting makes the load
    # order reproducible across runs and machines.
    for file in sorted(os.listdir(sales_folder)):
        if not file.endswith(('.csv', '.xlsx')):
            continue
        file_path = os.path.join(sales_folder, file)
        df = load_file_safe(file_path, f'Office Sales/{file}')
        if df is None:
            continue
        # Keep the originating file name alongside the rows.
        df['source_file'] = file
        office_sales_data.append(df)

GREEN BIKES TOURS - ANALYSE DES DONN√âES DU TOUR NOCTURNE
P√©riode d'analyse: 23 mai 2022 - 19 juin 2022
Focus: Tour nocturne √† v√©lo (18h15 - 23h00)

üìÅ √âTAPE 1: CHARGEMENT DES DONN√âES
--------------------------------------------------

üîÑ Chargement des fichiers principaux...
‚úÖ Employ√©s: 23 lignes, 11 colonnes
‚úÖ R√©servations: 16 lignes, 7 colonnes
‚úÖ Clients: 1304 lignes, 7 colonnes
‚úÖ Logs employ√©s: 7 lignes, 3 colonnes
‚úÖ Planning bureau: 7 lignes, 4 colonnes
‚úÖ Avis clients: 156 lignes, 3 colonnes
‚úÖ M√©t√©o: 84 lignes, 5 colonnes

üîÑ Chargement des fichiers de logs...
‚úÖ Count_Send_Logs/01-06-2022_night_count_send.csv: 120 lignes, 1 colonnes
‚úÖ Count_Send_Logs/02-06-2022_night_count_send.csv: 130 lignes, 1 colonnes
‚úÖ Count_Send_Logs/03-06-2022_night_count_send.csv: 132 lignes, 1 colonnes
‚úÖ Count_Send_Logs/04-06-2022_night_count_send.csv: 150 lignes, 1 colonnes
‚úÖ Count_Send_Logs/05-06-2022_night_count_send.csv: 149 lignes, 1 colonnes
‚úÖ Count_Send_Log