In [23]:
import csv
import os
from datetime import datetime, timedelta
import copy

def get_ordered_booking_file_paths(root_dir):
    # List to hold file paths with corresponding dates
    files_with_dates = []

    # Walk through the root directory
    for dirpath, dirnames, filenames in os.walk(root_dir):
        try:
            dir_date = datetime.strptime(os.path.basename(dirpath), '%d-%m-%Y')
        except ValueError:
            continue 

        # Find all files that match the pattern
        for filename in filenames:
            if (filename.startswith('maeva_cleaned')) and filename.endswith('.csv'):
                files_with_dates.append((os.path.join(dirpath, filename), dir_date))

    # Sort the list of files by the date part
    files_with_dates.sort(key=lambda x: x[1])
    print('Ordered Files :',files_with_dates)
    # Extract and return only the file paths in sorted order
    sorted_file_paths = [file_path for file_path, _ in files_with_dates]
    return sorted_file_paths

def fixed_date_range(year):
    start_date = datetime(year, 4, 1)
    end_date = datetime(year, 4, 3)
    return start_date, end_date

def parse_date(date_str):
    if  date_str=="" or any(c.isalpha() for c in date_str):
        return None
    date_formats = [
        '%Y-%m-%d %H:%M:%S',  
        '%Y-%m-%d',          
        '%d-%m-%Y',       
        '%d/%m/%Y'            
    ]
    
    for fmt in date_formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    print('Date:' , date_str)
    print(date_str == "")
    raise ValueError(f"Date format not recognized: {date_str}")
def add_days(nb_days, date_initiale_str):
    date_initiale = parse_date(date_initiale_str)
    nouvelle_date = date_initiale + timedelta(days=nb_days)
    return nouvelle_date.strftime('%d/%m/%Y')

In [None]:


# Main execution block
#root_directory = './Booking'

root_directory = 'C:/Users/Keller/Documents/Jobdev/pricing/pricing/reconstruction_maeva/data/maeva'

file_paths = get_ordered_booking_file_paths(root_directory)

for path in file_paths:
    #print(path)
    continue

# Load data from files
annonces_par_fichier = []
missing_dates_dict = {}

for file_path in file_paths:
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        continue

    annonces = {}
    #print('Processing :', file_path)
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        print(file_path)
        for row in reader:
            key = f"{row['typologie']}-{row['nom']}-{row['n_offre']}-{row['localite']}"
            if key not in annonces:
                annonces[key] = []
            annonces[key].append({
                'date_debut': parse_date(row['date_debut']),
                'date_fin': parse_date(row['date_fin']),
                'date_price': parse_date(row['date_price']),
                'Nb semaines': parse_date(row['Nb semaines']),
                'row': row
            })
    annonces_par_fichier.append(annonces)

# Process missing keys and update dates
print('Processing missing dates...')
year_of_interest = 2024
start_date, end_date = fixed_date_range(year_of_interest)

In [25]:
def add_days(nb_days, date_initiale_str):
    date_initiale = parse_date(date_initiale_str)
    nouvelle_date = date_initiale + timedelta(days=nb_days)
    return nouvelle_date.strftime('%d/%m/%Y')

In [27]:
missing_annonces = []
for i in range(1, len(annonces_par_fichier)):
    print('File', i, '  and File', i-1)
    prev_annonce = annonces_par_fichier[i - 1]
    curr_annonce = annonces_par_fichier[i]
    common_keys = set(prev_annonce.keys()) & set(curr_annonce.keys())
    missing_keys = set(prev_annonce.keys()) - set(curr_annonce.keys())

    print(common_keys)
    print(missing_keys)
    break

File 1   and File 0
{'1 Pièce 4 Personnes-Résidence Casino-603024-Canet-en-Roussillon', "4 Pièces pour 6 Personnes-RÉsidence Les Chalets D'aurouze-309074-La Joue du Loup", 'Appartement 6 personnes - 1 chambre + 1 coin nuit - Terrasse ou balcon - Vue mer-Résidence Pierre  and  Vacances Les Parcs de Grimaud-131045-Grimaud - Port Grimaud', 'Appartement PRALIN 108 pour 6 Personnes-Résidence Pralin-160563-Méribel - Mottaret', '2 Pièces pour 4 Personnes-Résidence Ondines-595304-Saint Cyprien - Pyrénées Orientales', '3 Pièces 8 Personnes-Résidence Odalys Rochebrune Le Vallon-1915-Orcières 1850', 'Maisons & Villas pour 6 Personnes-Résidence Les Captivantes-235244-Port Leucate', '2 Pièces pour 6 Personnes-Résidence Aravis-1090394-Les Ménuires', '2 Pièces 4 Personnes-Résidence Arcelle-52033-Val Thorens', 'Maisons & Villas pour 4 Personnes-Résidence Grande Bleue-235004-Port Leucate', '3 Pièces pour 6 Personnes-Résidence Rochasset-205345-Les Contamines Montjoie', '2 Pièces pour 6 Personnes-Résiden

In [None]:
missing_annonces = []
for i in range(1, len(annonces_par_fichier)):
    print('File', i, '  and File', i-1)
    prev_annonce = annonces_par_fichier[i - 1]
    curr_annonce = annonces_par_fichier[i]
    common_keys = set(prev_annonce.keys()) & set(curr_annonce.keys())
    missing_keys = set(prev_annonce.keys()) - set(curr_annonce.keys())

    for key in missing_keys:
        annonces_par_fichier[i][key] = []

    for key in common_keys:
        if len(prev_annonce[key]) > len(curr_annonce[key]):
            for index in range(len(curr_annonce[key]), len(prev_annonce[key])):
                annonce = copy.deepcopy(prev_annonce[key][index])
                date_debut = annonce['date_debut']
                date_fin = annonce['date_fin']
                if(date_debut is not None):
                    annonce['date_debut'] = date_debut + timedelta(days=7)
                    annonce['date_fin'] = date_fin + timedelta(days=7)
                    annonce['Nb semaines'] = annonce['Nb semaines'] + 1
                    annonce['row']['date_debut'] = add_days(7, annonce['row']['date_debut'])
                    annonce['row']['date_fin'] = add_days(7, annonce['row']['date_fin'])
                    annonce['row']['Nb semaines'] =  annonce['row']['Nb semaines'] + 1
                    missing_annonces.append(annonce)
                    annonces_par_fichier[i][key].append(annonce)


In [20]:
def write_missing_annonces(missing_annonces, file_name):
    print('Missings ', len(missing_annonces)) 
    
    # Get the headers from the first row, but filter out the 'Unnamed: 12' column
    header = [field for field in missing_annonces[0]['row'].keys() if field != 'Unnamed: 12']
    print(header)
    
    # Writing to CSV
    with open(file_name, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for item in missing_annonces:
            # Remove the 'Unnamed: 12' column if it exists in the row
            row = {key: value for key, value in item['row'].items() if key in header}
            writer.writerow(row)

In [None]:
write_missing_annonces(missing_annonces,'maeva_missing.csv')

In [None]:
write_missing_annonces(missing_annonces,'maeva_missings.csv')

In [22]:
# Creating hosting DB for test
for i, prev_annonce in enumerate(annonces_par_fichier):
    if i == 0:
        keys = set(prev_annonce.keys())
        key = next(iter(keys))
    print('File:',i)    
    for index in range(len(prev_annonce[key])):
        annonce = prev_annonce[key][index]
        header = [field for field in annonce['row'].keys() if field != 'Unnamed: 12']
        
        with open(f'test/{key}_{i}.csv', mode='a', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=header)
            if file.tell() == 0:  
                writer.writeheader()
            row = {k: v for k, v in annonce['row'].items() if k in header}
            writer.writerow(row)

File: 0


FileNotFoundError: [Errno 2] No such file or directory: 'test/1 Pièce 4 Personnes-Résidence Casino-603024-Canet-en-Roussillon_0.csv'