In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import glob
import os

In [None]:
# Year of the export to load; it selects the input directory below and is
# reused later in the notebook to name the output files.
YEAR = 2022

directory = "data/opendata_txt_" + str(YEAR)
path_pattern = os.path.join(directory, "*.txt")
# glob.glob returns matches in arbitrary order; sorting makes the row order of
# the concatenated frame identical on every run.
files = sorted(glob.glob(path_pattern))

dataframes = []
failed_files = []  # paths that could not be parsed, reported after the loop
for file in files:
    try:
        # The "dessin" column is discarded; errors="ignore" keeps files that
        # do not contain it instead of treating them as unreadable.
        df = pd.read_csv(file, delimiter=";").drop(columns="dessin", errors="ignore")
    except Exception as e:
        # Keep loading the remaining files: stopping at the first failure
        # would silently drop every file that comes after it.
        print(f"Failed to read {file}: {e}")
        failed_files.append(file)
        continue
    dataframes.append(df)
    print(f"Successfully read: {file}, number of rows: {len(df)}")

if failed_files:
    print(
        f"WARNING: {len(failed_files)} of {len(files)} files could not be read; "
        "combined_df does not contain their rows."
    )

if dataframes:
    combined_df = pd.concat(dataframes, ignore_index=True)
else:
    # Nothing was loaded (no matching files, or every file failed).
    # Cells that use combined_df will fail until the input data is available.
    combined_df = None
    print("No DataFrames to concatenate.")

In [None]:
# Preview the first rows of the concatenated frame as a quick sanity check.
# combined_df is None when the loading cell found nothing to concatenate, in
# which case this call raises AttributeError.
combined_df.head()

In [None]:
# Three road segments are extracted from the combined data. Each one is
# identified by its street label ("libelle") plus the labels of its two end
# nodes ("libelle_nd_amont" and "libelle_nd_aval"; French "amont"/"aval" =
# upstream/downstream):
#
#   df_champs_elysees => libelle: Av_Champs_Elysees
#   df_convention     => libelle: Convention
#   df_st_antoine     => libelle: St_Antoine


def _select_segment(df, libelle, nd_amont, nd_aval):
    """Return the rows of ``df`` that match one road segment.

    A row is kept only when all three label columns match, i.e. rows whose
    "libelle", "libelle_nd_amont" or "libelle_nd_aval" differs from the
    requested value are removed.

    Args:
        df: frame containing the "libelle", "libelle_nd_amont" and
            "libelle_nd_aval" columns.
        libelle: required value of the "libelle" column.
        nd_amont: required value of the "libelle_nd_amont" column.
        nd_aval: required value of the "libelle_nd_aval" column.

    Returns:
        The matching rows, with their original index labels.

    Raises:
        KeyError: if one of the three columns is missing from ``df``.
    """
    mask = (
        (df['libelle'] == libelle)
        & (df['libelle_nd_amont'] == nd_amont)
        & (df['libelle_nd_aval'] == nd_aval)
    )
    return df[mask]


# Av_Champs_Elysees, between the Washington and Berri nodes.
df_champs_elysees = _select_segment(
    combined_df,
    libelle='Av_Champs_Elysees',
    nd_amont='Av_Champs_Elysees-Washington',
    nd_aval='Av_Champs_Elysees-Berri',
)

# Convention, between the Convention-Blomet and Lecourbe-Convention nodes.
df_convention = _select_segment(
    combined_df,
    libelle='Convention',
    nd_amont='Convention-Blomet',
    nd_aval='Lecourbe-Convention',
)

# St_Antoine, between the Bastille-St_Antoine and St_Antoine-Jacques_Coeur nodes.
df_st_antoine = _select_segment(
    combined_df,
    libelle='St_Antoine',
    nd_amont='Bastille-St_Antoine',
    nd_aval='St_Antoine-Jacques_Coeur',
)

In [None]:
# Row counts of the three filtered segment frames, displayed as a tuple in the
# order (Champs-Elysees, Convention, St-Antoine).
len(df_champs_elysees), len(df_convention), len(df_st_antoine)

In [None]:
# Write each filtered segment to its own per-year CSV file, without the index.
segment_outputs = [
    (df_champs_elysees, f'data/champs/champs_elysees_{YEAR}.csv'),
    (df_convention, f'data/convention/convention_{YEAR}.csv'),
    (df_st_antoine, f'data/antoine/st_antoine_{YEAR}.csv'),
]
for segment_df, csv_path in segment_outputs:
    # to_csv does not create missing directories, so make sure the target
    # folder exists before writing.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    segment_df.to_csv(csv_path, index=False)

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the Champs-Elysees segment.
df_champs_elysees.info()

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the Convention segment.
df_convention.info()

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the St-Antoine segment.
df_st_antoine.info()