In [57]:
import os
import pandas as pd

# Folder that is scanned for the per-file Excel workbooks
folder_path = 'data/processed/cities'

# Restrict the run to a single workbook; set to None to load every workbook found.
ONLY_FILE = "ZUR.xlsx"

# List all Excel files in the folder. os.listdir returns entries in arbitrary
# order, so sort them to keep the row order of the combined table reproducible.
excel_files = sorted(f for f in os.listdir(folder_path) if f.endswith(('.xlsx', '.xls')))
print(excel_files)

# One processed DataFrame per workbook that was read successfully
dataframes = []

for file in excel_files:
    if ONLY_FILE is not None and file != ONLY_FILE:
        continue
    file_path = os.path.join(folder_path, file)

    try:
        raw = pd.read_excel(file_path)

        # Use the first column as the "cluster" index, then drop the last row
        # AND the last column (presumably totals margins - TODO confirm).
        df = (
            raw.rename(columns={raw.columns[0]: "cluster"})
            .set_index("cluster")
            .iloc[:-1, :-1]
        )

        dataframes.append(df)
    except Exception as e:
        # Best-effort load: report the unreadable workbook and carry on with the rest.
        print(f"Error reading {file}: {e}")

# Fail early with a readable message instead of pandas' generic
# "No objects to concatenate" when nothing was loaded.
if not dataframes:
    raise ValueError("No Excel files were loaded; nothing to combine.")

# Stack all per-file tables, treat missing values as 0 and discard the
# "cluster" index in favour of a plain positional index.
combined_df = pd.concat(dataframes, axis=0).fillna(0).reset_index(drop=True)

# Split "category_type" column names on the first underscore; names without an
# underscore are filed under the "general" category.
new_columns = [
    tuple(col.split('_', 1)) if '_' in col else ("general", col)
    for col in combined_df.columns
]
combined_df.columns = pd.MultiIndex.from_tuples(new_columns, names=["Category", "Type"])
combined_df = combined_df.sort_index(axis=1)

# Optional: drop columns where all values are 0
#combined_df = combined_df.loc[:, (combined_df > 0).any(axis=0)]

# Column-wise totals over the numeric columns only
totals = combined_df.select_dtypes(include="number").sum()

# Build a one-row frame labelled "Total". Its columns keep the MultiIndex
# labels of the summed columns, so pd.concat aligns it by label; a non-numeric
# column simply gets NaN in the "Total" row instead of breaking the assignment.
total_row = totals.to_frame(name="Total").T
combined_df = pd.concat([combined_df, total_row])

['CYP.xlsx', 'BUC.xlsx', 'BRU.xlsx', 'BDX.xlsx', 'LON.xlsx', 'LUX.xlsx', 'ZUR.xlsx', 'BEL.xlsx', 'CDG.xlsx']


In [58]:
# Print "Category-Type" for every column whose value in the last row exceeds 1.
# NOTE(review): the strict "> 1" skips columns whose value is exactly 1 - confirm intended.
last_row = combined_df.iloc[-1]
for category, kind in last_row[last_row > 1].index:
    print(f"{category}-{kind}")

leisure-park
tourism-museum


In [59]:
# Display only the final row; the list indexer keeps it a one-row DataFrame
# so the two-level column header is rendered.
combined_df.iloc[[-1], :]

Category,amenity,amenity,amenity,amenity,amenity,amenity,amenity,boundary,building,building,...,natural,office,place,place,tourism,tourism,tourism,tourism,tourism,waterway
Type,bench,fountain,luggage_locker,parking,place_of_worship,ranger_station,theatre,administrative,public,yes,...,water,company,house,square,artwork,attraction,information,museum,viewpoint,waterfall
Total,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,2,0,0
