In [1]:
import pandas as pd
import os
from glob import glob

# Function to create the 'file' column as specified
def add_file_column(df):
    """Add a 'file' column holding the image file name derived from each row.

    The name is built by joining, with underscores, the row's ``city_id``,
    ``place_id`` (zero-padded to 7 characters), ``year``, ``month``
    (zero-padded to 2), ``northdeg`` (zero-padded to 3), ``lat``, ``lon``
    and ``panoid``, followed by the ``.jpg`` extension.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the eight columns listed above. It is modified in
        place: the 'file' column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    source_columns = ('city_id', 'place_id', 'year', 'month',
                      'northdeg', 'lat', 'lon', 'panoid')
    # Walk the columns in lockstep instead of using a row-wise DataFrame.apply:
    # apply(axis=1) builds a Series per row (slow on large frames) and returns
    # an empty DataFrame for an empty input, which cannot be assigned to a
    # single column. A plain list works for any number of rows, including zero.
    df['file'] = [
        f"{city}_{str(place).zfill(7)}_{year}_{str(month).zfill(2)}"
        f"_{str(north).zfill(3)}_{lat}_{lon}_{pano}.jpg"
        for city, place, year, month, north, lat, lon, pano
        in zip(*(df[name] for name in source_columns))
    ]
    return df

# Function to add the 'global_label' column with unique autoincrementing int for each (city_id, place_id) tuple
def add_global_label(df):
    """Add a 'global_label' column identifying each (city_id, place_id) pair.

    Every distinct ``(city_id, place_id)`` combination receives an integer
    label, starting at 0 and increasing in the order in which the combinations
    first appear in ``df``. All rows sharing a combination share its label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'city_id' and 'place_id' columns. It is modified in
        place: the 'global_label' column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.

    Raises
    ------
    KeyError
        If 'city_id' or 'place_id' is missing.

    Notes
    -----
    Assumes the two key columns contain no missing values; with groupby's
    default ``dropna=True`` such rows do not belong to any group.
    """
    # sort=False numbers the groups in order of first appearance rather than
    # in sorted key order. ngroup() is vectorized and also works on an empty
    # frame, unlike a row-wise apply with a dictionary lookup.
    df['global_label'] = df.groupby(['city_id', 'place_id'], sort=False).ngroup()
    return df

In [2]:
# Folder path containing all the CSV files
folder_path = 'dataset/Dataframes'

# Load all CSV files into a single DataFrame.
# glob() returns paths in an arbitrary, filesystem-dependent order, so the
# list is sorted to make the concatenation order (and therefore the order of
# rows that tie on the sort keys below) reproducible across runs and machines.
all_files = sorted(glob(os.path.join(folder_path, '*.csv')))
if not all_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'")
dfs = [pd.read_csv(file) for file in all_files]
combined_df = pd.concat(dfs, ignore_index=True)

# Sort by 'city_id', 'place_id', 'year', 'month'
combined_df = combined_df.sort_values(by=['city_id', 'place_id', 'year', 'month'])

# Add 'file' column
combined_df = add_file_column(combined_df)

# Add 'global_label' column (labels follow the sorted row order)
combined_df = add_global_label(combined_df)

# Replace the index left over from before sorting with a fresh 0..n-1 range
# index; 'global_label' stays a regular column.
combined_df = combined_df.reset_index(drop=True)

# Save the final DataFrame to a new CSV file.
# The row index is written as the first (unnamed) column of the file.
combined_df.to_csv('catalog.csv')

print("CSV file created successfully: 'catalog.csv'")

CSV file created successfully: 'catalog.csv'


In [3]:
# Display the assembled catalog; the rendered table shows only the first and
# last rows of the frame, with an ellipsis row in between.
combined_df

Unnamed: 0,place_id,year,month,northdeg,city_id,lat,lon,panoid,file,global_label
0,1,2016,11,228,Bangkok,13.715420,100.484880,8hQkYBLClYeLNAEhsfdSFw,Bangkok_0000001_2016_11_228_13.71541995884607_...,0
1,1,2017,5,412,Bangkok,13.715410,100.484870,mHq6ocn8sYGs6oodHGRcnw,Bangkok_0000001_2017_05_412_13.7154102892804_1...,0
2,1,2017,10,414,Bangkok,13.715398,100.484878,Ee8m_wrncil2kYnl2z823w,Bangkok_0000001_2017_10_414_13.71539826153708_...,0
3,1,2018,5,231,Bangkok,13.715431,100.484876,jyEkxACv3OLf0-Wv2lWx6g,Bangkok_0000001_2018_05_231_13.7154309617333_1...,0
4,1,2018,10,231,Bangkok,13.715407,100.484886,EhU2KuL90dcomee9jg2SwA,Bangkok_0000001_2018_10_231_13.71540703891692_...,0
...,...,...,...,...,...,...,...,...,...,...
529501,1971,2007,9,508,WashingtonDC,38.920762,-77.000588,0ajdBnylaFMXbPtm3t4wLw,WashingtonDC_0001971_2007_09_508_38.9207617618...,64393
529502,1971,2009,7,158,WashingtonDC,38.920796,-77.000596,Pm3sPFSQHqWE_2VJyXWWhA,WashingtonDC_0001971_2009_07_158_38.9207958042...,64393
529503,1971,2017,6,152,WashingtonDC,38.920782,-77.000591,82xLFBkVVmdElZBdPOH5ew,WashingtonDC_0001971_2017_06_152_38.9207820758...,64393
529504,1971,2018,7,517,WashingtonDC,38.920779,-77.000589,1vCJLIpbPcwF4uprG77M4w,WashingtonDC_0001971_2018_07_517_38.9207790684...,64393
