#### Creating a function to read the CSV files for each city and put them in one dataframe

In [144]:
import pandas as pd

def merge_csv_files(file_list):
    """Read one city's quarterly listing CSVs and stack them into one DataFrame.

    File names are expected to look like ``<city>_listing_<quarter>_<year>.csv``
    (e.g. ``rome_listing_Q1_24.csv``). The third and fourth underscore-separated
    tokens of the file name are joined into the ``quarter`` label (``"Q1_24"``).

    For every file the function:
      * drops the columns that are not needed downstream (``last_review``,
        ``number_of_reviews_ltm``, ``license``, ``neighbourhood_group``);
        a column that is absent from a given file is simply skipped,
      * adds a ``quarter`` column derived from the file name,
      * fills missing ``price`` values with the mean price of that same file.

    Parameters
    ----------
    file_list : sequence of str or os.PathLike
        Paths of the CSV files to load.

    Returns
    -------
    pandas.DataFrame
        All files concatenated in the order given, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``file_list`` is empty.
    IndexError
        If a file name has fewer than four underscore-separated tokens.
    KeyError
        If a file has no ``price`` column.
    """
    # Local import so the function does not depend on another cell's imports.
    from pathlib import Path

    unused_columns = [
        'last_review',
        'number_of_reviews_ltm',
        'license',
        'neighbourhood_group',
    ]

    dataframes = []
    for file in file_list:
        # Path(...).name removes the directory part using the platform's own
        # separator rules and also accepts Path objects, unlike str.split('/').
        parts = Path(file).name.split('_')
        # e.g. ['rome', 'listing', 'Q1', '24.csv'] -> 'Q1_24'
        quarter_year = f"{parts[2]}_{parts[3].split('.')[0]}"

        # errors='ignore': tolerate exports that lack one of the optional columns.
        df = pd.read_csv(file).drop(columns=unused_columns, errors='ignore')
        df['quarter'] = quarter_year
        # The mean is computed per file, so each quarter is filled with its own average.
        df['price'] = df['price'].fillna(df['price'].mean())
        dataframes.append(df)

    if not dataframes:
        # pd.concat would raise ValueError as well, just with a less helpful message.
        raise ValueError("file_list is empty: there are no CSV files to merge")

    return pd.concat(dataframes, ignore_index=True)



### Calling the function for all the cities

In [145]:
# Rome: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/rome_listing_Q1_24.csv',
    'data/rome_listing_Q4_23.csv',
    'data/rome_listing_Q3_23.csv',
    'data/rome_listing_Q2_23.csv',
]
merged_data_rome = merge_csv_files(file_list)
merged_data_rome

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,quarter
0,2737,"Elif's room in cozy, clean flat.",3047,Elif,VIII Appia Antica,41.871360,12.482150,Private room,56.0,7,5,0.04,6,365,Q1_24
1,139330,abbraccia Morfeo (2b),679555,Mario,V Prenestino/Centocelle,41.881420,12.544220,Private room,94.0,2,26,0.29,4,62,Q1_24
2,3079,Cozy apartment (2-4)with Colisseum view,3504,Laura,I Centro Storico,41.895000,12.491170,Entire home/apt,120.0,90,21,0.13,6,253,Q1_24
3,140801,"Rome Studio Rental, Colosseum",685600,Juan Carlos,I Centro Storico,41.887390,12.496900,Entire home/apt,81.0,3,192,1.26,1,222,Q1_24
4,11834,"Rome ""Charming Boschetto Studio""",44552,Serena,I Centro Storico,41.895447,12.491181,Entire home/apt,110.0,2,224,1.43,1,181,Q1_24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113940,907665329364448150,Rental unit in Rome · ★New · 2 bedrooms · 4 be...,494485,Massimo,XII Monte Verde,41.858597,12.439723,Entire home/apt,112.0,30,0,,25,365,Q2_23
113941,907740873805877631,Rental unit in Rome · ★New · 2 bedrooms · 3 be...,255338453,Laura,I Centro Storico,41.894868,12.491906,Entire home/apt,512.0,1,0,,2,144,Q2_23
113942,907795429693803226,Rental unit in Rome · ★New · 1 bedroom · 1 bed...,465355743,Lorenzo,XIV Monte Mario,41.911356,12.444160,Entire home/apt,200.0,4,0,,2,349,Q2_23
113943,907797510485153442,Rental unit in Rome · ★New · 2 bedrooms · 3 be...,507258639,Evgeniia,I Centro Storico,41.898956,12.501748,Entire home/apt,128.0,1,0,,2,307,Q2_23


In [141]:
# Sanity check: number of Rome rows whose price is still missing after the merge.
merged_data_rome["price"].isna().sum()

0

In [146]:
# Sanity check: distinct quarter labels present in the Rome data.
merged_data_rome.loc[:, "quarter"].unique()

array(['Q1_24', 'Q4_23', 'Q3_23', 'Q2_23'], dtype=object)

In [None]:
# Madrid: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/madrid_listing_Q1_24.csv',
    'data/madrid_listing_Q4_23.csv',
    'data/madrid_listing_Q3_23.csv',
    'data/madrid_listing_Q2_23.csv',
]
merged_data_madrid = merge_csv_files(file_list)
merged_data_madrid

In [None]:
# Sanity check: number of Madrid rows whose price is still missing after the merge.
merged_data_madrid["price"].isna().sum()

0

In [None]:
# Sanity check: distinct quarter labels present in the Madrid data.
merged_data_madrid.loc[:, "quarter"].unique()

array(['Q1', 'Q4', 'Q3', 'Q2'], dtype=object)

In [None]:
# Barcelona: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/barcelona_listing_Q1_24.csv',
    'data/barcelona_listing_Q4_23.csv',
    'data/barcelona_listing_Q3_23.csv',
    'data/barcelona_listing_Q2_23.csv',
]
merged_data_barcelona = merge_csv_files(file_list)
merged_data_barcelona

In [None]:
# Sanity check: number of Barcelona rows whose price is still missing after the merge.
merged_data_barcelona["price"].isna().sum()

In [None]:
# Sanity check: distinct quarter labels present in the Barcelona data.
merged_data_barcelona.loc[:, "quarter"].unique()

In [None]:
# Istanbul: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/istanbul_listing_Q1_24.csv',
    'data/istanbul_listing_Q4_23.csv',
    'data/istanbul_listing_Q3_23.csv',
    'data/istanbul_listing_Q2_23.csv',
]
merged_data_istanbul = merge_csv_files(file_list)
merged_data_istanbul

In [None]:
# Sanity check: number of Istanbul rows whose price is still missing after the merge.
merged_data_istanbul["price"].isna().sum()

In [None]:
# Sanity check: distinct quarter labels present in the Istanbul data.
merged_data_istanbul.loc[:, "quarter"].unique()

In [None]:
# London: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/london_listing_Q1_24.csv',
    'data/london_listing_Q4_23.csv',
    'data/london_listing_Q3_23.csv',
    'data/london_listing_Q2_23.csv',
]
merged_data_london = merge_csv_files(file_list)
merged_data_london

In [None]:
# Sanity check: number of London rows whose price is still missing after the merge.
merged_data_london["price"].isna().sum()

In [None]:
# Sanity check: distinct quarter labels present in the London data.
merged_data_london.loc[:, "quarter"].unique()

In [None]:
# Paris: merge the four quarterly listing files, newest quarter first.
file_list = [
    'data/paris_listing_Q1_24.csv',
    'data/paris_listing_Q4_23.csv',
    'data/paris_listing_Q3_23.csv',
    'data/paris_listing_Q2_23.csv',
]
merged_data_paris = merge_csv_files(file_list)
merged_data_paris

In [None]:
# Sanity check: number of Paris rows whose price is still missing after the merge.
merged_data_paris["price"].isna().sum()

In [None]:
# Sanity check: distinct quarter labels present in the Paris data.
merged_data_paris.loc[:, "quarter"].unique()

### Creating final data frame with all city data

In [None]:
def merge_data_frames(df_list, city_names=None):
    """Label each per-city DataFrame with its city and stack them into one frame.

    The i-th frame in ``df_list`` receives the i-th entry of ``city_names`` in
    a new ``city`` column. The input frames are left untouched: the label is
    added to a copy of each frame, not to the caller's objects.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        One DataFrame per city, in the same order as ``city_names``.
    city_names : sequence of str, optional
        Labels to assign, positionally matched with ``df_list``. Defaults to
        ``['Rome', 'Madrid', 'Barcelona', 'Istanbul', 'London', 'Paris']``.
        It may be longer than ``df_list``; surplus names are ignored.

    Returns
    -------
    pandas.DataFrame
        All frames concatenated in order, with a ``city`` column and a fresh
        0..n-1 index.

    Raises
    ------
    IndexError
        If ``df_list`` holds more frames than there are city names.
    ValueError
        If ``df_list`` is empty (raised by ``pd.concat``).
    """
    if city_names is None:
        city_names = ['Rome', 'Madrid', 'Barcelona', 'Istanbul', 'London', 'Paris']

    # Fail before doing any work rather than part-way through the labelling.
    if len(df_list) > len(city_names):
        raise IndexError(
            f"got {len(df_list)} dataframes but only {len(city_names)} city names"
        )

    # assign() returns a new frame, so re-running this cell never changes the inputs.
    labelled = [df.assign(city=city) for df, city in zip(df_list, city_names)]

    return pd.concat(labelled, ignore_index=True)


In [None]:
# Collect the per-city frames; the order must match the city order used by
# merge_data_frames (Rome, Madrid, Barcelona, Istanbul, London, Paris).
df_list = [
    merged_data_rome,
    merged_data_madrid,
    merged_data_barcelona,
    merged_data_istanbul,
    merged_data_london,
    merged_data_paris,
]
final_dataframe = merge_data_frames(df_list)

In [None]:
# Show the combined frame for all cities (pandas truncates the middle rows).
final_dataframe

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,quarter,city
0,2737,"Elif's room in cozy, clean flat.",3047,Elif,VIII Appia Antica,41.871360,12.482150,Private room,56.0,7,5,0.04,6,365,Q1,Rome
1,139330,abbraccia Morfeo (2b),679555,Mario,V Prenestino/Centocelle,41.881420,12.544220,Private room,94.0,2,26,0.29,4,62,Q1,Rome
2,3079,Cozy apartment (2-4)with Colisseum view,3504,Laura,I Centro Storico,41.895000,12.491170,Entire home/apt,120.0,90,21,0.13,6,253,Q1,Rome
3,140801,"Rome Studio Rental, Colosseum",685600,Juan Carlos,I Centro Storico,41.887390,12.496900,Entire home/apt,81.0,3,192,1.26,1,222,Q1,Rome
4,11834,"Rome ""Charming Boschetto Studio""",44552,Serena,I Centro Storico,41.895447,12.491181,Entire home/apt,110.0,2,224,1.43,1,181,Q1,Rome
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092243,2404876,Rental unit in Paris · ★4.77 · 3 bedrooms · 3 ...,2616123,Corinne,Popincourt,48.863560,2.371900,Entire home/apt,195.0,3,113,1.01,1,8,Q2,Paris
1092244,876625296659823798,Rental unit in Paris · ★New · 1 bedroom · 1 be...,148422008,Anne,Batignolles-Monceau,48.885302,2.311331,Entire home/apt,62.0,100,0,,1,203,Q2,Paris
1092245,574584027642384480,Condo in Paris · ★5.0 · 4 bedrooms · 3 beds · ...,109810931,Lionel,Vaugirard,48.854590,2.292530,Entire home/apt,400.0,3,6,0.55,1,34,Q2,Paris
1092246,50718612,Hostel in Paris · ★4.96 · 1 bedroom · 6 beds ·...,315206797,Les Piaules,Popincourt,48.868940,2.377910,Private room,479.0,1,23,1.02,10,292,Q2,Paris


In [None]:
# Sanity check: distinct city labels present in the combined frame.
final_dataframe.loc[:, "city"].unique()

array(['Rome', 'Madrid', 'Barcelona', 'Istanbul', 'London', 'Paris'],
      dtype=object)

In [125]:

file_list = [
    'data/rome_listing_Q1_24.csv',
    'data/rome_listing_Q4_23.csv',
    'data/rome_listing_Q3_23.csv',
    'data/rome_listing_Q2_23.csv',
]
for path in file_list:
    # e.g. 'data/rome_listing_Q1_24.csv' -> ['data/rome', 'listing', 'Q1', '24.csv']
    tokens = path.split('_')
    # print the quarter token and the year with the file extension stripped
    print (tokens[2], tokens[3].split('.')[0])



Q1 24
Q4 23
Q3 23
Q2 23
