<a href="https://colab.research.google.com/github/kamrulkonok/UPC_ML_Project/blob/Konok/merge_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import zipfile
import os
import pandas as pd
import plotly.graph_objects as go

In [23]:
zip_file_path = '/content/airbnb_data.zip'

extracted_folder_path = '/content/airbnb_data/'

if not os.path.exists(extracted_folder_path):
    os.makedirs(extracted_folder_path)

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

file_names = os.listdir(extracted_folder_path)
merged_df = pd.DataFrame()

In [24]:
for file_name in file_names:
    if file_name.endswith('.csv'):
        file_path = os.path.join(extracted_folder_path, file_name)
        base_name = os.path.splitext(file_name)[0]
        parts = base_name.split('_')
        city = parts[0]
        day_type = parts[1]
        df = pd.read_csv(file_path)

        df['city'] = city
        df['day_type'] = day_type

        merged_df = pd.concat([merged_df, df], ignore_index=True)

In [25]:
output_csv_path = '/content/airbnb_data.csv'
merged_df.to_csv(output_csv_path, index=False)

print("Merged CSV file created at:", output_csv_path)

Merged CSV file created at: /content/airbnb_data.csv


In [26]:
df = pd.read_csv('/content/airbnb_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,realSum,room_type,room_shared,room_private,person_capacity,host_is_superhost,multi,biz,cleanliness_rating,...,dist,metro_dist,attr_index,attr_index_norm,rest_index,rest_index_norm,lng,lat,city,day_type
0,0,238.990459,Entire home/apt,False,False,6.0,True,0,1,10.0,...,0.359355,0.352643,404.404677,24.116552,893.477343,67.656853,19.05074,47.50076,budapest,weekdays
1,1,300.794285,Entire home/apt,False,False,6.0,False,0,1,9.0,...,0.929427,0.200235,1676.87603,100.0,452.539717,34.2677,19.04493,47.50405,budapest,weekdays
2,2,162.381915,Entire home/apt,False,False,4.0,True,0,0,10.0,...,2.45084,0.279452,163.588493,9.755551,191.99227,14.538245,19.0217,47.49882,budapest,weekdays
3,3,118.43775,Entire home/apt,False,False,2.0,False,0,0,9.0,...,1.559449,0.477971,191.719844,11.433155,326.215588,24.702048,19.06301,47.51126,budapest,weekdays
4,4,134.417446,Entire home/apt,False,False,4.0,True,1,0,10.0,...,1.113803,0.270102,198.60346,11.843658,635.515884,48.123218,19.069,47.499,budapest,weekdays


In [27]:
df_weekdays = df[df['day_type'] == 'weekdays']
df_weekends = df[df['day_type'] == 'weekends']

def plot_heatmap(df, day_type, col='realSum', title="Airbnb Prices"):
    trace = go.Densitymapbox(
        lat=df["lat"],
        lon=df["lng"],
        z=df[col],
        radius=10,
        colorscale="Twilight",
        opacity=0.7,
        showscale=True,
        colorbar=dict(
            title=title,
            titleside="top",
            thickness=20,
            ticksuffix="€"
        )
    )

    mapbox_style = "carto-positron"
    center_lat = df["lat"].mean()
    center_lon = df["lng"].mean()
    layout = go.Layout(
        title=f"<b>Airbnb Prices</b> ({day_type.capitalize()})",
        titlefont=dict(size=16),
        mapbox=dict(
            style=mapbox_style,
            center=dict(lat=center_lat, lon=center_lon),
            zoom=10
        ),
        hovermode="closest",
        margin=dict(l=30, r=30, t=50, b=30)
    )

    fig = go.Figure(data=[trace], layout=layout)

    return fig

In [28]:
fig_weekdays = plot_heatmap(df_weekdays, 'weekday')
fig_weekends = plot_heatmap(df_weekends, 'weekend')

fig_weekdays.show()
fig_weekends.show()