In [None]:
import pandas as pd
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots  # Correct import for make_subplots

In [None]:
# Read the second sheet (sheet_name is 0-based) of the apartment buying-cost workbook.
apt_buying = pd.read_excel("../data/raw/Apartment_buying_cost_over_time.xlsx", sheet_name=1)
# Give the "Unnamed: 0" column the name "city" (reassign instead of mutating in place
# so re-running the cell is safe).
apt_buying = apt_buying.rename(columns={"Unnamed: 0": "city"})

# Drop the rows with index labels 3..6.
# NOTE(review): presumably these are the non-1-bedroom rows — confirm against the sheet.
buying_one_br = apt_buying.drop(range(3, 7))

# Normalise the headers: trim surrounding whitespace, then keep only the first
# whitespace-separated token of each column name.
buying_one_br.columns = buying_one_br.columns.str.strip()
buying_one_br.columns = [col.split()[0] for col in buying_one_br.columns]


# Read the first sheet of the numbeo statistics workbook.
monthly_salary = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=0)
# Forward-fill missing cells. DataFrame.ffill() replaces fillna(method='ffill'),
# whose `method` argument is deprecated in recent pandas releases.
monthly_salary = monthly_salary.ffill()
monthly_salary = monthly_salary.rename(columns={"City": "city"})
# Vectorised lower-casing; unlike calling .lower() on every element it does not
# raise on a missing value that survived the forward fill.
monthly_salary["city"] = monthly_salary["city"].str.lower()


# Drop the rows with index labels 3..8 (previously two consecutive drops of 3..5 and 6..8).
# NOTE(review): which metrics these rows hold is not visible here — confirm against the sheet.
monthly_salary = monthly_salary.drop(range(3, 9))

buying_one_br = buying_one_br.round(2)

buying_one_br

In [None]:


# Hard-coded example table: the first three rows hold the rent series and the
# last three the after-tax salary series, one row per city in each group.
data_rent = {
    'Unnamed: 0': ['1 bed apartment (rent)'] * 3 + ['Av salary (after tax)'] * 3,
    'city': ['lisbon', 'berlin', 'paris'] * 2,
    '2019': [894.00, 905.53, 1201.81, 931.03, 2360.33, 2308.96],
    '2020': [843.29, 889.28, 1222.21, 1028.22, 2397.59, 2510.77],
    '2021': [868.30, 1036.35, 1256.88, 1034.12, 2971.54, 2791.18],
    '2022': [1024.46, 1237.16, 1273.16, 1062.71, 2987.30, 2601.82],
    '2023': [1259.74, 1276.16, 1333.65, 1169.53, 3128.02, 2846.41],
    '2024': [1413.16, 1186.62, 1258.63, 1235.38, 2952.47, 3076.78],
}

# Hard-coded example table for buying a 1-bedroom apartment, one row per city.
data_buying_one_br = {
    'city': ['Lisbon 1br', 'Berlin 1br', 'Paris 1 br'],
    '2019': [940.91, 1522.45, 1918.05],
    '2020': [958.12, 1472.30, 2032.62],
    '2021': [1054.79, 1689.52, 2086.78],
    '2022': [1321.22, 2071.55, 2076.73],
    '2023': [1137.77, 2003.01, 1974.07],
    '2024': [1295.57, 1937.36, 2055.01],
}

# Rent/salary frame, with the city column lower-cased.
monthly_salary = pd.DataFrame(data_rent).assign(
    city=lambda frame: frame["city"].str.lower()
)

# Buying frame: keep only the first run of ASCII letters of each label
# ('Paris 1 br' -> 'Paris') and lower-case it so it matches monthly_salary's cities.
buying_one_br = pd.DataFrame(data_buying_one_br)
city_word = buying_one_br["city"].str.extract(r'([a-zA-Z]+)', expand=False)
buying_one_br["city"] = city_word.str.lower()

# One row of panels, one panel per city; every panel shares the same y-axis.
panel_titles = ["Lisbon", "Berlin", "Paris"]
fig = make_subplots(
    rows=1,
    cols=len(panel_titles),
    subplot_titles=panel_titles,
    shared_yaxes=True,
    vertical_spacing=0.1,
)

# Function to plot the data for each city
def plot_city_data(city_name, row, col):
    """Add the rent, buying-price and income lines for one city to ``fig``.

    Reads the module-level ``monthly_salary`` and ``buying_one_br`` frames and
    appends three line+marker traces (in that order) to the module-level ``fig``.

    Parameters:
        city_name: city value to match in the ``city`` column of both frames.
        row: subplot row passed through to ``fig.add_trace``.
        col: subplot column passed through to ``fig.add_trace``.
    """
    year_labels = ['2019', '2020', '2021', '2022', '2023', '2024']

    # Row selection: one metric of one city from the rent/salary frame,
    # and the city's row from the buying frame.
    in_city = monthly_salary['city'] == city_name
    metric = monthly_salary['Unnamed: 0']
    rent_rows = monthly_salary[in_city & (metric == '1 bed apartment (rent)')]
    income_rows = monthly_salary[in_city & (metric == 'Av salary (after tax)')]
    purchase_rows = buying_one_br[buying_one_br['city'] == str(city_name)]

    # Resolve all three y-series before touching the figure, so a missing year
    # column fails before any trace has been added.
    rent_y = rent_rows[year_labels].to_numpy().flatten()
    income_y = income_rows[year_labels].to_numpy().flatten()
    purchase_y = purchase_rows[year_labels].to_numpy().flatten()

    # (legend suffix, y-values, colour) in the order the traces are added.
    series_specs = (
        ('Rent', rent_y, 'blue'),
        ('Buying Price', purchase_y, 'green'),
        ('Income', income_y, 'red'),
    )
    for label, y_values, colour in series_specs:
        fig.add_trace(
            go.Scatter(
                x=year_labels,
                y=y_values,
                mode='lines+markers',
                name=f'{city_name} {label}',
                line=dict(color=colour, width=2),
                marker=dict(size=8, color=colour),
            ),
            row=row,
            col=col,
        )

# Draw each city in its own column of the single-row grid (columns are 1-based).
for col_index, city in enumerate(['lisbon', 'berlin', 'paris'], start=1):
    plot_city_data(city, 1, col_index)

# Figure-level settings.
fig.update_layout(
    title="Income vs Rent and Buying Price for Cities",
    showlegend=True,
    height=600,
    width=1000,
)

# `xaxis_title=` in update_layout only sets the first subplot's x-axis, which left
# the other two panels without a "Year" label. update_xaxes() with no row/col
# applies the title to every subplot's x-axis.
fig.update_xaxes(title_text="Year")
# The y-axis is shared across the row, so its title goes on the first panel only.
fig.update_yaxes(title_text="Amount (€)", row=1, col=1)

# Show the plot
fig.show()


In [None]:
# Reduce each buying_one_br city label to its first run of ASCII letters,
# lower-cased, so it can be matched against monthly_salary's city values.
city_word = buying_one_br["city"].str.extract(r'([a-zA-Z]+)', expand=False)
buying_one_br["city"] = city_word.str.lower()

# Fresh figure: one row of panels, one per city, all sharing the same y-axis.
panel_titles = ["Lisbon", "Berlin", "Paris"]
fig = make_subplots(
    rows=1,
    cols=len(panel_titles),
    subplot_titles=panel_titles,
    shared_yaxes=True,
    vertical_spacing=0.1,
)

# Function to plot the data for each city
# NOTE(review): this redefines plot_city_data, which an earlier cell of this
# notebook already defines with the same behaviour.
def plot_city_data(city_name, row, col):
    """Add the rent, buying-price and income lines for one city to ``fig``.

    Reads the module-level ``monthly_salary`` and ``buying_one_br`` frames and
    appends three line+marker traces (in that order) to the module-level ``fig``.

    Parameters:
        city_name: city value to match in the ``city`` column of both frames.
        row: subplot row passed through to ``fig.add_trace``.
        col: subplot column passed through to ``fig.add_trace``.
    """
    year_labels = ['2019', '2020', '2021', '2022', '2023', '2024']

    # Row selection: one metric of one city from the rent/salary frame,
    # and the city's row from the buying frame.
    in_city = monthly_salary['city'] == city_name
    metric = monthly_salary['Unnamed: 0']
    rent_rows = monthly_salary[in_city & (metric == '1 bed apartment (rent)')]
    income_rows = monthly_salary[in_city & (metric == 'Av salary (after tax)')]
    purchase_rows = buying_one_br[buying_one_br['city'] == str(city_name)]

    # Resolve all three y-series before touching the figure, so a missing year
    # column fails before any trace has been added.
    rent_y = rent_rows[year_labels].to_numpy().flatten()
    income_y = income_rows[year_labels].to_numpy().flatten()
    purchase_y = purchase_rows[year_labels].to_numpy().flatten()

    # (legend suffix, y-values, colour) in the order the traces are added.
    series_specs = (
        ('Rent', rent_y, 'blue'),
        ('Buying Price', purchase_y, 'green'),
        ('Income', income_y, 'red'),
    )
    for label, y_values, colour in series_specs:
        fig.add_trace(
            go.Scatter(
                x=year_labels,
                y=y_values,
                mode='lines+markers',
                name=f'{city_name} {label}',
                line=dict(color=colour, width=2),
                marker=dict(size=8, color=colour),
            ),
            row=row,
            col=col,
        )

# Draw each city in its own column of the single-row grid (columns are 1-based).
for col_index, city in enumerate(['lisbon', 'berlin', 'paris'], start=1):
    plot_city_data(city, 1, col_index)

# Figure-level settings.
fig.update_layout(
    title="Income vs Rent and Buying Price for Cities",
    showlegend=True,
    height=600,
    width=1000,
)

# `xaxis_title=` in update_layout only sets the first subplot's x-axis, which left
# the other two panels without a "Year" label. update_xaxes() with no row/col
# applies the title to every subplot's x-axis.
fig.update_xaxes(title_text="Year")
# The y-axis is shared across the row, so its title goes on the first panel only.
fig.update_yaxes(title_text="Amount (€)", row=1, col=1)

# Show the plot
fig.show()