In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
from functions import concatenate_eurostat_countries, cleaning_eurostat_data_for_viz, clean_countries_numbeo, clean_cities

In [None]:
# Load every source table once, up front, so the cells below can reuse them.
eurostat_df = concatenate_eurostat_countries()
cities = clean_cities()
countries = clean_countries_numbeo()

# The Eurostat helper hands back six objects: three "*_tidy" and three "*_df".
(
    income_tidy,
    housing_tidy,
    rental_tidy,
    income_df,
    housing_df,
    rental_df,
) = cleaning_eurostat_data_for_viz(eurostat_df)

eurostat_df

In [None]:
# `countries` is already built by clean_countries_numbeo() in the loading cell
# above; calling the cleaner again here only repeated that work, so this cell
# just displays the existing frame.
countries

In [None]:
# The six Eurostat frames are already unpacked from
# cleaning_eurostat_data_for_viz(eurostat_df) in the loading cell above;
# re-running the cleaner here was redundant, so this cell just displays income_df.
income_df

In [None]:
# Stack the housing and rental frames vertically into one table.
combined_data = pd.concat([housing_df, rental_df], axis=0)

# Keep Portugal only; the old index becomes a regular column named 'Type'.
df_portugal = (
    combined_data.loc[combined_data['Country'] == 'PT']
    .reset_index()
    .rename(columns={'index': 'Type'})
)

# Long form: one row per (Type, Country, Year) with the number in 'Value'.
df_portugal_melted = pd.melt(
    df_portugal,
    id_vars=['Type', 'Country'],
    var_name='Year',
    value_name='Value',
)
df_portugal_melted

In [None]:
# Line chart of the Portugal rows built above: one coloured line per value of
# the 'Type' column (the stacked housing and rental frames).
fig = px.line(
    data_frame=df_portugal_melted,
    x='Year',
    y='Value',
    color='Type',
    title="Housing vs Rental in Portugal (2019-2023)",
    markers=True,
)

fig.show()

In [None]:
# Restrict the tidy income table to Portugal ("PT") and plot Income against Year.
portugal_income = income_tidy.loc[income_tidy["Country"] == "PT"]

fig = px.line(
    data_frame=portugal_income,
    x='Year',
    y='Income',
    title="Average income in Portugal (2019-2023)",
    markers=True,  # draw a marker on every data point
)

fig.show()

In [None]:
# Keep the "1 bed apartment (rent)" rows and reshape to long form
# (one row per Type / City / Year) for plotting.
cities_one_bed_apt = cities[cities['Type'] == "1 bed apartment (rent)"]
cities_one_bed_apt_melted = cities_one_bed_apt.melt(id_vars=["Type", "City"], var_name="Year", value_name="Value")
fig = px.line(
    cities_one_bed_apt_melted,
    x="Year",
    y="Value",
    color="City",
    line_group="City",
    facet_col="Type",
    markers=True,
    # Title and axis labels added so this chart stands on its own, in the same
    # wording as the salary and 3-bed rent charts in this notebook.
    title="Average rent for 1-bedroom apartment per month in Lisbon, Berlin and Paris (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year"},
)

fig.show()

In [None]:
# Keep the "Av salary (after tax)" rows and reshape to long form
# (one row per Type / City / Year) for plotting.
cities_salaries = cities.loc[cities['Type'] == "Av salary (after tax)"]
cities_salaries_melted = pd.melt(
    cities_salaries,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

# One coloured line per city, faceted by Type.
fig = px.line(
    data_frame=cities_salaries_melted,
    x="Year",
    y="Value",
    color="City",
    line_group="City",
    facet_col="Type",
    title="Average salaries per month in Lisbon, Berlin and Paris (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year"},
    markers=True,
)

fig.show()

In [None]:
# Keep the "3 bed apartment (rent)" rows, then reshape to long form
# (one row per Type / City / Year); the same name holds both stages.
three_bed_apt_rent = cities.loc[cities['Type'] == "3 bed apartment (rent)"]
three_bed_apt_rent = pd.melt(
    three_bed_apt_rent,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

# One coloured line per city, faceted by Type.
fig = px.line(
    data_frame=three_bed_apt_rent,
    x="Year",
    y="Value",
    color="City",
    line_group="City",
    facet_col="Type",
    title="Average rent for 3bedroom apartment per month in Lisbon, Berlin and Paris (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year"},
    markers=True,
)

fig.show()

In [None]:
# Slice out the Lisbon rows of the city table and the Portugal ("PT") rows of
# the country table, then show both full source tables for reference.
# Note: `df_portugal` is rebound here; it previously held the Eurostat PT rows.
df_lisbon = cities.loc[cities["City"] == "Lisbon"]
df_portugal = countries.loc[countries["Country"] == "PT"]
display(cities, countries)

In [None]:
# Stack Lisbon's city-level rows on top of Portugal's country-level rows so both
# can be compared through a single "City/Country" column.
national_average = (
    pd.concat([df_lisbon, df_portugal], ignore_index=True)
    .rename(columns={"City": "City/Country"})
    .drop(columns=["Country"])
)

# With ignore_index=True the Portugal rows are exactly the ones that follow the
# Lisbon rows, so select them from len(df_lisbon) onwards (label slicing on the
# fresh 0..n-1 index). This replaces the hard-coded labels 4..8, which only
# worked while df_lisbon had 4 rows and df_portugal had 5.
national_average.loc[len(df_lisbon):, "City/Country"] = "Portugal"
national_average

In [None]:
# Keep the two wage series for Lisbon and Portugal.
wage_types = ['Av salary (after tax)', 'Min wage (after tax)']
places = ['Portugal', 'Lisbon']
salary_data = national_average[
    national_average['Type'].isin(wage_types)
    & national_average['City/Country'].isin(places)
]

# Long form: one row per (Type, City/Country, Year).
salary_data_melted = pd.melt(
    salary_data,
    id_vars=["Type", "City/Country"],
    var_name="Year",
    value_name="Value",
)

# Colour separates the wage type; dash style separates Lisbon from Portugal.
fig = px.line(
    data_frame=salary_data_melted,
    x="Year",
    y="Value",
    color="Type",
    line_dash="City/Country",
    title="Average Salary and Minimum Wage in Lisbon with National Average (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year", "Type": "Type of Income"},
    markers=True,
)

fig.show()

In [None]:
def cleaning_mortgages():
    """Load the monthly mortgage-cost sheet and append it below the city table.

    Reads the second sheet (``sheet_name=1``) of
    ``../data/raw/Apartment_buying_cost_over_time.xlsx``, renames
    "Unnamed: 0" to "Type" and each "<year> cost monthly" column to the plain
    year string, drops rows containing missing values, rounds to two decimals,
    and concatenates the result under the notebook-level ``cities`` frame with
    a fresh 0..n-1 index.

    Returns:
        The combined frame, with "City" filled in for row labels 12-17.

    NOTE(review): the hard-coded labels 12-17 presumably assume ``cities`` has
    12 rows and the sheet contributes 6 rows ordered Lisbon, Berlin, Paris
    (twice) - confirm against the data.
    """
    column_names = {
        "Unnamed: 0": "Type",
        "2019 cost monthly": "2019",
        "2020 cost monthly": "2020",
        "2021 cost monthly": "2021",
        "2022 cost monthly": "2022",
        "2023 cost monthly": "2023",
        "2024 cost monthly": "2024",
    }
    mortgages = (
        pd.read_excel("../data/raw/Apartment_buying_cost_over_time.xlsx", sheet_name=1)
        .rename(columns=column_names)
        .dropna()
        .round(2)
    )

    cities_mortgages = pd.concat([cities, mortgages], ignore_index=True)

    # Tag the appended mortgage rows with their city, by row label.
    rows_by_city = (("Lisbon", [12, 15]), ("Berlin", [13, 16]), ("Paris", [14, 17]))
    for city_name, row_labels in rows_by_city:
        cities_mortgages.loc[row_labels, "City"] = city_name
    return cities_mortgages

cities_mortgages = cleaning_mortgages()

In [None]:
def all_data_together_cities(countries, cities_mortgages):
    """Append the minimum-wage rows to the city + mortgage table.

    Args:
        countries: frame with at least "Country" and "Type" columns; only rows
            whose Type is "Min wage (after tax)" are used.
        cities_mortgages: frame with "City" and "Type" columns (city rows
            followed by mortgage rows).

    Returns:
        A new frame with a fresh 0..n-1 index and no "Country" column, where
        rows 18/19/20 get City Lisbon/Berlin/Paris and rows 12-14 / 15-17 get
        Type "Mortgage 1bed" / "Mortgage 3bed".

    NOTE(review): the row labels are hard-coded; this presumably assumes
    ``cities_mortgages`` has 18 rows and the min-wage rows arrive in
    Portugal, Germany, France order - confirm against the data.
    """
    is_min_wage = countries["Type"] == "Min wage (after tax)"
    stacked = pd.concat([cities_mortgages, countries[is_min_wage]], ignore_index=True)
    final = stacked.drop(columns=["Country"])

    # Country-level minimum wage is attached to each country's city.
    for row_label, city_name in ((18, "Lisbon"), (19, "Berlin"), (20, "Paris")):
        final.loc[row_label, "City"] = city_name

    # Give the mortgage rows explicit Type labels.
    for row_labels, type_name in (([12, 13, 14], "Mortgage 1bed"), ([15, 16, 17], "Mortgage 3bed")):
        final.loc[row_labels, "Type"] = type_name
    return final

# Build the combined table (city rows + mortgage rows + min-wage rows) and show it.
final = all_data_together_cities(countries=countries, cities_mortgages=cities_mortgages)
final

In [None]:
# This cell used to repeat the body of all_data_together_cities inline, but
# `min_wage` only exists as a local variable inside that function, so the
# inline copy raised NameError on a fresh kernel (Restart & Run All).
# Build the table through the function instead; the result is the same frame.
final = all_data_together_cities(countries, cities_mortgages)
final

In [None]:
def get_percentage_cities(final):
    """Express 1-bed rent and mortgage cost as a percentage of income.

    Args:
        final: wide frame with "City" and "Type" columns plus one column per
            year. Column labels are whitespace-stripped before use
            (NOTE(review): this presumes every label is a string - confirm).

    Returns:
        A frame with one row per (City, Year) and the columns
        City, Year, % Avg Salary for Rent, % Avg Salary for Mortgage,
        % Min Wage for Rent, % Min Wage for Mortgage.
    """
    rent = '1 bed apartment (rent)'
    mortgage = 'Mortgage 1bed'
    avg_salary = 'Av salary (after tax)'
    min_wage = 'Min wage (after tax)'

    # Work on a copy of the relevant rows so the caller's frame is untouched.
    wanted_rows = final['Type'].isin([rent, mortgage, avg_salary, min_wage])
    subset = final.loc[wanted_rows].copy()
    subset.columns = subset.columns.str.strip()

    # Long form first (one row per City / Type / Year) ...
    long_form = pd.melt(subset, id_vars=["City", "Type"], var_name="Year", value_name="Value")

    # ... then back to wide with one column per Type for each (City, Year).
    wide = pd.pivot_table(
        long_form,
        index=["City", "Year"],
        columns="Type",
        values="Value",
    ).reset_index()

    # Output column -> (cost column, income column); insertion order is the
    # order of the returned percentage columns.
    ratios = {
        '% Avg Salary for Rent': (rent, avg_salary),
        '% Avg Salary for Mortgage': (mortgage, avg_salary),
        '% Min Wage for Rent': (rent, min_wage),
        '% Min Wage for Mortgage': (mortgage, min_wage),
    }
    for label, (cost, income) in ratios.items():
        wide[label] = (wide[cost] / wide[income]) * 100

    return wide[['City', 'Year', *ratios]]

# Compute the rent / mortgage share-of-income table from `final` and show it.
display_data = get_percentage_cities(final=final)
display_data