In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
from functions import concatenate_eurostat_countries, cleaning_eurostat_data_for_viz, clean_countries_numbeo, clean_cities
# Load the three source tables through the project's helper functions.
eurostat_df = concatenate_eurostat_countries()
countries = clean_countries_numbeo()
cities = clean_cities()
# The helper returns six frames derived from eurostat_df.
# NOTE(review): judging by how later cells use them, the *_tidy frames look long-format
# (Country / Year / value) and the *_df frames wide (one column per year) — confirm in functions.py.
income_tidy, housing_tidy, rental_tidy, income_df, housing_df, rental_df = cleaning_eurostat_data_for_viz(eurostat_df)

In [None]:
# NOTE(review): adding_minimum_wage, cleaning_mortgages and all_data_together_cities are also
# defined in later cells of this notebook; those definitions shadow the names imported here.
# get_percentage_cities is not used in the cells visible in this notebook section.
from functions import adding_minimum_wage, cleaning_mortgages, all_data_together_cities, get_percentage_cities 
# Rebind eurostat_df to the frame returned by the imported helper.
eurostat_df = adding_minimum_wage(eurostat_df)

In [None]:
def cleaning_mortgages(cities_df=None):
    """Load the monthly mortgage costs and append them below the cities table.

    The second sheet of the workbook is read, its "<year> cost monthly" columns
    are renamed to plain year labels, incomplete rows are dropped and values are
    rounded to two decimals. The mortgage rows are tagged with their city
    *before* being concatenated, so the result no longer depends on the cities
    table having an exact number of rows.

    Parameters
    ----------
    cities_df : pandas.DataFrame, optional
        Cities table to append the mortgage rows to. Defaults to the
        notebook-level ``cities`` frame, which keeps ``cleaning_mortgages()``
        working as before.

    Returns
    -------
    pandas.DataFrame
        ``cities_df`` followed by the mortgage rows, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the sheet does not contain exactly six complete mortgage rows.
    """
    if cities_df is None:
        # Backward-compatible fallback to the frame defined earlier in the notebook.
        cities_df = cities

    year_labels = {f"{year} cost monthly": str(year) for year in range(2019, 2025)}
    mortgages = (
        pd.read_excel("../data/raw/Apartment_buying_cost_over_time.xlsx", sheet_name=1)
        .rename(columns={"Unnamed: 0": "Type", **year_labels})
        .dropna()
        .round(2)
        # Same row order the original positional patch relied on:
        # Lisbon, Berlin, Paris for the first block, then again for the second.
        .assign(City=["Lisbon", "Berlin", "Paris"] * 2)
    )
    return pd.concat([cities_df, mortgages], ignore_index=True)

# Build the cities + mortgages table with the in-notebook definition above,
# which reads the notebook-level `cities` frame.
cities_mortgages = cleaning_mortgages()

In [None]:
def adding_minimum_wage(eurostat_df):
    """Append yearly minimum-wage rows to the Eurostat table.

    The fourth sheet of the workbook is read, every numeric column is
    multiplied by 12, and the rows are appended under the index label
    "Min Wage" with the sheet's first column exposed as "Country".

    The function is idempotent: rows already labelled "Min Wage" in
    ``eurostat_df`` are replaced rather than duplicated, so calling it (or
    re-running the cell) more than once yields the same frame.

    Parameters
    ----------
    eurostat_df : pandas.DataFrame
        Table to extend; it is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``eurostat_df`` without any previous "Min Wage" rows, followed by the
        freshly computed ones.
    """
    min_wage = pd.read_excel("../data/raw/week_3_project_data.xlsx", sheet_name=3)

    # Only the numeric columns are scaled by 12.
    min_wage_yr = min_wage.select_dtypes(include="number") * 12
    # Country identifier goes first, as in the rest of the table.
    min_wage_yr.insert(0, "Country", min_wage["Unnamed: 0"])
    min_wage_yr.index = ["Min Wage"] * len(min_wage_yr)

    # Drop rows from an earlier call so repeated runs do not stack duplicates.
    already_added = eurostat_df.index == "Min Wage"
    return pd.concat([eurostat_df.loc[~already_added], min_wage_yr])
    
# Rebind eurostat_df using the in-notebook definition above.
# NOTE(review): this is the second call on eurostat_df (an earlier cell already calls the
# imported version) — confirm the minimum-wage rows do not end up appended twice.
eurostat_df = adding_minimum_wage(eurostat_df)

In [None]:
def all_data_together_cities(countries, cities_mortgages):
    """Combine the cities/mortgage table with the national minimum-wage rows.

    The "Min wage (after tax)" rows of ``countries`` are appended below
    ``cities_mortgages``. Each of those rows is assigned a city from its
    country code (PT -> Lisbon, DE -> Berlin, FR -> Paris) rather than from a
    fixed row number, so the labelling stays correct whatever the order or
    number of minimum-wage rows. The "Country" column is dropped afterwards.

    Parameters
    ----------
    countries : pandas.DataFrame
        Country-level table with at least "Type" and "Country" columns.
    cities_mortgages : pandas.DataFrame
        City-level table whose rows labelled 12-14 and 15-17 are the 1-bed and
        3-bed mortgage rows (the layout the "Type" relabelling below relies on).

    Returns
    -------
    pandas.DataFrame
        The combined table with a fresh 0..n-1 index; neither input is modified.
    """
    city_by_country = {"PT": "Lisbon", "DE": "Berlin", "FR": "Paris"}

    # Minimum wage lives in the countries table; tag each row with its city.
    min_wage = countries[countries["Type"] == "Min wage (after tax)"]
    min_wage = min_wage.assign(City=min_wage["Country"].map(city_by_country))

    final = (
        pd.concat([cities_mortgages, min_wage], ignore_index=True)
        # "Country" only exists on the rows that came from the countries table.
        .drop(columns=["Country"])
    )

    # Give the mortgage rows uniform type labels.
    final.loc[[12, 13, 14], "Type"] = "Mortgage 1bed"
    final.loc[[15, 16, 17], "Type"] = "Mortgage 3bed"
    return final

# Combine the cities/mortgage table with the minimum-wage rows taken from `countries`.
final = all_data_together_cities(countries, cities_mortgages)

In [None]:
# Keep only the rows for Portugal (country code "PT") from the long-format income table.
portugal_income = income_tidy[income_tidy["Country"] == "PT"]

# One line over time, with a marker on every data point.
fig = px.line(
    portugal_income,
    x='Year',          # x-axis: Year
    y='Income',        # y-axis: Income
    markers=True,      # Add markers at each data point
    title="Average income in Portugal (2019-2023)"  # Chart title
)

# Show the plot
fig.show()

In [None]:
# Stack the housing and rental tables row-wise into a single frame.
combined_data = pd.concat([housing_df, rental_df])

# Portugal only; the index is moved into a regular column and exposed as "Type".
df_portugal = (
    combined_data.loc[combined_data["Country"].eq("PT")]
    .reset_index()
    .rename(columns={"index": "Type"})
)

# Wide -> long: every column other than Type/Country becomes a (Year, Value) pair.
df_portugal_melted = pd.melt(
    df_portugal,
    id_vars=["Type", "Country"],
    var_name="Year",
    value_name="Value",
)

fig = px.line(
    data_frame=df_portugal_melted,
    title="Housing vs Rental index in Portugal (2019-2023)",
    x="Year",
    y="Value",
    color="Type",  # one line per Type value (rows from housing_df vs rental_df)
    markers=True,
)

fig.show()

In [None]:
# Row subsets reused further down the notebook: Lisbon (city level) and Portugal (country level).
df_lisbon = cities.loc[cities["City"].eq("Lisbon")]
df_portugal = countries.loc[countries["Country"].eq("PT")]

# Only the two series being compared: 1-bed rent and average net salary.
df_rent_salary = df_lisbon.loc[
    df_lisbon["Type"].isin(["1 bed apartment (rent)", "Av salary (after tax)"])
]
# Wide -> long so that each remaining column becomes a Year with its Value.
df_rent_salary_melted = pd.melt(
    df_rent_salary,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

fig = px.line(
    data_frame=df_rent_salary_melted,
    title="Comparison between Rent and Salary in Lisbon (2019-2024)",
    x="Year",
    y="Value",
    color="Type",  # one line for rent, one for salary
    labels=dict(Value="Amount (€)", Year="Year", Type="Type of Data"),
    markers=True,
)

# Render the chart
fig.show()

In [None]:
# 1-bedroom rent rows for every city, reshaped wide -> long (one row per Type/City/Year).
cities_one_bed_apt = cities.loc[cities["Type"].eq("1 bed apartment (rent)")]
cities_one_bed_apt_melted = pd.melt(
    cities_one_bed_apt,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

fig = px.line(
    data_frame=cities_one_bed_apt_melted,
    title="Average rent for 1-bedroom apartment per month in Lisbon, Berlin, and Paris (2019-2024)",
    x="Year",
    y="Value",
    facet_col="Type",
    line_group="City",
    color="City",  # one coloured line per city
    color_discrete_map=dict(Lisbon="pink", Paris="blue", Berlin="green"),
    labels=dict(Value="Amount (€)", Year="Year"),
    markers=True,
)

fig.show()

In [None]:
# Average net salary rows for every city, reshaped wide -> long (one row per Type/City/Year).
cities_salaries = cities.loc[cities["Type"].eq("Av salary (after tax)")]
cities_salaries_melted = pd.melt(
    cities_salaries,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

fig = px.line(
    data_frame=cities_salaries_melted,
    title="Average salaries per month in Lisbon, Berlin and Paris (2019-2024)",
    x="Year",
    y="Value",
    facet_col="Type",
    line_group="City",
    color="City",  # one coloured line per city
    color_discrete_map=dict(Lisbon="pink", Paris="blue", Berlin="green"),
    labels=dict(Value="Amount (€)", Year="Year"),
    markers=True,
)

fig.show()

In [None]:
# 3-bedroom rent rows for every city; the name is then rebound to the long-format version.
three_bed_apt_rent = cities.loc[cities["Type"].eq("3 bed apartment (rent)")]
three_bed_apt_rent = pd.melt(
    three_bed_apt_rent,
    id_vars=["Type", "City"],
    var_name="Year",
    value_name="Value",
)

fig = px.line(
    data_frame=three_bed_apt_rent,
    title="Average rent for 3bedroom apartment per month in Lisbon, Berlin and Paris (2019-2024)",
    x="Year",
    y="Value",
    facet_col="Type",
    line_group="City",
    color="City",  # one coloured line per city
    color_discrete_map=dict(Lisbon="pink", Paris="blue", Berlin="green"),
    labels=dict(Value="Amount (€)", Year="Year"),
    markers=True,
)

fig.show()

In [None]:
# Stack Lisbon's city rows on top of Portugal's country rows. The country rows carry
# no "City" value, so they are labelled "Portugal" before stacking; patching fixed
# row numbers afterwards only works for one exact row count in each frame.
national_average = (
    pd.concat([df_lisbon, df_portugal.assign(City="Portugal")], ignore_index=True)
    .rename(columns={"City": "City/Country"})
    .drop(columns=["Country"])
)

# Keep only the two income series for the two areas being compared.
salary_data = national_average[
    (national_average['Type'].isin(['Av salary (after tax)', 'Min wage (after tax)'])) &
    (national_average['City/Country'].isin(['Portugal', 'Lisbon']))]

# Wide -> long: each remaining column becomes a Year with its Value.
salary_data_melted = salary_data.melt(id_vars=["Type", "City/Country"], var_name="Year", value_name="Value")
fig = px.line(
    salary_data_melted,
    x="Year",
    y="Value",
    color="Type",
    line_dash="City/Country",   # Different dash styles for Lisbon and Portugal
    markers=True,
    title="Average Salary and Minimum Wage in Lisbon with National Average (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year", "Type": "Type of Income"}
)

fig.show()

In [None]:
# Rent (1-bed) and average net salary rows for every city, reshaped wide -> long.
df_rent_salary_all = cities[cities['Type'].isin(['1 bed apartment (rent)', 'Av salary (after tax)'])]
df_rent_salary_all_melted = df_rent_salary_all.melt(id_vars=["Type", "City"], var_name="Year", value_name="Value")

# Colour encodes the Type (rent vs salary) and each city gets its own facet, so a
# colour map keyed by city name would never match a colour value and is not passed.
fig = px.line(
    df_rent_salary_all_melted,
    x="Year",
    y="Value",
    color="Type",               # Different lines for rent and salary
    line_group="Type",          # Keep each 'Type' line consistent
    markers=True,
    facet_col="City",           # Separate blocks for each City
    title="Comparison between Rent and Salary in Lisbon, Berlin, and Paris (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year", "Type": "Type of Data"},
)

# Show the plot
fig.show()

In [None]:
# Rent (1-bed) and net minimum wage rows for every country, reshaped wide -> long.
df_rent_salary_countries = countries[countries['Type'].isin(['1 bed apartment (rent)', 'Min wage (after tax)'])]
df_rent_salary_countries_melted = df_rent_salary_countries.melt(id_vars=["Type", "Country"], var_name="Year", value_name="Value")

# Colour encodes the Type (rent vs minimum wage) and each country gets its own facet, so a
# colour map keyed by country code would never match a colour value and is not passed.
fig = px.line(
    df_rent_salary_countries_melted,
    x="Year",
    y="Value",
    color="Type",               # Different lines for rent and minimum wage
    line_group="Type",
    markers=True,
    facet_col="Country",        # Separate blocks for each Country
    title="Comparison between Rent and Minimum wage in Portugal, Germany and France (2019-2024)",
    labels={"Value": "Amount (€)", "Year": "Year", "Type": "Type of Data"},
)

# Show the plot
fig.show()

In [None]:
# Rent (1-bed) and net minimum wage rows for every country, reshaped wide -> long.
countries_salary = countries[countries['Type'].isin(['1 bed apartment (rent)', 'Min wage (after tax)'])]
countries_salary_melted = countries_salary.melt(id_vars=["Type", "Country"], var_name="Year", value_name="Value")

# Initialize the figure
fig = go.Figure()

# Line colour per country code. France uses "blue": a near-white colour would be
# almost invisible on the chart, and blue is the colour used for Paris in the city charts.
country_colors = {
    "PT": "pink",
    "DE": "green",
    "FR": "blue"
}

# For each country add two traces: the minimum wage line first, then the rent line,
# which is filled down to the trace added just before it (that country's minimum wage).
for country in countries_salary_melted['Country'].unique():
    # Rows for the current country only
    country_data = countries_salary_melted[countries_salary_melted['Country'] == country]

    # Separate rent and minimum wage data
    rent_data = country_data[country_data['Type'] == '1 bed apartment (rent)']
    min_wage_data = country_data[country_data['Type'] == 'Min wage (after tax)']

    # Minimum wage: solid line
    fig.add_trace(go.Scatter(
        x=min_wage_data['Year'],
        y=min_wage_data['Value'],
        mode='lines+markers',
        name=f'{country} Min Wage',
        line=dict(color=country_colors[country]),
        legendgroup=country
    ))

    # Rent: dashed line in the same colour, shaded towards the minimum wage trace.
    # The trace order matters because 'tonexty' fills to the previously added trace.
    fig.add_trace(go.Scatter(
        x=rent_data['Year'],
        y=rent_data['Value'],
        mode='lines+markers',
        name=f'{country} Rent',
        line=dict(color=country_colors[country], dash='dash'),
        fill='tonexty',  # Shading between rent and minimum wage
        legendgroup=country
    ))

# Update layout for title and labels
fig.update_layout(
    title="Comparison between Rent (1bed) and Minimum Wage in Portugal, Germany and France",
    xaxis_title="Year",
    yaxis_title="Amount (€)",
    legend_title="Type of Data"
)

# Show the figure
fig.show()