In [None]:
import pandas as pd
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

HOUSING RENTAL INCOME FORMATTING
---

In [None]:
# Load worksheet 0 of the project workbook and give every row the index label 'Housing'.
housing = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=0,
)
housing.index = ['Housing'] * housing.shape[0]
housing

In [None]:
# Load worksheet 1 of the project workbook and give every row the index label 'Rental'.
rental = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=1,
)
rental.index = ['Rental'] * rental.shape[0]
rental

In [None]:
# Load worksheet 2 of the project workbook and give every row the index label 'Income'.
income = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=2,
)
income.index = ['Income'] * income.shape[0]
income

In [None]:
# Stack the three sheets into a single frame. The positional index produced by
# ignore_index=True is replaced straight away with one source label per row.
labelled_frames = [("Housing", housing), ("Rental", rental), ("Income", income)]
final_df = pd.concat([frame for _, frame in labelled_frames], ignore_index=True)
index_labels = [label for label, frame in labelled_frames for _ in range(len(frame))]
final_df.index = index_labels

In [None]:
# Give the header-less first column ("Unnamed: 0") the name "Country".
final_df = final_df.rename(columns={"Unnamed: 0": "Country"})
final_df

In [None]:
# Split the stacked frame back into one frame per source, selecting rows by index label.
income_df = final_df.loc[final_df.index == 'Income']
housing_df = final_df.loc[final_df.index == "Housing"]
rental_df = final_df.loc[final_df.index == "Rental"]

In [None]:
# Reshape each wide frame to long ("tidy") form: one row per (Country, Year),
# with the former column headers in "Year" and the cell values in a measure-named column.
income_tidy = pd.melt(income_df, id_vars="Country", var_name="Year", value_name="Income")
housing_tidy = pd.melt(housing_df, id_vars="Country", var_name="Year", value_name="Housing")
rental_tidy = pd.melt(rental_df, id_vars="Country", var_name="Year", value_name="Rental")


HOUSING RENTAL INCOME GRAPHS
---

In [None]:
# Inputs: income_tidy, housing_tidy and rental_tidy (long-format frames built above).

# Tag each tidy frame with the measure it holds.
income_tidy['Type'] = 'Income'
housing_tidy['Type'] = 'Housing'
rental_tidy['Type'] = 'Rental'

# Long frame with all three measures; used below only to find the overall year range.
combined_data = pd.concat([income_tidy, housing_tidy, rental_tidy], axis=0)

# One panel per measure: (data, value column, panel title, y-axis label).
panels = [
    (income_tidy, "Income", 'Income Over Time', 'Income'),
    (housing_tidy, "Housing", 'House Price Index Over Time', 'HPI'),
    (rental_tidy, "Rental", 'Rental Price Index Over Time', 'RPI'),
]

# 1 row, 3 columns of axes.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, (tidy, value_col, title, y_label) in zip(axes, panels):
    sns.lineplot(data=tidy, x="Year", y=value_col, hue="Country", marker="*", ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Year')
    ax.set_ylabel(y_label)
    ax.legend(title='Country')

# Put a tick on every whole year across the full range (no 0.5 steps).
first_year = int(min(combined_data['Year']))
last_year = int(max(combined_data['Year']))
for ax in axes:
    ax.set_xticks(range(first_year, last_year + 1))

# Tighten spacing between panels, then render.
plt.tight_layout()
plt.show()

GERMANY, BERLIN
---

In [None]:
# Load the default (first) sheet of the Numbeo workbook and name its header-less
# first column "Category".
df_Cities = pd.read_excel("../data/raw/numbeo_stats.xlsx")
df_Cities = df_Cities.rename(columns={"Unnamed: 0": "Category"})

# Forward-fill missing 'Category' values so every row carries its category.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated since pandas 2.1.
df_Cities['Category'] = df_Cities['Category'].ffill()

# Keep only the Berlin rows.
df_berlin = df_Cities[df_Cities['City'] == 'Berlin']

# Display the Berlin subset
df_berlin




# Melt the city-level DataFrame to long format for easier plotting.
# The original referenced an undefined name `df_numbeo`; the frame loaded from
# numbeo_stats.xlsx that carries 'Category' and 'City' columns is `df_Cities`.
df_long = df_Cities.melt(id_vars=['Category', 'City'], var_name='Year', value_name='Rent Price')

# Convert 'Year' (taken from the column headers) to numeric; non-numeric labels become NaN
df_long['Year'] = pd.to_numeric(df_long['Year'], errors='coerce')

# Line plot to show trends over time: colour by Category, line style/marker by City
plt.figure(figsize=(10, 6))
sns.lineplot(data=df_long, x="Year", y="Rent Price", hue="Category", style="City", markers=True)
plt.title('Rent Price Trends Over Time')
plt.xlabel('Year')
plt.ylabel('Rent Price')
# Place the legend outside the axes so it does not cover the lines
plt.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
# Reshape the Berlin rows to long format: one row per (Category, City, Year).
df_berlin_long = pd.melt(
    df_berlin,
    id_vars=['Category', 'City'],
    var_name='Year',
    value_name='Cost in Euros',
)

# Year labels come from the column headers; anything non-numeric becomes NaN.
df_berlin_long['Year'] = pd.to_numeric(df_berlin_long['Year'], errors='coerce')

# One line per Category for Berlin.
# NOTE(review): seaborn applies `markers` per level of a `style` variable and none
# is passed here — confirm whether markers were intended.
plt.figure(figsize=(10, 6))
sns.lineplot(
    data=df_berlin_long,
    x="Year",
    y="Cost in Euros",
    hue="Category",
    markers=True,
)
plt.title('Rent Price Trends Over Time (Berlin)')
plt.xlabel('Year')
plt.ylabel('Cost in Euros')
# Legend sits outside the axes, anchored to the upper-right edge of the plot area.
plt.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


"""
mortgage cost instead of price per sqm

"""

GERMANY
---

In [None]:
# Load the second sheet of the Numbeo workbook and name its header-less first
# column "Category".
df_Countries = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=1)
df_Countries = df_Countries.rename(columns={"Unnamed: 0": "Category"})

# Forward-fill missing 'Category' values so every row carries its category.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated since pandas 2.1.
df_Countries['Category'] = df_Countries['Category'].ffill()

# Trim stray whitespace from text headers only. `columns.str.strip()` would turn
# any non-string header (e.g. an integer year) into NaN, so those are left as is.
df_Countries.columns = [
    col.strip() if isinstance(col, str) else col for col in df_Countries.columns
]

# Keep only the rows whose Country code is 'DE'.
df_Germany = df_Countries[df_Countries["Country"] == 'DE']

# Display the Germany subset
df_Germany


In [None]:
# Reshape the Germany rows to long format: one row per (Category, Country, Year).
df_Germany_long = pd.melt(
    df_Germany,
    id_vars=['Category', 'Country'],
    var_name='Year',
    value_name='Rent Price',
)

# Year labels come from the column headers; anything non-numeric becomes NaN.
df_Germany_long['Year'] = pd.to_numeric(df_Germany_long['Year'], errors='coerce')

# Spell out the country code: 'DE' becomes 'Germany'.
df_Germany_long['Country'] = df_Germany_long['Country'].replace({'DE': 'Germany'})

# One line per Category for Germany.
# NOTE(review): seaborn applies `markers` per level of a `style` variable and none
# is passed here — confirm whether markers were intended.
plt.figure(figsize=(10, 6))
sns.lineplot(
    data=df_Germany_long,
    x="Year",
    y="Rent Price",
    hue="Category",
    markers=True,
)
plt.title('Rent Price Trends Over Time (Germany)')
plt.xlabel('Year')
plt.ylabel('Rent Price')
# Legend sits outside the axes, anchored to the upper-right edge of the plot area.
plt.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [None]:



# Melt Berlin's data to long format.
# Note: this rebinds df_berlin_long with the value column named 'Rent Price'.
df_berlin_long = df_berlin.melt(id_vars=['Category', 'City'], var_name='Year', value_name='Rent Price')
df_berlin_long['Year'] = pd.to_numeric(df_berlin_long['Year'], errors='coerce')

# Filter Germany's data
df_germany = df_Countries[df_Countries['Country'] == 'DE']

# Melt Germany's data to long format
df_germany_long = df_germany.melt(id_vars=['Category', 'Country'], var_name='Year', value_name='Rent Price')
df_germany_long['Year'] = pd.to_numeric(df_germany_long['Year'], errors='coerce')

# Create a side-by-side graph using matplotlib; sharey=True puts both panels on one y scale
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

# Plot for Berlin
sns.lineplot(data=df_berlin_long, x="Year", y="Rent Price", hue="Category", markers=True, ax=axes[0])
axes[0].set_title('Rent Price Trends Over Time (Berlin)')
axes[0].set_xlabel('Year')
axes[0].set_ylabel('Rent Price')

# Plot for Germany
sns.lineplot(data=df_germany_long, x="Year", y="Rent Price", hue="Category", markers=True, ax=axes[1])
axes[1].set_title('Rent Price Trends Over Time (Germany)')
axes[1].set_xlabel('Year')

# Adjust layout for better spacing
plt.tight_layout()

# Display the plots (the original cell ended on a truncated comment without this call)
plt.show()


In [None]:
# Side-by-side rent trends for Berlin and Germany, excluding the purchase-price category.
#
# NOTE(review): this cell was marked "not working so far". It filtered Berlin out of
# df_Countries via a 'City' column, but the other cells only ever read 'City' from
# df_Cities and 'Country' from df_Countries. Berlin is now taken from df_Cities —
# confirm against the workbook.
# The Country -> Region -> Country rename round-trip was a no-op and has been removed,
# as has the repeated forward-fill of 'Category' (already done when each sheet was loaded).

# Category excluded from both panels
BUY_CATEGORY = 'Buy apartment ( per m2 in city center)'

# Filter Berlin's data from the city-level sheet
df_berlin = df_Cities[df_Cities['City'] == 'Berlin']

# Filter out the "Buy apartment" category from Berlin's data
df_berlin = df_berlin[df_berlin['Category'] != BUY_CATEGORY]

# Melt Berlin's data to long format
df_berlin_long = df_berlin.melt(id_vars=['Category', 'City'], var_name='Year', value_name='Rent Price')
df_berlin_long['Year'] = pd.to_numeric(df_berlin_long['Year'], errors='coerce')

# Filter Germany's data from the country-level sheet
df_germany = df_Countries[df_Countries['Country'] == 'DE']

# Filter out the "Buy apartment" category from Germany's data
df_germany = df_germany[df_germany['Category'] != BUY_CATEGORY]

# Melt Germany's data to long format
df_germany_long = df_germany.melt(id_vars=['Category', 'Country'], var_name='Year', value_name='Rent Price')
df_germany_long['Year'] = pd.to_numeric(df_germany_long['Year'], errors='coerce')

# Create a side-by-side graph using matplotlib; sharey=True puts both panels on one y scale
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

# Plot for Berlin
sns.lineplot(data=df_berlin_long, x="Year", y="Rent Price", hue="Category", markers=True, ax=axes[0])
axes[0].set_title('Rent Price Trends Over Time (Berlin)')
axes[0].set_xlabel('Year')
axes[0].set_ylabel('Rent Price')

# Plot for Germany
sns.lineplot(data=df_germany_long, x="Year", y="Rent Price", hue="Category", markers=True, ax=axes[1])
axes[1].set_title('Rent Price Trends Over Time (Germany)')
axes[1].set_xlabel('Year')

# Adjust layout for better spacing
plt.tight_layout()

# Display the plots
plt.show()
