In [None]:
import pandas as pd
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

HOUSING RENTAL INCOME FORMATTING
---

In [None]:
# Read the worksheet at position 0 of the week-3 project workbook.
housing = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=0,
)
# Give every row the same 'Housing' index label.
housing.index = ['Housing'] * housing.shape[0]
housing

In [None]:
# Read the worksheet at position 1 of the week-3 project workbook.
rental = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=1,
)
# Give every row the same 'Rental' index label.
rental.index = ['Rental'] * rental.shape[0]
rental

In [None]:
# Read the worksheet at position 2 of the week-3 project workbook.
income = pd.read_excel(
    "../data/raw/week_3_project_data.xlsx",
    sheet_name=2,
)
# Give every row the same 'Income' index label.
income.index = ['Income'] * income.shape[0]
income

In [None]:
# One label per row, in stacking order: all housing rows, then rental, then income.
index_labels = [
    label
    for label, frame in (('Housing', housing), ('Rental', rental), ('Income', income))
    for _ in range(len(frame))
]

# Stack the three tables on top of each other; the per-table indexes are
# discarded by ignore_index=True and replaced with the labels built above.
final_df = pd.concat([housing, rental, income], ignore_index=True)
final_df.index = index_labels

In [None]:
# The first column is read in under the label "Unnamed: 0"; call it "Country".
final_df = final_df.rename(columns={"Unnamed: 0": "Country"})
final_df

In [None]:
# Split the stacked table back into one frame per measure, using the
# 'Income' / 'Housing' / 'Rental' row labels carried in the index.
source_labels = final_df.index
income_df = final_df.loc[source_labels == 'Income']
housing_df = final_df.loc[source_labels == "Housing"]
rental_df = final_df.loc[source_labels == "Rental"]

In [None]:
# Reshape each wide table to long form: one row per (Country, Year) pair,
# with the measured value in a column named after the measure.
income_tidy = pd.melt(income_df, id_vars="Country", var_name="Year", value_name="Income")
housing_tidy = pd.melt(housing_df, id_vars="Country", var_name="Year", value_name="Housing")
rental_tidy = pd.melt(rental_df, id_vars="Country", var_name="Year", value_name="Rental")

income_tidy

HOUSING RENTAL INCOME GRAPHS
---

In [None]:
# Inputs: income_tidy, housing_tidy and rental_tidy from the tidy-reshape cell.

# Tag each tidy frame with the measure it holds (adds a 'Type' column in place).
income_tidy['Type'] = 'Income'
housing_tidy['Type'] = 'Housing'
rental_tidy['Type'] = 'Rental'

# Stack the three frames; below this is only used to find the overall year range.
combined_data = pd.concat([income_tidy, housing_tidy, rental_tidy], axis=0)

# One row of three panels, one panel per measure.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (frame, value column, panel title, y-axis label) for each panel, left to right.
panels = [
    (income_tidy, "Income", 'Income Over Time', 'Income'),
    (housing_tidy, "Housing", 'House Price Index Over Time', 'HPI'),
    (rental_tidy, "Rental", 'Rental Price Index Over Time', 'RPI'),
]

# Tick positions: every whole year between the earliest and latest year present.
first_year = int(min(combined_data['Year']))
last_year = int(max(combined_data['Year']))

for ax, (frame, value_col, title, y_label) in zip(axes, panels):
    # One line per country, starred markers at each data point.
    sns.lineplot(data=frame, x="Year", y=value_col, hue="Country", marker="*", ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Year')
    ax.set_ylabel(y_label)
    ax.legend(title='Country')
    # Whole-year ticks only (avoids fractional steps such as 2019.5).
    ax.set_xticks(range(first_year, last_year + 1))

# Tighten spacing between panels, then render.
plt.tight_layout()
plt.show()

GERMANY, BERLIN
---

In [None]:
# --- City-level statistics: default (first) sheet of the Numbeo workbook ---
df_Cities = pd.read_excel("../data/raw/numbeo_stats.xlsx")
df_Cities.rename(columns = {"Unnamed: 0" : "Category"}, inplace = True)
# Fill missing 'Category' values with the last value seen above them.
# `.ffill()` replaces `fillna(method="ffill")`, which is deprecated in pandas >= 2.1.
df_Cities['Category'] = df_Cities["Category"].ffill()

df_berlin = df_Cities[df_Cities['City'] == 'Berlin']
# Drops the row labelled 7 -- presumably an unwanted row in the sheet; TODO confirm.
df_berlin = df_berlin.drop(7)

# NOTE(review): with IPython's default settings only the last expression of a
# cell is rendered, so this mid-cell expression does not display anything.
df_berlin

# --- Country-level statistics: sheet at position 1 of the same workbook ---
df_Countries = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name = 1)
df_Countries.rename(columns = {"Unnamed: 0" : "Category"}, inplace = True)
# Fill missing 'Category' values with the last value seen above them.
df_Countries['Category'] = df_Countries['Category'].ffill()
# Trim whitespace around every column label. Going through str() keeps
# non-string labels (e.g. year headers read as integers) usable as '2019', ...;
# `.columns.str.strip()` would turn such labels into NaN.
df_Countries.columns = [str(label).strip() for label in df_Countries.columns]

df_Germany = df_Countries[df_Countries["Country"] == 'DE']
# Drops the row labelled 7 -- presumably an unwanted row in the sheet; TODO confirm.
df_Germany = df_Germany.drop(7)

# Show the column labels of both frames used by the Germany plots.
print("df_Germany columns:", df_Germany.columns)
print("final_df columns:", final_df.columns)





In [None]:


# Print column names for debugging
print("df_Germany columns:", df_Germany.columns)
print("final_df columns:", final_df.columns)

# Normalise the column labels BEFORE selecting the year columns below: the
# selection uses string labels, so every label is converted with str() and
# trimmed. (str() also keeps integer year headers usable; `.str.strip()`
# would turn non-string labels into NaN.)
df_Germany.columns = [str(label).strip() for label in df_Germany.columns]
final_df.columns = [str(label).strip() for label in final_df.columns]

# Year columns to plot, as string labels
years = ['2019', '2020', '2021', '2022', '2023']

# 1. Prepare the data for the plots

# First plot: Income and Minimum Wage (from df_Germany)
income_de = df_Germany.loc[df_Germany['Category'] == 'Av salary (after tax)', years].values.flatten()
min_wage_de = df_Germany.loc[df_Germany['Category'] == 'Min wage (after tax)', years].values.flatten()

# final_df stacks the Housing, Rental and Income tables and marks each row's
# origin in the index, so a country filter alone matches rows from all three
# tables. Each series therefore also filters on its own index label.
# NOTE(review): assumes final_df['Country'] uses the code 'DE' for Germany -- confirm.
is_germany = (final_df['Country'] == 'DE').to_numpy()

# Second plot: House Price Index (from final_df, for Germany)
housing_de = final_df.loc[is_germany & (final_df.index == 'Housing'), years].values.flatten()

# Third plot: Rental Price Index (from final_df, for Germany)
rental_de = final_df.loc[is_germany & (final_df.index == 'Rental'), years].values.flatten()

# Each series must hold exactly one value per year, otherwise the plots below
# cannot be drawn against `years`.
assert all(
    len(series) == len(years)
    for series in (income_de, min_wage_de, housing_de, rental_de)
), "expected exactly one row (one value per year) for each German series"


# 2. Normalize data for index (to make them comparable across years)
def normalize_data(data):
    """Rescale a series so that its first element becomes 100.

    Every element is divided by ``data[0]`` and multiplied by 100. In this
    notebook the first element is the 2019 value, so the result is an index
    with base 100 in 2019.

    Parameters
    ----------
    data : indexable that supports element-wise division
        The notebook passes flattened NumPy arrays.

    Returns
    -------
    The result of ``(data / data[0]) * 100``.
    """
    base_value = data[0]
    return (data / base_value) * 100

# Rebase the four German series with normalize_data (first value -> 100).
income_de_index, min_wage_de_index, housing_de_index, rental_de_index = [
    normalize_data(series)
    for series in (income_de, min_wage_de, housing_de, rental_de)
]

# 3. Create the plots: three panels side by side on a shared y-axis
fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
index_axis_label = 'Index (Base = 100 in 2019)'

# Panel 1: income and minimum wage share one axes, so this panel gets a legend
axes[0].plot(years, income_de_index, label='Income', color='blue', marker='o')
axes[0].plot(years, min_wage_de_index, label='Min Wage', color='orange', marker='o')
axes[0].set(title='Income and Minimum Wage Index', xlabel='Year', ylabel=index_axis_label)
axes[0].legend()

# Panels 2 and 3: a single series each; the series label doubles as the title
# and no legend is drawn.
single_series_panels = (
    (axes[1], housing_de_index, 'House Price Index', 'green'),
    (axes[2], rental_de_index, 'Rental Price Index', 'red'),
)
for panel, values, name, colour in single_series_panels:
    panel.plot(years, values, label=name, color=colour, marker='o')
    panel.set(title=name, xlabel='Year', ylabel=index_axis_label)

# Tighten spacing between panels, then render.
plt.tight_layout()
plt.show()
