In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os

In [29]:
# Read the rental inventory and yearly median rent CSV files into DataFrames.
# Paths are relative to the notebook's working directory.
rental_inventory_path = '../Data File Repository/Rental_Inventory_DF.csv'
yearly_median_rent_path = '../Data File Repository/Yearly_Median_Rent_DF.csv'
rental_inventory_df = pd.read_csv(rental_inventory_path)
yearly_median_rent_df = pd.read_csv(yearly_median_rent_path)


In [None]:
# Relabel the year columns of rental_inventory_df as "<year> Total Rentals".
# NOTE(review): assumes the CSV headers are the bare year strings '2010'..'2023';
# rename() leaves any column without a matching key untouched.
rental_inventory_columns = {
    'Borough': 'Borough',  # identity entry: the key column keeps its name
    **{str(year): f"{year} Total Rentals" for year in range(2010, 2024)},
}
rental_inventory_df.rename(columns=rental_inventory_columns, inplace=True)


In [None]:
# Relabel the year columns of yearly_median_rent_df as "<year> Median Rent".
# NOTE(review): assumes the CSV headers are the bare year strings '2010'..'2023';
# rename() leaves any column without a matching key untouched.
yearly_median_rent_columns = {
    'Borough': 'Borough',  # identity entry: the key column keeps its name
    **{str(year): f"{year} Median Rent" for year in range(2010, 2024)},
}
yearly_median_rent_df.rename(columns=yearly_median_rent_columns, inplace=True)


In [None]:
# Join the inventory and rent tables side by side, matching rows on 'Borough'.
# No `how` is given, so pandas' default inner join applies: only boroughs
# present in both tables end up in combined_df.
combined_df = rental_inventory_df.merge(yearly_median_rent_df, on='Borough')


In [None]:
# Column order for the combined table: 'Borough' first, then for every year
# its "Total Rentals" column immediately followed by its "Median Rent" column.
metric_suffixes = ("Total Rentals", "Median Rent")
new_order = ['Borough'] + [
    f"{year} {suffix}"
    for year in range(2010, 2024)
    for suffix in metric_suffixes
]


In [None]:
# Select the columns in the order listed in new_order (all rows kept);
# a name in new_order that is missing from combined_df raises KeyError.
combined_df = combined_df.loc[:, new_order]


In [None]:
# Debug output: show the first rows of the combined table
combined_preview = combined_df.head()
print("combined_df:\n", combined_preview)

In [None]:
# Write the combined table to a CSV file in the "Data File Repository" directory one level up
output_folder = '../Data File Repository'
os.makedirs(output_folder, exist_ok=True)  # exist_ok: no error if the folder is already there
output_file_path = os.path.join(output_folder, 'Combined_Rental_Median_Price_Inventory.csv')
combined_df.to_csv(output_file_path, index=False)  # index=False: do not write the row index as a column


In [None]:
# Reshape the wide combined table into two long tables, one per metric.
# Each result has columns 'Borough', 'Year' (still the original column label,
# e.g. "2010 Total Rentals") and the metric value.
total_rentals_columns = [f"{year} Total Rentals" for year in range(2010, 2024)]
median_rent_columns = [f"{year} Median Rent" for year in range(2010, 2024)]

melted_inventory = combined_df.melt(
    id_vars=['Borough'],
    value_vars=total_rentals_columns,
    var_name='Year',
    value_name='Total Rentals',
)
melted_rent = combined_df.melt(
    id_vars=['Borough'],
    value_vars=median_rent_columns,
    var_name='Year',
    value_name='Median Rent',
)


In [None]:
# Clean up the Median Rent column and convert to float.
# Every '$' and ',' is removed from string values before the cast.
# The pattern is a raw string so that the backslash reaches the regex engine
# as written; in a normal string literal, backslash-dollar is an invalid
# escape sequence that newer Python versions warn about.
melted_rent['Median Rent'] = (
    melted_rent['Median Rent']
    .replace(r'[\$,]', '', regex=True)
    .astype(float)
)


In [None]:
# Convert 'Year' from '2010 Total Rentals' to '2010' and to int for plotting.
# The column is normalised to str first so this cell can be re-run safely:
# after one run 'Year' is already int, and the .str accessor would raise
# AttributeError on it. '2010' has no space, so split(' ')[0] is still '2010'.
melted_inventory['Year'] = (
    melted_inventory['Year'].astype(str).str.split(' ').str[0].astype(int)
)
melted_rent['Year'] = (
    melted_rent['Year'].astype(str).str.split(' ').str[0].astype(int)
)


In [None]:
# Create the figure and its single Axes (ax1); later cells draw onto ax1
figure_size = (14, 7)  # (width, height) passed to figsize
fig, ax1 = plt.subplots(figsize=figure_size)


In [None]:
# Scatter plot for Total Rentals on ax1, one colour per borough.
# The styling values below are also read by the Median Rent scatter cell.
colors = ['blue', 'green', 'red', 'purple', 'orange']
point_size = 50
transparency = 0.8
border_width = 0.8
border_color = 'black'

# Colours are picked by index with wrap-around rather than zip(boroughs, colors):
# zip stops at the shorter sequence, so any borough beyond the number of
# colours was silently left off the plot.
for index, borough in enumerate(melted_inventory['Borough'].unique()):
    color = colors[index % len(colors)]
    # Filter once and reuse the rows for both the x and y values
    borough_rows = melted_inventory[melted_inventory['Borough'] == borough]
    ax1.scatter(borough_rows['Year'],
                borough_rows['Total Rentals'],
                s=point_size, c=color, alpha=transparency,
                linewidths=border_width, edgecolors=border_color,
                label=f"{borough} Total Rentals")


In [None]:
# Use a logarithmic Y-axis for Total Rentals with fixed display limits
rentals_axis_limits = (50, 1100000)  # (lower, upper); adjust if the data falls outside
ax1.set_yscale('log')
ax1.set_ylim(*rentals_axis_limits)


In [None]:
# Create a second Y-axis (sharing ax1's X-axis) for Median Rent and plot it
# with 'x' markers, reusing the colours and styling values defined for ax1.
ax2 = ax1.twinx()

# Colours are picked by index with wrap-around rather than zip(boroughs, colors):
# zip stops at the shorter sequence, so any borough beyond the number of
# colours was silently left off the plot.
for index, borough in enumerate(melted_rent['Borough'].unique()):
    color = colors[index % len(colors)]
    # Filter once and reuse the rows for both the x and y values
    borough_rows = melted_rent[melted_rent['Borough'] == borough]
    # edgecolors is intentionally not passed: 'x' is an unfilled marker, for
    # which matplotlib ignores edgecolors (the marker is drawn in `c`) and
    # emits a UserWarning when one is supplied. linewidths still sets the
    # stroke width of the 'x'.
    ax2.scatter(borough_rows['Year'],
                borough_rows['Median Rent'],
                s=point_size, c=color, marker='x', alpha=transparency,
                linewidths=border_width,
                label=f"{borough} Median Rent")


In [None]:
# Labels and titles
ax1.set_xlabel('Year')
ax1.set_ylabel('Total Rentals (Log Scale)', color='blue')
ax2.set_ylabel('Median Rent', color='orange')
ax1.set_title('Total Rentals