In [7]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
import os

In [8]:
# Folder that receives every chart saved by this notebook. It is created
# (together with any missing parents) when absent and reused when present.
output_directory = '../Charts and Graphs Output'
os.makedirs(name=output_directory, exist_ok=True)


In [9]:
# Read the combined rental CSV into `df`; the path is relative to the
# notebook's working directory.
rental_csv_path = '../Data File Repository/Combined_Rental_Median_Price_Inventory.csv'
df = pd.read_csv(rental_csv_path)


In [10]:
# Preprocess data: reshape the wide table into long form (one row per
# Borough / original column) and split each original column header into a
# numeric 'Year' and a 'Type' label ('Total Rentals' or 'Median Rent').
df = df.melt(id_vars=['Borough'], var_name='Year_Rentals', value_name='Value')

# The patterns are raw strings: '\d' and '\$' are not valid Python string
# escapes, so non-raw literals trigger an invalid-escape warning at compile
# time. Headers that do not match the pattern yield NaN for Year and Type.
df[['Year', 'Type']] = df['Year_Rentals'].str.extract(r'(\d{4}) (Total Rentals|Median Rent)')
df = df.drop(columns='Year_Rentals')
df['Year'] = pd.to_numeric(df['Year'])

# Strip '$' and ',' from string values before converting; anything that still
# cannot be parsed as a number becomes NaN (errors='coerce').
df['Value'] = pd.to_numeric(df['Value'].replace(r'[\$,]', '', regex=True), errors='coerce')


In [11]:
# Keep only the 'Median Rent' rows; the 'Type' column is constant after the
# filter, so it is removed from the resulting frame.
is_median_rent = df['Type'] == 'Median Rent'
df_median = df.loc[is_median_rent].drop(columns='Type')


In [14]:
# Iterate over each borough to create separate charts for Median Rent only.
# For every borough a linear trend is fitted to the observations up to 2019,
# drawn across 2010-2019, overlaid with all actual observations, and saved as
# a PNG in `output_directory`.
for borough in df['Borough'].unique():
    borough_median = df_median[df_median['Borough'] == borough]

    # Training data: observations up to 2019. Rows whose Value is NaN (left
    # behind by the errors='coerce' conversion) are dropped because
    # LinearRegression.fit raises on NaN input.
    train_median = borough_median[borough_median['Year'] <= 2019].dropna(subset=['Value'])
    if train_median.empty:
        # Nothing to fit; skip instead of crashing the whole loop.
        print(f'Skipping {borough}: no usable Median Rent data up to 2019')
        continue

    X_median = train_median[['Year']]
    y_median = train_median['Value']
    model_median = LinearRegression().fit(X_median, y_median)
    X_median_predict = pd.DataFrame({'Year': range(2010, 2020)})  # Predict from 2010 to 2019 for trend line
    y_median_predict = model_median.predict(X_median_predict)

    # Start a new figure only once the fit has succeeded, so a failure above
    # never leaves an unclosed figure behind.
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plotting the trend line for Median Rent
    ax.plot(X_median_predict['Year'], y_median_predict, color='purple', linestyle='-', linewidth=2, label='Trend (Median Rent)')

    # Plot all actual data for Median Rent (every year, not just the fit window)
    ax.scatter(borough_median['Year'], borough_median['Value'], color='magenta', label='Actual (Median Rent)')

    # Setting title and labels for the plot
    ax.set_title(f'{borough} Median Rent Data (Projection and Actual)')
    ax.set_xlabel('Year')
    ax.set_ylabel('Values')
    ax.legend()

    # Save through the figure object rather than pyplot's "current figure"
    fig.savefig(os.path.join(output_directory, f'{borough}_median_rent_projection.png'))

    # Close the figure to free memory
    plt.close(fig)