In [18]:
import pandas as pd
import pickle
from statsmodels.tsa.arima.model import ARIMA
import numpy as np

# Part 1

In [19]:
# Reading a dataset
data = pd.read_csv('2020-2021month.csv')

- ## User Input Items

In [20]:
# Define the start time of the forecast and the number of months of the forecast
# The user can enter
location_id = 4
start_time = pd.to_datetime('2023-12-01')
num_months = 3

##  Drop_Off count

In [21]:
with open('model_DO.pkl', 'rb') as f:
    model_ARIMA = pickle.load(f)

# Get the model corresponding to the LocationID
model = model_ARIMA[location_id]

# Get the historical data needed for the model, either training data or other historical data
# In this example, we use the training data as historical data
location_data = data[data['LocationID'] == location_id]
historical_data = location_data['DropOff_count'].values

# Calculate the end of the forecast based on the start of the forecast and the number of months in the forecast
end_time = start_time + pd.DateOffset(months=num_months)

# Use the model to make predictions and get future data
predictions_DO = model.predict(start=len(historical_data), end=len(historical_data) + num_months - 1)

# Print prediction results
print(f'Predictions for LocationID {location_id} from {start_time} to {end_time}:')
for i, prediction in enumerate(predictions_DO):
    prediction_date = start_time + pd.DateOffset(months=i)
    print(f'{prediction_date}: {prediction}')


Predictions for LocationID 4 from 2023-12-01 00:00:00 to 2024-03-01 00:00:00:
2023-12-01 00:00:00: 13271.823352004822
2024-01-01 00:00:00: 13493.18042524825
2024-02-01 00:00:00: 13975.820082990753


## Pick_UP count

In [27]:
with open('model_PU.pkl', 'rb') as f:
    model_ARIMA = pickle.load(f)

# Get the model corresponding to the LocationID
model = model_ARIMA[location_id]

# Get the historical data needed for the model, either training data or other historical data
# In this example, we use the training data as historical data
location_data = data[data['LocationID'] == location_id]
historical_data = location_data['DropOff_count'].values

# Calculate the end of the forecast based on the start of the forecast and the number of months in the forecast
end_time = start_time + pd.DateOffset(months=num_months)

# Use the model to make predictions and get future data
predictions_PU = model.predict(start=len(historical_data), end=len(historical_data) + num_months - 1)

times = []
# Print prediction results
print(f'Predictions for LocationID {location_id} from {start_time} to {end_time}:')
for i, prediction in enumerate(predictions_PU):
    prediction_date = start_time + pd.DateOffset(months=i)
    times.append(prediction_date)
    print(f'{prediction_date}: {prediction}')


Predictions for LocationID 4 from 2023-12-01 00:00:00 to 2024-03-01 00:00:00:
2023-12-01 00:00:00: 3121.4483169757746
2024-01-01 00:00:00: 2959.856103029276
2024-02-01 00:00:00: 2877.354792466138


-  ## Use the predicted values above to determine the number of passengers

In [30]:
# Load the trained models from the .pkl file
with open('models_Passenger.pkl', 'rb') as f:
    models_dict = pickle.load(f)

# Get the model for the selected LocationID from the models_dict
model_for_location = models_dict[location_id]

# Prepare the exogenous features for prediction (future values of DropOff_count and PickUp_count)
future_times = times
future_features = pd.DataFrame({'DropOff_count': predictions_DO,  # Replace with the desired future values
                                'PickUp_count': predictions_PU})    # Replace with the desired future values

# Make predictions for the future years using the ARIMA model
predictions = model_for_location.predict(start=len(future_features), end=len(future_features) + len(future_times) - 1,
                                         exog=future_features)

# Print the predictions
print("Predicted 'passenger' values for LocationID", location_id, "for years", future_times, ":", predictions)


Predicted 'passenger' values for LocationID 4 for years [Timestamp('2023-12-01 00:00:00'), Timestamp('2024-01-01 00:00:00'), Timestamp('2024-02-01 00:00:00')] : [13544.00801531 11634.87827421 11165.56697285]


# Real Estate

- ## User Input Items

In [31]:
year = [2023, 2024, 2025]
location_id = 4

In [32]:


# Load the models from the .pkl file
with open('models_RealEstate.pkl', 'rb') as f:
    models_dict = pickle.load(f)



# Get the model for the specific LocationID
model_for_location = models_dict[location_id]

# Define the future years for which you want to make predictions
future_years = np.array(year).reshape(-1, 1)

# Make predictions for the future years
predictions = model_for_location.predict(future_years)

# Get the most recent 'FULLVAL' value for the given LocationID
most_recent_fullval = model_for_location.predict(future_years[0].reshape(-1, 1))

# Calculate the growth rate as a percentage
growth_rate = ((predictions - most_recent_fullval) / most_recent_fullval) * 100

# Print the predictions and growth rate
for i, year in enumerate(future_years.flatten()):
    print(f"Predicted FULLVAL for LocationID {location_id} in year {year}: {predictions[i]}")
    print(f"Growth Rate for LocationID {location_id} in year {year}: {growth_rate[i]:.2f}%\n")


Predicted FULLVAL for LocationID 4 in year 2023: 3527604623.666687
Growth Rate for LocationID 4 in year 2023: 0.00%

Predicted FULLVAL for LocationID 4 in year 2024: 3699292684.5
Growth Rate for LocationID 4 in year 2024: 4.87%

Predicted FULLVAL for LocationID 4 in year 2025: 3870980745.333313
Growth Rate for LocationID 4 in year 2025: 9.73%

