# In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from itertools import product

# Location of the input CSV. Left as an empty placeholder here; fill it in
# before running.
path = ''
# Load the full table; the sampling step below reads it one row at a time.
fdot = pd.read_csv(path)

# SAMPLE FOR ROADWAY
def create_sample_dataframes(n, fdot):
    """Build one YEAR/CRK dataframe for each of the first ``n`` rows of ``fdot``.

    Each row is read from its second column onward (the first column is
    skipped; presumably it is an identifier -- confirm against the CSV) and
    paired with the years 1989 through 2019, so a row must hold exactly 31
    values after the first column.

    Every frame is published as a module-level variable named ``samp<i>``
    (``samp0``, ``samp1``, ...), and the frames are also returned so callers
    do not have to look them up through ``globals()``.

    Args:
        n: Number of leading rows of ``fdot`` to convert.
        fdot: DataFrame holding one series per row.

    Returns:
        A list with the ``n`` dataframes, in row order.

    Raises:
        IndexError: If ``n`` is larger than the number of rows in ``fdot``.
        ValueError: If a row does not hold exactly one value per year.
    """
    if n > len(fdot):
        raise IndexError(
            f"requested {n} sample dataframes but fdot has only {len(fdot)} rows"
        )

    years = list(range(1989, 2020))

    frames = []
    for i in range(n):
        values = fdot.iloc[i, 1:].tolist()
        frames.append(pd.DataFrame({'YEAR': years, 'CRK': values}))

    # Publish only after every row converted cleanly, so a failure part-way
    # through does not leave a partial set of samp<i> globals behind.
    for i, samp in enumerate(frames):
        globals()[f'samp{i}'] = samp

    return frames

# Build samp0 .. samp5 from the first six rows of fdot; the function
# publishes each frame as a module-level variable of that name.
n = 6  # Number of sample dataframes
create_sample_dataframes(n, fdot)

# Define a function to find the best ARIMA order and plot the results
def find_best_arima_and_plot(samp_df, samp_name, plot_position, train_size=26):
    """Grid-search an ARIMA order on a hold-out split, refit, and plot the result.

    The ``CRK`` column is split into the first ``train_size`` rows (training)
    and the remaining rows (hold-out). Every (p, d, q) combination with each
    component in 0..4 is fitted on the training rows and scored by mean
    squared error on the hold-out rows. The best-scoring order is then refit
    on the whole ``CRK`` column, its predictions are stored in
    ``samp_df['Predicted_Ratings']`` (the frame is modified in place), the
    chosen order and the full-series MSE are printed, and the hold-out rows
    are drawn on subplot ``plot_position`` of a 2x3 grid on the current
    matplotlib figure.

    Args:
        samp_df: DataFrame with ``YEAR`` and ``CRK`` columns.
        samp_name: Label used in the printed output and the subplot title.
        plot_position: 1-based slot in the 2x3 subplot grid.
        train_size: Number of leading rows used for training; the rest are
            held out for scoring and plotting.

    Raises:
        RuntimeError: If no order in the search grid could be fitted and
            scored; the last underlying failure is chained as the cause.
    """
    train_data = samp_df['CRK'][0:train_size]
    test_data = samp_df['CRK'][train_size:]

    # Define the range of values for p, d, and q parameters
    p_values = range(0, 5)  # Adjust as needed
    d_values = range(0, 5)  # Adjust as needed
    q_values = range(0, 5)  # Adjust as needed

    # The forecast window is the same for every candidate order.
    forecast_start = len(train_data)
    forecast_end = len(train_data) + len(test_data) - 1

    best_order = None
    best_mse = float('inf')
    last_error = None

    # Perform a grid search to find the best ARIMA order
    for order in product(p_values, d_values, q_values):
        try:
            candidate = sm.tsa.ARIMA(train_data, order=order).fit()

            # NOTE(review): `typ` was an argument of the legacy ARIMA
            # implementation; newer statsmodels releases may warn about or
            # reject it -- confirm against the installed version.
            forecast = candidate.predict(
                start=forecast_start, end=forecast_end, typ='levels'
            )

            candidate_mse = ((test_data - forecast) ** 2).mean()
        except Exception as exc:
            # Best effort: an order that cannot be fitted or scored is simply
            # skipped. Only Exception is caught, so Ctrl-C still interrupts.
            last_error = exc
            continue

        # A NaN score never compares as smaller, so it is never selected.
        if candidate_mse < best_mse:
            best_order = order
            best_mse = candidate_mse

    if best_order is None:
        raise RuntimeError(
            f"{samp_name}: no ARIMA order in the search grid could be fitted and scored"
        ) from last_error

    # Fit the ARIMA model with the best order
    # NOTE(review): this refit sees the held-out rows too, so the values
    # plotted below are in-sample predictions, not out-of-sample forecasts.
    results = sm.tsa.ARIMA(samp_df['CRK'], order=best_order).fit()

    # Make predictions for the entire dataset with the best order
    predictions = results.predict(start=0, end=len(samp_df) - 1, typ='levels')

    # Append predictions to the original DataFrame
    samp_df['Predicted_Ratings'] = predictions

    # Calculate the MSE with the best order
    mse = ((samp_df['CRK'] - predictions) ** 2).mean()
    print(f"{samp_name} - Best ARIMA Order: {best_order}")
    print(f"{samp_name} - Mean Squared Error with Best Order: {mse}")

    # Plot the actual and predicted data for the held-out rows only
    held_out_years = samp_df['YEAR'][train_size:]
    plt.subplot(2, 3, plot_position)
    plt.plot(held_out_years, samp_df['CRK'][train_size:], label='Actual Ratings', marker='o')
    plt.plot(held_out_years, predictions[train_size:], label='Predicted Ratings', marker='x', color='red')

    plt.title(f'{samp_name} - Actual vs. Predicted Ratings')
    plt.xlabel('Year')
    plt.ylabel('Ratings')
    plt.legend()
    plt.grid()

    # Set x-axis ticks to show years without decimal places
    plt.xticks(held_out_years, [int(year) for year in held_out_years])

# Draw samp0 .. samp5 on a single 16x9 figure, one subplot per sample
plt.figure(figsize=(16, 9))
for plot_position, i in enumerate(range(6), start=1):  # subplot slots are 1-based
    samp_name = f'samp{i}'
    samp_df = globals()[samp_name]
    find_best_arima_and_plot(samp_df, samp_name, plot_position)

plt.tight_layout()
plt.show()