In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# Load the data from the CSV file
data = pd.read_csv('ARIMAdata.csv')

# Extract relevant columns
crop_data = data[data['Crop'] == 'Tomato']
volume_production = crop_data['VolumeProduction']

# Convert the 'TimePeriod' column to numerical values
time_period_mapping = {'Quarter 1': 1, 'Quarter 2': 2, 'Quarter 3': 3, 'Quarter 4': 4, 'Semester 1': 5, 'Semester 2': 6, 'Annual': 7}
crop_data['TimePeriod'] = crop_data['TimePeriod'].map(time_period_mapping)

# Create a new column combining 'Year' and 'TimePeriod' for indexing
crop_data['Date'] = crop_data['Year'].astype(str) + crop_data['TimePeriod'].astype(str)
crop_data.set_index('Date', inplace=True)

# Convert the 'VolumeProduction' column to numeric
crop_data['VolumeProduction'] = pd.to_numeric(crop_data['VolumeProduction'], errors='coerce')

# Perform ARIMA forecasting
model = ARIMA(volume_production, order=(1, 1, 0))
model_fit = model.fit()

# Forecast future values
forecast_steps = 8
forecast = model_fit.forecast(steps=forecast_steps)

# Generate date index for the forecasted values
last_date = crop_data.index[-1]
forecast_dates = pd.date_range(start=last_date, periods=forecast_steps+1, freq='QS')[1:]

# Plot the actual data and the forecast
plt.figure(figsize=(10, 6))
plt.plot(crop_data.index, crop_data['VolumeProduction'], label='Actual Data')
plt.plot(forecast_dates, forecast, label='Forecast')
plt.xlabel('Date')
plt.ylabel('Volume Production')
plt.title('Tomato Volume Production Forecast')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.show()


ValueError: 'q' is a bad directive in format 'Quarter %q'