In [1]:
# install/load dependencies
!pip install pystan~=2.14
!pip install fbprophet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fbprophet import Prophet

Collecting pystan~=2.14
  Downloading pystan-2.19.1.1-cp37-cp37m-manylinux1_x86_64.whl (67.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 MB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pystan
Successfully installed pystan-2.19.1.1
[0mCollecting fbprophet
  Downloading fbprophet-0.7.1.tar.gz (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.0/64.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
Collecting cmdstanpy==0.9.5
  Downloading cmdstanpy-0.9.5-py3-none-any.whl (37 kB)
Building wheels for collected packages: fbprophet
  Building wheel for fbprophet (setup.py) ... [?25l- \ done
[?25h  Created wheel for fbprophet: filename=fbprophet-0.7.1-py3-none-any.whl size=9434399 sha256=753246330d97e6cf4f84531a2eb9a992e7d057c144f4b7b8f6fe391e6a6473e5
  Stored in directory: /root/.cache/pip/wheels/fc/66/ad/951ba2b4576f7364afe7aeea8b

In [2]:
# load dataset and take a look
dataset = pd.read_csv("/kaggle/input/tomato-daily-prices/Tomato.csv")
dataset.head()

Unnamed: 0,Date,Unit,Minimum,Maximum,Average,Market
0,2013-06-16,Kg,26,32,29.0,Tomato
1,2013-06-17,Kg,20,25,22.5,Tomato
2,2013-06-18,Kg,22,26,24.0,Tomato
3,2013-06-19,Kg,24,28,26.0,Tomato
4,2013-06-20,Kg,22,26,24.0,Tomato


In [3]:
# changing features name to lowercase
dataset.columns = dataset.columns.str.lower()

In [4]:
# get general information 
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2741 entries, 0 to 2740
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   date     2741 non-null   object 
 1   unit     2741 non-null   object 
 2   minimum  2741 non-null   int64  
 3   maximum  2741 non-null   int64  
 4   average  2741 non-null   float64
 5   market   2741 non-null   object 
dtypes: float64(1), int64(2), object(3)
memory usage: 128.6+ KB


In [5]:
# get statistical information
dataset.describe()

Unnamed: 0,minimum,maximum,average
count,2741.0,2741.0,2741.0
mean,35.089748,41.281284,38.185516
std,16.648425,17.364135,16.970949
min,8.0,12.0,10.0
25%,22.0,30.0,25.0
50%,30.0,38.0,35.0
75%,45.0,50.0,47.5
max,115.0,120.0,117.5


In [6]:
# check null values
dataset.isna().sum()

# result - no null records present

date       0
unit       0
minimum    0
maximum    0
average    0
market     0
dtype: int64

In [7]:
# check duplicate records
dataset.duplicated().sum()

# result - no duplicate records found

0

In [8]:
# convert date column from object to datetime object
dataset.date = pd.to_datetime(dataset.date)

In [9]:
# feature segregation
x_min = dataset[['date', 'minimum']]
x_min.columns = ['ds', 'y']
x_max = dataset[['date', 'maximum']]
x_max.columns = ['ds', 'y']

In [10]:
# create prophet model
# for minimum price
model_min = Prophet()
model_min.add_seasonality(name = 'monthly', period = 30.5, fourier_order = 5)
model_min.fit(x_min)

# for maximum price
model_max = Prophet()
model_max.add_seasonality(name = 'monthly', period = 30.5, fourier_order = 5)
model_max.fit(x_max)

Initial log joint probability = -33.3997
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       4401.94   0.000415882       85.3299           1           1      113   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     117       4402.72   0.000376785       113.854   2.937e-06       0.001      169  LS failed, Hessian reset 
     199       4404.21     0.0103551       84.9582           1           1      273   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     204       4405.19    0.00784704       436.878   6.579e-05       0.001      327  LS failed, Hessian reset 
     299       4407.03   0.000492758       67.6054           1           1      444   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     348       4407.06   6.71768e-06       80.8121   1.103e-07       0.001      548  LS failed, Hessian rese

<fbprophet.forecaster.Prophet at 0x7fd0e5d1c090>

In [11]:
future_dates = model_min.make_future_dataframe(periods=10)
forecast_min= model_min.predict(future_dates)
future_dates = model_max.make_future_dataframe(periods=10)
forecast_max= model_max.predict(future_dates)

In [12]:
# take necessary columns and create result dataframe
next_10_day_min = forecast_min[['ds', 'yhat']]
next_10_day_min.columns = ['date', 'minimum']
next_10_day_max = forecast_max[['ds', 'yhat']]
next_10_day_max.columns = ['date', 'maximum']

next_10_days = pd.merge(next_10_day_min, next_10_day_max, on = 'date')
next_10_days.head(10)

Unnamed: 0,date,minimum,maximum
0,2013-06-16,29.458167,34.181592
1,2013-06-17,28.270112,33.103064
2,2013-06-18,27.034868,31.925124
3,2013-06-19,26.704897,31.398944
4,2013-06-20,25.779824,30.491236
5,2013-06-21,25.705744,30.332745
6,2013-06-25,24.284865,29.024184
7,2013-06-26,24.84761,29.362481
8,2013-06-27,24.271802,28.853426
9,2013-06-28,24.12522,28.673612


In [13]:
# ceil and floor 
import math
next_10_days.minimum = next_10_days.minimum.apply(lambda x: math.floor(x))
next_10_days.maximum = next_10_days.maximum.apply(lambda x: math.ceil(x))
next_10_days['average'] = next_10_days[['minimum', 'maximum']].mean(axis = 1)
next_10_days['unit'] = 'Kg'
next_10_days['market'] = 'Tomato'

In [14]:
# change format of columns
next_10_days = next_10_days[["date", "unit", "minimum", "maximum", "average", "market"]]
next_10_days.columns = next_10_days.columns.str.title()

In [15]:
# result
next_10_days.head(10)

Unnamed: 0,Date,Unit,Minimum,Maximum,Average,Market
0,2013-06-16,Kg,29,35,32.0,Tomato
1,2013-06-17,Kg,28,34,31.0,Tomato
2,2013-06-18,Kg,27,32,29.5,Tomato
3,2013-06-19,Kg,26,32,29.0,Tomato
4,2013-06-20,Kg,25,31,28.0,Tomato
5,2013-06-21,Kg,25,31,28.0,Tomato
6,2013-06-25,Kg,24,30,27.0,Tomato
7,2013-06-26,Kg,24,30,27.0,Tomato
8,2013-06-27,Kg,24,29,26.5,Tomato
9,2013-06-28,Kg,24,29,26.5,Tomato


In [16]:
# done