In [1]:
import pandas as pd
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA 
from pandas.tools.plotting import autocorrelation_plot
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
#Install the missing packages by: python -m pip install <package name> or pip install <package name>
class CSPredict:
    def __init__(self,path):
        self.path=path
        
    def parser(self,x):
        return x
    def start(self):
        # read and parse data 
        print("#############################################################")
        print("###################CITY SCORE PREDICTION #############")
        print("#############################################################")
        print("-------------------------------------------------------------")
        city_score = read_csv(self.path, header=None,parse_dates=[2], squeeze=True, date_parser=self.parser)
        city_score = city_score.iloc[:, [2, 1]]
        city_score.columns = ['Date', 'Score']

        # set index as datetime to support plotting and Arima Model
        city_score.set_index('Date', inplace=True)
        # city_score = city_score.asfreq('T')

        print(city_score.head())

        city_score.plot(title='City Score Prediction')
        pyplot.tight_layout()
        pyplot.show()
        print("Printing the statistics")
        print("-----------------------------------------")
        self.stats(city_score)
    def stats(self,city_score):
        model = ARIMA(city_score, order=(5,1,0))
        model_fit = model.fit(disp=0)
        print(model_fit.summary())

        # plot residual errors
        residuals = pd.DataFrame(model_fit.resid)
        residuals.plot()
        pyplot.show()
        residuals.plot(kind='kde')
        pyplot.show()
        print(residuals.describe())
        print("-----------------------------------------")
        print("Please wait...... Training the testcases is in progress...")
        print("-----------------------------------------")
        self.autocorelaton(city_score)
    def autocorelaton(self,city_score):
        autocorrelation_plot(city_score)
        pyplot.show()
        #Will take more than 30 minutes to 1 hour to run as it will train the data.
        X = city_score.values
        size = int(len(X) * 0.66)
        train, test = X[0:size], X[size:len(X)]
        history = [x for x in train]
        predictions = list()
        print('Total Test cases: {}'.format(len(test)))
        for t in tqdm(range(len(test))):
            model = ARIMA(history, order=(5,1,0))
            model_fit = model.fit(disp=0)
            output = model_fit.forecast()
            yhat = output[0]
            predictions.append(yhat)
            obs = test[t]
            history.append(obs)
        self.finalize(test,predictions)
    def finalize(self,test,predictions):
        error = mean_squared_error(test, predictions)
        print('Test MSE: %.3f' % error)
        # plot
        pyplot.plot(test)
        pyplot.plot(predictions, color='red')
        pyplot.show()
        print("------------------------------------------")
        print("END")

CSPredict('cityscore.csv').start();


#############################################################
###################CITY SCORE PREDICTION #############
#############################################################
-------------------------------------------------------------


  from pandas.core import datetools


               Score
Date                
2016-06-28  0.916239
2016-04-13  0.964654
2016-04-15  0.945004
2016-07-22  0.906451
2017-10-02  0.913069


<matplotlib.figure.Figure at 0x202c51566d8>

Printing the statistics
-----------------------------------------


  if issubdtype(paramsdtype, float):
  elif issubdtype(paramsdtype, complex):


                             ARIMA Model Results                              
Dep. Variable:                D.Score   No. Observations:                  636
Model:                 ARIMA(5, 1, 0)   Log Likelihood                -187.747
Method:                       css-mle   S.D. of innovations              0.325
Date:                Wed, 27 Jun 2018   AIC                            389.495
Time:                        14:48:19   BIC                            420.681
Sample:                    04-13-2016   HQIC                           401.603
                         - 05-02-2018                                         
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const         -1.448e-05      0.003     -0.004      0.997      -0.007       0.007
ar.L1.D.Score    -0.8733      0.039    -22.162      0.000      -0.951      -0.796
ar.L2.D.Score    -0.7287      0.049    -

  if issubdtype(paramsdtype, float):


<matplotlib.figure.Figure at 0x202c5f02b00>

<matplotlib.figure.Figure at 0x202c5f52588>

                0
count  636.000000
mean     0.000021
std      0.324956
min     -1.528120
25%     -0.025831
50%     -0.002564
75%      0.016689
max      4.000953
-----------------------------------------
Please wait...... Training the testcases is in progress...
-----------------------------------------




<matplotlib.figure.Figure at 0x202c5f16128>

Total Test cases: 217


100%|██████████| 217/217 [00:54<00:00,  3.95it/s]


Test MSE: 0.001


<matplotlib.figure.Figure at 0x202c5fb0358>

------------------------------------------
END
