In [218]:
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_squared_error


In [204]:
def to_int(value):
    """Parse a comma-grouped numeric string such as ``"1,234"`` into a float.

    Despite the name, the parsed result is a ``float``, not an ``int``.

    Parameters
    ----------
    value : object
        Usually a string. Surrounding whitespace and ``,`` separators are
        removed before conversion.

    Returns
    -------
    float or object
        The parsed number, or ``value`` itself, unchanged, when it is not a
        string (for example a NaN float or an already numeric cell) or when
        the cleaned text is not a valid number.
    """
    try:
        return float(value.strip().replace(",", ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .strip()/.replace() (non-string cell).
        # TypeError: .replace() rejected str arguments (e.g. a bytes value).
        # ValueError: the cleaned text is not a number.
        # Only these are swallowed; KeyboardInterrupt/SystemExit and genuine
        # programming errors now propagate instead of being hidden.
        return value
    
def to_pct(value):
    """Parse a percentage string such as ``"91.3%"`` into a float (``91.3``).

    Only the ``%`` sign is removed; the number is not divided by 100.

    Parameters
    ----------
    value : object
        Usually a string containing a number followed by ``%``.

    Returns
    -------
    float or object
        The parsed number, or ``value`` itself, unchanged, when it is not a
        string (for example a NaN float) or when the remaining text is not a
        valid number.
    """
    try:
        return float(value.replace("%", ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .replace() (non-string cell).
        # TypeError: .replace() exists but rejects these arguments.
        # ValueError: the remaining text is not a number.
        # Only these are swallowed; KeyboardInterrupt/SystemExit and genuine
        # programming errors now propagate instead of being hidden.
        return value
    
    
def to_date(value):
    """Convert ``value`` to a datetime by delegating to ``pandas.to_datetime``."""
    parsed = pd.to_datetime(value)
    return parsed

In [205]:
# Column names, in order, passed as `names=` when the national CSV is read.
nat_header = [
    'MAP Region',
    'Year',
    'Quarter',
    'Properties',
    'Units',
    'Average Occupancy',
    'Median Occupancy',
    'Annual Absorption',
    'Annual Inventory Growth',
    'Properties Under Construction',
    'Units Under Construction',
    'Construction vs. Inventory',
    'Average Rent',
    'Annual Rent Growth',
    'Units Started',
    'Rolling 4-Quarter Units Started',
    'Rolling 4-Quarter Starts vs. Inventory',
]

In [206]:
# Load the national dataset. header=0 treats the file's first row as a header
# and replaces it with the names in nat_header.
df = pd.read_csv(
    'data/national.csv',
    header=0,
    names=nat_header,
)

In [207]:
# Columns removed before the analysis.
drop = [
    'MAP Region',
    'Year',
]
df = df.drop(columns=drop)

In [208]:
# Each raw column is converted element-wise with the parser listed for it:
# to_date for the quarter, to_int for comma-grouped numbers, to_pct for "%" values.
column_parsers = (
    (to_date, ['Quarter']),
    (to_int, [
        'Properties',
        'Units',
        'Properties Under Construction',
        'Units Under Construction',
        'Average Rent',
        'Units Started',
        'Rolling 4-Quarter Units Started',
    ]),
    (to_pct, [
        'Average Occupancy',
        'Median Occupancy',
        'Annual Absorption',
        'Annual Inventory Growth',
        'Construction vs. Inventory',
        'Annual Rent Growth',
        'Rolling 4-Quarter Starts vs. Inventory',
    ]),
)

for parser, column_names in column_parsers:
    for column_name in column_names:
        df[column_name] = df[column_name].apply(parser)

In [224]:
# Preview the first rows of the cleaned frame instead of dumping every row.
df.head(10)

Unnamed: 0,Quarter,Properties,Units,Average Occupancy,Median Occupancy,Annual Absorption,Annual Inventory Growth,Properties Under Construction,Units Under Construction,Construction vs. Inventory,Average Rent,Annual Rent Growth,Units Started,Rolling 4-Quarter Units Started,Rolling 4-Quarter Starts vs. Inventory
0,2005-10-01,3672.0,459477.0,91.3,94.6,,,170,19259.0,4.2,2800.0,,,,
1,2006-01-01,3703.0,464518.0,91.0,94.6,,,163,17339.0,3.7,2836.0,,2836.0,,
2,2006-04-01,3717.0,466392.0,91.4,94.8,,,160,17553.0,3.8,2865.0,,2598.0,,
3,2006-07-01,3734.0,468860.0,91.3,94.6,,,180,19026.0,4.1,2878.0,,4007.0,,
4,2006-10-01,3745.0,470864.0,91.3,94.8,2.5,2.5,192,21057.0,4.5,2908.0,3.6,4331.0,13772.0,2.9
5,2007-01-01,3769.0,474182.0,91.3,94.9,2.5,2.1,178,21234.0,4.5,2952.0,3.5,3846.0,14782.0,3.1
6,2007-04-01,3779.0,476834.0,90.9,94.3,1.8,2.2,180,21310.0,4.5,2977.0,3.8,3221.0,15405.0,3.2
7,2007-07-01,3790.0,478994.0,90.8,94.2,1.6,2.2,183,21583.0,4.5,2994.0,3.7,2689.0,14087.0,2.9
8,2007-10-01,3810.0,482533.0,90.5,93.8,1.5,2.5,193,22590.0,4.7,3019.0,3.8,5211.0,14967.0,3.1
9,2008-01-01,3835.0,487343.0,89.8,93.0,1.0,2.8,201,22668.0,4.7,3069.0,3.6,4903.0,16024.0,3.3


In [210]:
# Names of all remaining columns, as a plain Python list.
cols = df.columns.tolist()

In [211]:
# One-element list holding the first column name.
# NOTE(review): not referenced by any other visible cell — presumably a leftover for trying the plot loop on one column; confirm before removing.
test = cols[:1]

In [212]:
# Draw one line chart per column, with 'Quarter' on the x-axis.
# 'Quarter' itself is skipped: plotting the x-axis column against itself
# only produces a meaningless diagonal line.
for each in cols:
    if each == 'Quarter':
        continue
    fig = px.line(df, x='Quarter', y=each)
    fig.update_layout(
        title={
            'text': "{} vs Time".format(each),
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        xaxis_title="Time",
        yaxis_title=each,
    )
    fig.show()

In [233]:

# Forecast average occupancy with an ARIMA(1, 1, 1) model.
# NOTE(review): the ARIMA class imported from `statsmodels.tsa.arima_model` is
# the legacy implementation (deprecated in statsmodels 0.12, removed in 0.13).
# Its replacement, `statsmodels.tsa.arima.model.ARIMA`, has a different
# fit/predict signature — confirm the installed version before upgrading.
data = df['Average Occupancy'].values
n_forecast = 4  # number of out-of-sample periods to predict

# fit model
model = ARIMA(data, order=(1, 1, 1))
model_fit = model.fit(disp=False)

# make prediction
# The window starts at the first index past the observed series and is derived
# from len(data), so it stays correct when rows are added to the dataset
# (for the current 56 observations this is 56..59, as before).
forecast_start = len(data)
forecast_end = forecast_start + n_forecast - 1
yhat = model_fit.predict(forecast_start, forecast_end, typ='levels')
print(yhat)

[88.23057486 88.09990962 88.05751286 87.99354783]


In [227]:
# Number of observations in the series passed to the model; predict() indices at or beyond this value are out-of-sample.
len(data)

56