In [1]:
import os

import pandas as pd
import pymongo
from pymongo import MongoClient
from statsmodels.tsa.arima.model import ARIMA

In [2]:
# Read the source table from the CSV in the working directory and show it
# as the cell output.
df = pd.read_csv(filepath_or_buffer='final_unemp.csv')
df

Unnamed: 0,CBSA,Metropolitan_Area,2015,2016,2017,2018,2019
0,14020,"Bloomington, IN",5.1,4.7,4.1,3.7,3.4
1,10420,"Akron, OH",5.0,4.6,4.1,3.6,3.4
2,14860,"Bridgeport-Stamford-Norwalk, CT Metropolitan N...",5.6,5.1,4.6,4.2,3.8
3,16060,"Carbondale-Marion, IL",5.9,5.4,4.8,4.3,4.1
4,19740,"Denver-Aurora-Lakewood, CO",3.7,3.4,3.0,2.7,2.7
...,...,...,...,...,...,...,...
378,43340,"Shreveport-Bossier City, LA",6.9,6.5,5.7,5.3,5.1
379,46060,"Tucson, AZ",5.5,5.1,4.5,4.1,3.8
380,46660,"Valdosta, GA",6.0,5.6,5.0,4.6,4.3
381,47380,"Waco, TX",4.1,3.8,3.4,3.1,2.9


In [3]:
# Inspect the dtype pandas inferred for each column of df.
df.dtypes

CBSA                   int64
Metropolitan_Area     object
2015                 float64
2016                 float64
2017                 float64
2018                 float64
2019                 float64
dtype: object

In [4]:
# Remove the 'Metropolitan_Area' name column so only CBSA and the yearly
# columns remain.
# df is rebound (rather than mutated with inplace=True) and errors='ignore'
# is passed so that re-running this cell after the column is already gone
# does not raise a KeyError.
df = df.drop(columns='Metropolitan_Area', errors='ignore')
df.head()

Unnamed: 0,CBSA,2015,2016,2017,2018,2019
0,14020,5.1,4.7,4.1,3.7,3.4
1,10420,5.0,4.6,4.1,3.6,3.4
2,14860,5.6,5.1,4.6,4.2,3.8
3,16060,5.9,5.4,4.8,4.3,4.1
4,19740,3.7,3.4,3.0,2.7,2.7


In [5]:
# Predict the 2024 value for every row of df with an ARIMA(1, 1, 0) model
# fitted on that row's 2015-2019 observations.

# The observation columns are selected by name: a positional slice such as
# iloc[:, 1:] would also pick up '2024_Unem_ROC' once this cell has been run,
# so a re-run would feed the previous forecast back in as a sixth data point.
YEAR_COLUMNS = ['2015', '2016', '2017', '2018', '2019']
# The last observation is 2019, so the 5th forecast step is 2024.
FORECAST_STEPS = 5

predictions = []
# Iterate over the rows actually present instead of a hard-coded row count,
# so the cell keeps working if the CSV gains or loses rows.
for yearly_values in df[YEAR_COLUMNS].to_numpy(dtype='float'):
    series = pd.Series(yearly_values, dtype='float')
    model = ARIMA(series, order=(1, 1, 0))
    model_fit = model.fit()
    pred = model_fit.forecast(FORECAST_STEPS)
    # Keep only the final forecast step (the 2024 value).
    predictions.append(pred.values.tolist()[-1])
df['2024_Unem_ROC'] = predictions



In [6]:
# Display df; the rendered output includes the '2024_Unem_ROC' column.
df

Unnamed: 0,CBSA,2015,2016,2017,2018,2019,2024_Unem_ROC
0,14020,5.1,4.7,4.1,3.7,3.4,2.293605
1,10420,5.0,4.6,4.1,3.6,3.4,2.717638
2,14860,5.6,5.1,4.6,4.2,3.8,1.848223
3,16060,5.9,5.4,4.8,4.3,4.1,3.372314
4,19740,3.7,3.4,3.0,2.7,2.7,2.700000
...,...,...,...,...,...,...,...
378,43340,6.9,6.5,5.7,5.3,5.1,4.682347
379,46060,5.5,5.1,4.5,4.1,3.8,2.693683
380,46660,6.0,5.6,5.0,4.6,4.3,3.193397
381,47380,4.1,3.8,3.4,3.1,2.9,2.087679


In [14]:
# Show the rows ordered from the smallest to the largest 2024 prediction.
# The sorted frame is only displayed; df itself is left unchanged.
df.sort_values(by=['2024_Unem_ROC'], ascending=True)

Unnamed: 0,CBSA,2015,2016,2017,2018,2019,2024_Unem_ROC
33,12940,5.5,5.0,4.5,4.1,3.7,1.748249
53,22020,2.5,2.5,2.3,2.0,1.9,1.777604
382,49180,5.4,5.0,4.5,4.1,3.7,1.820066
241,45060,5.4,5.0,4.5,4.1,3.7,1.820066
149,19340,5.6,5.1,4.6,4.2,3.8,1.848223
...,...,...,...,...,...,...,...
168,12100,9.6,8.7,7.5,6.5,6.8,7.247898
351,32900,11.4,10.6,9.4,8.4,8.1,7.255012
337,47300,11.6,11.2,10.4,9.8,9.6,9.136842
191,49740,21.7,18.8,16.9,16.7,16.4,15.551137


In [7]:
# Build an independent frame holding only the CBSA code and its 2024
# prediction; .copy() keeps later column assignments from touching df.
df_pred = df.loc[:, ['CBSA', '2024_Unem_ROC']].copy()
df_pred

Unnamed: 0,CBSA,2024_Unem_ROC
0,14020,2.293605
1,10420,2.717638
2,14860,1.848223
3,16060,3.372314
4,19740,2.700000
...,...,...
378,43340,4.682347
379,46060,2.693683
380,46660,3.193397
381,47380,2.087679


In [8]:
# Rank the predictions in descending order: the largest predicted value gets
# rank 1 and the smallest gets the highest rank. Tied predictions share the
# average of their ranks (the pandas default, spelled out here).
df_pred['Unem_Score'] = df_pred['2024_Unem_ROC'].rank(method='average', ascending=False)

In [9]:
# Display df_pred; the rendered output includes the 'Unem_Score' column.
df_pred

Unnamed: 0,CBSA,2024_Unem_ROC,Unem_Score
0,14020,2.293605,313.5
1,10420,2.717638,182.5
2,14860,1.848223,378.5
3,16060,3.372314,117.5
4,19740,2.700000,185.5
...,...,...,...
378,43340,4.682347,48.5
379,46060,2.693683,192.5
380,46660,3.193397,127.5
381,47380,2.087679,348.5


In [10]:
# Connection to mongo
# The full connection string (credentials included) is taken from the
# MONGODB_URI environment variable so no secret has to be pasted into the
# notebook. Expected shape of the value:
#   mongodb+srv://<username>:<password>@cluster0.l3pqt.mongodb.net/MSA?retryWrites=true&w=majority
mongo_uri = os.environ.get('MONGODB_URI')
if not mongo_uri:
    # Fail here with an actionable message rather than later with an
    # authentication or DNS error.
    raise RuntimeError(
        'Set the MONGODB_URI environment variable to the MongoDB connection '
        'string before running this cell.'
    )
client = MongoClient(mongo_uri)
# Select database
db = client['MSA']

In [11]:
# Get a handle to the 'arima_unem_pred_score' collection of the selected
# database (subscript access is equivalent to attribute access on db).
arima_unem_pred_score = db['arima_unem_pred_score']

In [12]:
# Convert df_pred into a list with one {column: value} dict per row, the
# shape passed to insert_many below.
df_dict = df_pred.to_dict('records')

In [13]:
# Write every record of df_dict to the collection in a single call.
# NOTE(review): running the notebook again presumably inserts a second copy
# of every record - confirm whether the collection should be cleared or
# upserted by CBSA first.
arima_unem_pred_score.insert_many(documents=df_dict)

<pymongo.results.InsertManyResult at 0x1fcdc4fba88>