In [5]:
import os

import pandas as pd

## Loading the dataset

In [6]:
# Location of the CSV. Defaults to a path relative to the notebook's working
# directory; set the RECESSION_DATASET environment variable to point elsewhere
# instead of hard-coding a machine-specific absolute path.
DATA_PATH = os.environ.get("RECESSION_DATASET", "dataset.csv")
data = pd.read_csv(DATA_PATH)

FileNotFoundError: [Errno 2] No such file or directory: 'dataset.csv'

In [None]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,Year,Quarter,GDP_Growth,Inflation,Industrial_Production,Job_Market,Recession_Indicator
0,2016,Q1,3.866202,5.795522,4.063915,72806,0
1,2013,Q1,6.624839,4.00888,0.301156,51465,0
2,2022,Q4,3.469627,6.322668,-3.622371,64109,0
3,2020,Q3,3.907552,1.415962,-1.309438,53528,1
4,2017,Q4,1.586789,1.885589,2.220974,39230,0


## Preprocessing


In [None]:
# Boolean mask of missing values, cell by cell (isna is the canonical name of isnull).
data.isna()

Unnamed: 0,Year,Quarter,GDP_Growth,Inflation,Industrial_Production,Job_Market,Recession_Indicator
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
2995,False,False,False,False,False,False,False
2996,False,False,False,False,False,False,False
2997,False,False,False,False,False,False,False
2998,False,False,False,False,False,False,False


In [None]:
# Count of missing values per column.
data.isna().sum()

Year                     0
Quarter                  0
GDP_Growth               0
Inflation                0
Industrial_Production    0
Job_Market               0
Recession_Indicator      0
dtype: int64

#### No column contains null values, so the data needs no further cleaning and we can proceed to selecting the features and the target

#### Let's convert the `Quarter` labels (`Q1`–`Q4`) into integers so that every feature is numeric

In [None]:
# Distinct quarter labels present before encoding.
data.loc[:, 'Quarter'].unique()

array(['Q1', 'Q4', 'Q3', 'Q2'], dtype=object)

In [None]:
# Encode the quarter labels as integers 1-4.
QUARTER_TO_INT = {'Q1':1,'Q2':2,'Q3':3,'Q4':4}

# Only convert while the column still holds labels: Series.map turns every
# value that is not a key of the dict into NaN, so re-running this cell on the
# already-encoded integers would wipe the column.
if not pd.api.types.is_numeric_dtype(data['Quarter']):
    quarter_codes = data['Quarter'].map(QUARTER_TO_INT)
    # Fail loudly on an unexpected label instead of silently carrying NaN forward.
    if quarter_codes.isna().any():
        unknown = sorted(data.loc[quarter_codes.isna(), 'Quarter'].astype(str).unique())
        raise ValueError(f"Unexpected Quarter labels: {unknown}")
    data['Quarter'] = quarter_codes.astype('int64')

In [None]:
# Distinct quarter values after encoding.
pd.unique(data['Quarter'])

array([1, 4, 3, 2])

In [None]:
# Predictor columns: every column of `data` listed here is fed to the models.
FEATURE_COLUMNS = [
    'Year',
    'Quarter',
    'GDP_Growth',
    'Inflation',
    'Industrial_Production',
    'Job_Market',
]
X = data[FEATURE_COLUMNS]

In [None]:
# Target column to predict.
TARGET_COLUMN = 'Recession_Indicator'
y = data[TARGET_COLUMN]

In [None]:
# Display the feature matrix selected from `data`.
X

Unnamed: 0,Year,Quarter,GDP_Growth,Inflation,Industrial_Production,Job_Market
0,2016,1,3.866202,5.795522,4.063915,72806
1,2013,1,6.624839,4.008880,0.301156,51465
2,2022,4,3.469627,6.322668,-3.622371,64109
3,2020,3,3.907552,1.415962,-1.309438,53528
4,2017,4,1.586789,1.885589,2.220974,39230
...,...,...,...,...,...,...
2995,2021,1,3.154164,12.653804,0.783076,43332
2996,2020,3,6.825993,2.673752,2.006138,10066
2997,2017,4,8.714885,13.569079,1.546320,42313
2998,2011,4,2.374860,5.110002,-4.509416,70857


In [None]:
# Display the target series (`Recession_Indicator`).
y

0       0
1       0
2       0
3       1
4       0
       ..
2995    1
2996    1
2997    0
2998    0
2999    0
Name: Recession_Indicator, Length: 3000, dtype: int64

### Splitting data for training and testing

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

#### Standardize the features for convergence

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Fit the scaler on the training split only and apply the same transformation
# to the test split, so no test-set statistics leak into training.
#
# The cell overwrites X_train / X_test with scaled arrays. Guard on the input
# type so that re-running it is a no-op: refitting on already-scaled data would
# replace `scaler` with one that no longer matches the raw feature scale, and
# any later `scaler.transform(...)` on raw inputs would be wrong.
if isinstance(X_train, pd.DataFrame):
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

#### Now we will import and train the models (algorithms)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default hyper-parameters; fit() returns the estimator itself.
l = LogisticRegression().fit(X_train, y_train)
l

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with default hyper-parameters.
# Fixed seed (same value as the train/test split) so that the fitted tree and
# its reported score are reproducible across kernel restarts.
d = DecisionTreeClassifier(random_state=42)
d.fit(X_train,y_train)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with default hyper-parameters.
# Bootstrapping and feature sub-sampling are random; fix the seed so that the
# fitted forest and its reported score are reproducible across kernel restarts.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train,y_train)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier with default hyper-parameters.
k = KNeighborsClassifier().fit(X_train, y_train)
k

In [None]:
from sklearn.svm import SVC

In [None]:
# Support-vector classifier with default hyper-parameters.
s = SVC().fit(X_train, y_train)
s

## Model evaluation

In [None]:
def _accuracy_pct(model):
    """Return the test-set accuracy of a fitted classifier as a percentage, rounded to 2 decimals."""
    return round(model.score(X_test, y_test) * 100, 2)


# One score per fitted model, unpacked in the same order as the estimators.
l_acc, d_acc, rf_acc, k_acc, s_acc = (
    _accuracy_pct(estimator) for estimator in (l, d, rf, k, s)
)

In [None]:
# Summary table of test accuracies (in percent).
# The label order must match the order of the scores: d_acc belongs to the
# decision tree and rf_acc to the random forest.
models=pd.DataFrame({'Models':['LogisticRegression','DecisionTreeClassifier','RandomForestClassifier','KNN','SVC'],
"Accuracy":[l_acc,d_acc,rf_acc,k_acc,s_acc]
})

In [None]:
# Display the accuracy comparison table.
models

Unnamed: 0,Models,Accuracy
0,LogisticRegression,83.22
1,RandomForestClassifier,100.0
2,DecisionTreeClassifier,100.0
3,KNN,87.78
4,SVC,88.0


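### Among all models, the tree-based classifiers (DecisionTreeClassifier and RandomForestClassifier) score highest, both at 100% test accuracy. A perfect score is unusual, so it should be checked for target leakage before the model is trusted.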

## Test on New Data

In [None]:
# A single hypothetical observation, using the same columns (and column order)
# as the training features.
_new_observation = {
    'Year': 2024,
    'Quarter': 4,
    'GDP_Growth': 5.8,
    'Inflation': 7.5,
    'Industrial_Production': -1.9,
    'Job_Market': 70000,
}
new_data = pd.DataFrame([_new_observation])

In [None]:
# Display the hand-built observation before it is scaled.
new_data

Unnamed: 0,Year,Quarter,GDP_Growth,Inflation,Industrial_Production,Job_Market
0,2024,4,5.8,7.5,-1.9,70000


### Standardize the new data using the same scaler

In [None]:
# Reuse the scaler fitted on the training split; it must not be refitted on new data.
new_data_scaled = scaler.transform(X=new_data)

### Making predictions for the new data

In [None]:

# Predicted class label(s) for the scaled observation, one per input row.
prediction = rf.predict(X=new_data_scaled)

In [None]:
# `prediction` holds one label per input row. Report each row explicitly rather
# than truth-testing `prediction == 0`, which only works for a one-element
# array and raises ValueError as soon as more than one row is predicted.
for label in prediction:
    if label == 0:
        print("Recession not possible")
    else:
        print("Recession is possible")
    

Recession not possible


### Let's Build Final Model

In [None]:
import joblib

In [None]:
# Model that gets persisted. It is trained on the standardized training split,
# so callers must standardize inputs with the same fitted scaler before predicting.
# Fixed seed so that the saved model is reproducible instead of a fresh random
# forest on every run.
final_model = RandomForestClassifier(random_state=42)
final_model.fit(X_train,y_train)

In [None]:
# Persist the trained classifier. Note: joblib.dump returns the list of file
# names it wrote, so `model` holds paths, not the estimator.
model = joblib.dump(final_model,'Recession_Model')

# Persist the fitted scaler alongside it: final_model was trained on
# standardized features, so it cannot be used correctly without this scaler.
scaler_files = joblib.dump(scaler, 'Recession_Scaler')