In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [2]:
#load the data from the csv file
data = pd.read_csv('../Market_Crash_Predictor/cleaned_data/final_standardized_data.csv')


In [3]:
#Show the first 5 rows of the data
data.head()

Unnamed: 0,Date,Federal Funds Rate,Unemployment Rate,GDP,Inflation Rate,SP500
0,1975-01-01,3.87,8.1,1616.116,9.143147,72.564091
1,1975-02-01,6.76,8.1,1616.116,9.143147,80.096842
2,1975-03-01,6.07,8.6,1616.116,9.143147,83.831999
3,1975-04-01,5.48,8.8,1651.853,9.143147,84.723182
4,1975-05-01,5.55,9.0,1651.853,9.143147,90.09619


In [4]:
#Make sure Date is in datetime format
data['Date'] = pd.to_datetime(data['Date'])

#Set the Date as the index
data.set_index('Date', inplace=True)

In [5]:
#Create a month-to-month percentage change in the features
data['sp500_pct_change'] = data['SP500'].pct_change()
data['gdp_pct_change'] = data['GDP'].pct_change()
data['inflation_pct_change'] = data['Inflation Rate'].pct_change()
data['unemployment_pct_change'] = data['Unemployment Rate'].pct_change()
data['fed_funds_rate_pct_change'] = data['Federal Funds Rate'].pct_change()

  data['gdp_pct_change'] = data['GDP'].pct_change()
  data['inflation_pct_change'] = data['Inflation Rate'].pct_change()
  data['unemployment_pct_change'] = data['Unemployment Rate'].pct_change()
  data['fed_funds_rate_pct_change'] = data['Federal Funds Rate'].pct_change()


In [6]:
#Do a moving average and volatility calulation for the SP500
data['sp500_quarter'] = data['SP500'].rolling(window=3).mean()
data['sp500_yearly'] = data['SP500'].rolling(window=12).mean()
data['sp500_volatility_quarter'] = data['sp500_pct_change'].rolling(window=3).std()
data['sp500_volatility_yearly'] = data['sp500_pct_change'].rolling(window=12).std()


In [20]:
#Set crash threshold to 10% drop in the SP500
crash_threshold = -0.05
data['crash'] = (data['sp500_pct_change'] < crash_threshold).astype(int)


In [21]:
#Show the first 5 rows of the data
data.head()

Unnamed: 0_level_0,Federal Funds Rate,Unemployment Rate,GDP,Inflation Rate,SP500,sp500_pct_change,gdp_pct_change,inflation_pct_change,unemployment_pct_change,fed_funds_rate_pct_change,sp500_quarter,sp500_yearly,sp500_volatility_quarter,sp500_volatility_yearly,crash
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1976-01-01,5.37,7.9,1820.487,5.744813,96.859524,0.091985,0.033293,-0.371681,-0.036585,0.011299,91.875081,88.184867,0.054999,0.049154,0
1976-02-01,4.84,7.7,1820.487,5.744813,100.639474,0.039025,0.0,0.0,-0.025316,-0.098696,95.399817,89.896753,0.05357,0.042928,0
1976-03-01,5.21,7.6,1820.487,5.744813,101.084348,0.00442,0.0,0.0,-0.012987,0.076446,99.527782,91.334449,0.044101,0.042277,0
1976-04-01,4.83,7.7,1852.332,5.744813,101.929524,0.008361,0.017493,0.0,0.013158,-0.072937,101.217782,92.768311,0.018944,0.04231,0
1976-05-01,5.1,7.4,1852.332,5.744813,101.161999,-0.00753,0.0,0.0,-0.038961,0.055901,101.391957,93.690462,0.008275,0.04003,0


In [22]:
#Drop the first 12 rows of the data since they have NaN values
data = data[12:]

In [32]:
#Show the first 5 rows of the data
data.head()

Unnamed: 0_level_0,Federal Funds Rate,Unemployment Rate,GDP,Inflation Rate,SP500,sp500_pct_change,gdp_pct_change,inflation_pct_change,unemployment_pct_change,fed_funds_rate_pct_change,sp500_quarter,sp500_yearly,sp500_volatility_quarter,sp500_volatility_yearly,crash
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1977-01-01,4.17,7.5,1988.648,6.501684,103.813333,-0.008098,0.028111,0.131749,-0.038462,-0.107066,103.22258,102.590104,0.02409,0.020869,0
1977-02-01,4.58,7.6,1988.648,6.501684,100.955789,-0.027526,0.0,0.0,0.013333,0.098321,103.143344,102.616463,0.031597,0.020122,0
1977-03-01,4.7,7.4,1988.648,6.501684,100.569565,-0.003826,0.0,0.0,-0.026316,0.026201,101.779562,102.573565,0.012632,0.020115,0
1977-04-01,4.72,7.2,2055.909,6.501684,99.050001,-0.01511,0.033822,0.0,-0.027027,0.004255,100.191785,102.333605,0.011855,0.020342,0
1977-05-01,5.12,7.0,2055.909,6.501684,98.760476,-0.002923,0.0,0.0,-0.027778,0.084746,99.460014,102.133478,0.00679,0.020276,0


In [31]:
#Create a csv file with the new data
data.to_csv('../Market_Crash_Predictor/cleaned_data/1987crashmodel.csv')

In [24]:
#Define the features and the target
features = [
    'sp500_pct_change', 'gdp_pct_change', 'inflation_pct_change', 
    'unemployment_pct_change', 'fed_funds_rate_pct_change',
    'sp500_quarter', 'sp500_yearly', 'sp500_volatility_quarter', 'sp500_volatility_yearly'
]

X = data[features]
y = data['crash']

In [25]:
#Standardize the features
scaler = StandardScaler()
x_scaled = scaler.fit_transform(X)

In [26]:
#SPlit the data into training from 1976 to 1986 and and testing the 1987 data
train_start_date = '1976-01-01'
train_end_date = '1986-12-01'

train_data = data[(data.index >= train_start_date) & (data.index <= train_end_date)]
test_data = data[(data.index >= '1987-01-01') & (data.index <= '1989-12-01')]

X_train = scaler.fit_transform(train_data[features])
y_train = train_data['crash']
X_test = scaler.transform(test_data[features])
y_test = test_data['crash']

In [27]:
#Create a logistic regression model
model = LogisticRegression()

#Fit the model to the training data
model.fit(X_train, y_train)

In [28]:
#Make predictions on the testing data
y_pred = model.predict(X_test)

#Print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       1.00      1.00      1.00         2

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [29]:
#Show the coefficients of the model
coefficients = pd.DataFrame({
	'Variable': features,
	'Coefficient': model.coef_[0]
})

#Transform the coefficients into absolute values
coefficients['Coefficient'] = coefficients['Coefficient'].abs()

In [30]:
#Print the coefficients
print(coefficients)

                    Variable  Coefficient
0           sp500_pct_change     2.149582
1             gdp_pct_change     0.635122
2       inflation_pct_change     0.448226
3    unemployment_pct_change     0.126942
4  fed_funds_rate_pct_change     0.136118
5              sp500_quarter     0.173916
6               sp500_yearly     0.025000
7   sp500_volatility_quarter     0.686810
8    sp500_volatility_yearly     0.307140
