In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

data = pd.read_csv("newdata.csv")

selected_columns = [col for col in data.columns if 'assets' in col.lower() or 'liabilities' in col.lower()]

axis_numbers = [data.columns.get_loc(col) for col in selected_columns]

selected_columns.append('Bankrupt?')
axis_numbers.append(data.columns.get_loc('Bankrupt?'))

selected_columns.append('Year')
axis_numbers.append(data.columns.get_loc('Year'))

data_selected = data.iloc[:, axis_numbers]
X = data_selected.drop(columns=["Bankrupt?"]) 
y = data_selected["Bankrupt?"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

predicted_probabilities = model.predict_proba(X_test_scaled)[:, 1]
predicted_time_to_bankruptcy = predicted_probabilities * (X_test['Year'].max() - X_test['Year'].min())

predictions_df = pd.DataFrame({'Year': X_test['Year'], 'Probability of Bankruptcy': predicted_probabilities, 'Time to Bankruptcy': predicted_time_to_bankruptcy})

print(predictions_df)


Confusion Matrix:
[[1310    3]
 [  44    7]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      1313
           1       0.70      0.14      0.23        51

    accuracy                           0.97      1364
   macro avg       0.83      0.57      0.61      1364
weighted avg       0.96      0.97      0.95      1364

      Year  Probability of Bankruptcy  Time to Bankruptcy
239   2238                   0.009013           61.342553
2850  4849                   0.009791           66.639223
2687  4686                   0.000737            5.018691
6500  8499                   0.078069          531.334653
2684  4683                   0.075563          514.279584
...    ...                        ...                 ...
1357  3356                   0.019360          131.765570
3946  5945                   0.000362            2.464245
5491  7490                   0.021449          145.985161
2112  4111              