# Task for Today  

***

## Financial Distress Prediction  

Given *data about various companies*, let's try to predict a given company's **financial distress level**.

We will use a linear regression model and a gradient boosting model to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
import xgboost as xgb

from sklearn.metrics import r2_score

In [None]:
# Load the financial distress dataset from CSV into a DataFrame.
data = pd.read_csv('../input/financial-distress/Financial Distress.csv')

In [None]:
# Bare expression: the notebook renders the raw DataFrame for inspection.
data

# Preprocessing

In [None]:
# isna() flags each missing cell; the first sum() counts per column and the
# second sum() adds those counts into one total for the whole frame.
print("Total missing values:", data.isna().sum().sum())

In [None]:
# Remove the 'Company' and 'Time' columns so they are not used as features.
data = data.drop(columns=['Company', 'Time'])

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new DataFrame with `column` replaced by one-hot indicator columns.

    Args:
        df: Input DataFrame; it is not modified.
        column: Name of the column to encode.
        prefix: Prefix passed to `pd.get_dummies` for the new column names.

    Returns:
        A DataFrame with the indicator columns appended on the right and
        `column` removed.
    """
    indicator_cols = pd.get_dummies(df[column], prefix=prefix)
    # pd.concat builds a new frame, so the caller's DataFrame is left untouched.
    return pd.concat([df, indicator_cols], axis=1).drop(columns=column)

In [None]:
# Replace the 'x80' column with one indicator column per distinct value,
# each named with the 'x80' prefix.
data = onehot_encode(data, column='x80', prefix='x80')

In [None]:
# Bare expression: the notebook renders the DataFrame after encoding.
data

In [None]:
# Count the columns that still have dtype 'object' after encoding.
print("Non-numeric columns:", len(data.select_dtypes('object').columns))

# Splitting/Scaling

In [None]:
# Features are every column except 'Financial Distress'; that column is the
# regression target.
X = data.drop(columns='Financial Distress').copy()
y = data['Financial Distress'].copy()

In [None]:
# Split BEFORE scaling so the scaler's mean and variance are estimated from the
# training rows only. Fitting the scaler on the full dataset would leak
# statistics from the validation and test rows into preprocessing.
# First split: 70% train+validation / 30% test.
# Second split: 20% of the remaining rows are held out for validation.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=100)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=100)

# Fit on the training split only, then apply that same fitted transform to
# the validation and test splits. `X` itself is left unscaled.
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
# Wrap each split in an xgboost DMatrix (features plus labels), the input
# type passed to xgb.train and Booster.predict below.
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
dtest = xgb.DMatrix(X_test, label=y_test)

# Linear Regression

In [None]:
# Baseline: ordinary least squares fitted on the training split and scored
# with R^2 on the held-out test split.
lin_model = LinearRegression().fit(X_train, y_train)

print("Linear Regression R^2 Score:", r2_score(y_test, lin_model.predict(X_test)))

# Gradient Boosting Model

In [None]:
# Booster hyperparameters: a small learning rate, trees up to depth 6, and a
# light L2 regularisation weight ('lambda').
params = {'learning_rate': 0.001, 'max_depth': 6, 'lambda': 0.01}

# Train for up to 10000 boosting rounds, monitoring the validation set.
# Training stops early once the validation metric has not improved for
# 10 consecutive rounds.
boost_model = xgb.train(
    params,
    dtrain,
    num_boost_round=10000,
    evals=[(dval, 'eval')],
    early_stopping_rounds=10,
    verbose_eval=False
)

# NOTE(review): depending on the installed xgboost version, predict() may use
# every trained tree rather than only those up to the best iteration; confirm.
print("XGB Model R^2 Score:", r2_score(y_test, boost_model.predict(dtest)))

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/AOF5g8TVsGc