In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
def load_and_prepare_data(filepath):
    """Read the CSV at ``filepath`` and split it into features and target.

    The 'Date' column is discarded, the 'Profit' column becomes the target,
    and every remaining column is kept as a feature.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the DataFrame without 'Date' and 'Profit'
        and ``y`` is the 'Profit' Series.

    Raises
    ------
    KeyError
        If the 'Date' or 'Profit' column is absent from the file.
    """
    # 'Date' is assumed not to be used by the model, so it is removed first.
    frame = pd.read_csv(filepath).drop(columns=['Date'])
    features = frame.drop(columns=['Profit'])
    target = frame['Profit']
    return features, target


In [4]:
def train_model(X_train, y_train):
    """Fit a scikit-learn ``LinearRegression`` on the training data.

    Parameters
    ----------
    X_train : features used for fitting.
    y_train : target values aligned with ``X_train``.

    Returns
    -------
    The fitted ``LinearRegression`` estimator.
    """
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return LinearRegression().fit(X_train, y_train)


In [5]:
def evaluate_model(model, X_test, y_test):
    """Print the mean squared error and R^2 of ``model`` on held-out data.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_test : features to predict on.
    y_test : true target values aligned with ``X_test``.

    Returns
    -------
    None. The two metrics are written to stdout only.
    """
    y_pred = model.predict(X_test)
    # Compute both metrics up front so nothing is printed if either one fails.
    error, fit_quality = (
        mean_squared_error(y_test, y_pred),
        r2_score(y_test, y_pred),
    )
    print(f"Mean Squared Error: {error}")
    print(f"R^2 Score: {fit_quality}")


In [6]:
def run_financial_modeling_workflow(filepath, test_size=0.2, random_state=42):
    """Run the load -> split -> fit -> evaluate pipeline end to end.

    Parameters
    ----------
    filepath : str or path-like
        CSV file handed to ``load_and_prepare_data``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the default keeps the original
        80/20 split.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` so the split is reproducible; the
        default keeps the original seed.

    Returns
    -------
    The fitted model returned by ``train_model``. Evaluation metrics are
    printed by ``evaluate_model`` rather than returned.
    """
    X, y = load_and_prepare_data(filepath)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = train_model(X_train, y_train)
    # Score on the held-out portion only; the training rows are never evaluated.
    evaluate_model(model, X_test, y_test)
    return model


In [7]:
# Adjust the file path to where your CSV file is stored.
# Runs the whole workflow: the metrics are printed and the fitted estimator is
# kept in the notebook-level name `model`.
# NOTE(review): the recorded run reports R^2 = 1.0 with an MSE around 1e-21.
# That suggests 'Profit' may be an exact linear function of the remaining
# columns (target leakage) — confirm before relying on this model.
model = run_financial_modeling_workflow('../data/daily_financial_data.csv')


Mean Squared Error: 9.16652093603969e-22
R^2 Score: 1.0


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
import numpy as np

def load_and_prepare_data(filepath):
    """Load the CSV at ``filepath`` and return ``(X, y)``.

    ``X`` holds every column except 'Date' and 'Profit'; ``y`` is the
    'Profit' column. Both columns are assumed to be present, and a
    ``KeyError`` is raised if either is missing.

    Note: this redefines (and therefore shadows) the function of the same
    name declared in an earlier cell of this notebook.
    """
    frame = pd.read_csv(filepath)
    features = frame.drop(columns=['Date', 'Profit'])
    target = frame['Profit']
    return features, target

# Function for cross-validation
def cross_validate_model(model, X, y, cv=5):
    """Cross-validate ``model`` and print its average MSE and R^2.

    Both metrics are computed in a single cross-validation pass, so they are
    always derived from the same fitted models and the same folds. This holds
    even when ``cv`` is a one-shot iterable of splits or a shuffling splitter
    without a fixed seed, and each fold is fitted only once.

    Parameters
    ----------
    model : unfitted scikit-learn estimator.
    X : feature matrix.
    y : target values aligned with ``X``.
    cv : int, splitter or iterable, default 5
        Cross-validation strategy, forwarded to scikit-learn unchanged.

    Returns
    -------
    None. The averaged metrics are written to stdout only.
    """
    # Local import: the enclosing cell only imports cross_val_score.
    from sklearn.model_selection import cross_validate

    results = cross_validate(
        model, X, y, scoring=('neg_mean_squared_error', 'r2'), cv=cv
    )
    # scikit-learn reports the negated MSE so that "greater is better"; flip it back.
    mse_scores = -results['test_neg_mean_squared_error']
    r2_scores = results['test_r2']
    print(f"Average Cross-Validated MSE: {np.mean(mse_scores)}")
    print(f"Average Cross-Validated R^2: {np.mean(r2_scores)}")

# Load the full dataset; X and y are notebook-level names that the
# feature-importance cell further down also reads.
X, y = load_and_prepare_data('../data/daily_financial_data.csv')

# Fresh, unfitted estimator. This rebinds `model`, which previously held the
# fitted model returned by run_financial_modeling_workflow.
model = LinearRegression()

# Perform cross-validation with the default cv=5 and print the averaged metrics.
# NOTE(review): the recorded output shows R^2 = 1.0 and an MSE around 1e-22.
# 'Profit' is presumably an exact linear combination of the feature columns —
# TODO confirm, since that would make the model trivial.
cross_validate_model(model, X, y)


Average Cross-Validated MSE: 5.966877546768406e-22
Average Cross-Validated R^2: 1.0


In [10]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest on the full dataset purely to rank the features by importance
forest_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)

# Label each importance with its column name and list the largest first
feature_importances = pd.Series(data=forest_model.feature_importances_, index=X.columns)
print("Feature Importances:")
print(feature_importances.sort_values(ascending=False))


Feature Importances:
COGS          0.470661
Revenue       0.427728
Marketing     0.040455
Payroll       0.033108
R&D           0.012256
Consulting    0.009182
Travel        0.006610
dtype: float64
