<a href="https://colab.research.google.com/github/skybot00/skybot00.github.io/blob/main/Market_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset into a DataFrame.
# NOTE(review): the path is relative, so the CSV has to be present in the
# kernel's working directory (e.g. uploaded to the Colab session) before this
# cell is run.
data = pd.read_csv('3 Year Market Analysis - Sheet1.csv')



In [None]:
# Show the column headers as read from the CSV (this runs before the clean-up below).
print(data.columns)

# Strip leading/trailing whitespace from the column names.
data.columns = data.columns.str.strip()

# Distinct values found in the 'Company' column.
companies = data['Company'].unique()
print("Unique companies in the dataset", companies)
# Evaluation metrics keyed by company name; filled in by a later cell.
results = {}

Index(['Company', 'Year', 'Sales/Revenue', 'Sales Growth', 'Gross Income',
       'Gross Income Growth', 'Interest Expense', 'Interest Expense Growth',
       'Income Tax', 'Net Income', 'Net Income Growth', 'EBITDA',
       'EBITDA Growth'],
      dtype='object')
Unique companies in the dataset ['Exxon' 'Chevron' 'BP' 'Phillip 66']


In [None]:
# Candidate hyper-parameter values for the grid search
# (2 x 2 x 2 = 8 combinations in total).
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[None, 10],
    min_samples_split=[2, 5],
)

In [None]:
def train_best_model(X_train, y_train, random_state=42):
    """Grid-search a random forest regressor and return the best fitted model.

    Every combination in the module-level ``param_grid`` is scored with 5-fold
    cross-validation using negative mean squared error. ``GridSearchCV`` then
    refits the winning combination on the full training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training target values.
    random_state : int or None, default=42
        Seed handed to ``RandomForestRegressor`` so that repeated runs select
        the same hyper-parameters and build the same forest. Pass ``None`` to
        get the previous, unseeded behaviour.

    Returns
    -------
    RandomForestRegressor
        The best estimator found by the search, already fitted.
    """
    grid_search = GridSearchCV(
        RandomForestRegressor(random_state=random_state),
        param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    grid_search.fit(X_train, y_train)
    # Print the selected grid values rather than the estimator repr: the repr
    # omits parameters left at their defaults, so it can read as just
    # "RandomForestRegressor()" and say nothing about what was chosen.
    print(f"Best Model Parameters: {grid_search.best_params_}")
    return grid_search.best_estimator_

In [None]:
# Walk over the companies. The loop body only prints the name and takes a
# per-company slice of the data; no modelling happens inside the loop.
# NOTE(review): `company_data` is overwritten on every pass and is not read
# anywhere in the visible cells. `company` keeps its last value once the loop
# ends, and that leftover value is what a later cell uses as the key into
# `results`.
for company in companies:
    print(f'Processing company: {company}')
    company_data = data[data['Company'] == company]

# Select the feature columns for the models.
# NOTE(review): the selection is made on the full `data` frame, not on
# `company_data`, so `x` contains rows for every company. Confirm whether
# per-company models were intended.
features = ['Sales/Revenue', 'Sales Growth', 'Gross Income', 'Interest Expense']
x = data[features]

Processing company: Exxon
Processing company: Chevron
Processing company: BP
Processing company: Phillip 66


In [None]:
# Preview the first rows of the feature frame as plain text.
print(x.head())

   Sales/Revenue  Sales Growth  Gross Income  Interest Expense
0   3.337110e+11       -0.1666  8.232600e+10       849000000.0
1   4.004380e+11        0.4275  1.034910e+11       798000000.0
2   2.805100e+11        0.5744  6.760200e+10       947000000.0
3   1.972170e+11       -0.1656  3.692000e+10       469000000.0
4   2.363680e+11        0.5124  5.012100e+10       516000000.0


In [None]:
# Standardise the features with statistics learned from the whole feature frame.
# NOTE(review): the scaler is fitted before the train/test split, so test rows
# contribute to the scaling statistics — confirm this is acceptable.
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

In [None]:
# Regression targets: one column of `data` per financial figure to predict.
y_income_tax, y_net_income, y_ebitda = (
    data[column] for column in ('Income Tax', 'Net Income', 'EBITDA')
)

In [None]:
# Split the features and all three targets in ONE call so every target is
# guaranteed to share the same train/test rows. The earlier version made three
# separate calls and relied on the repeated seed to keep the rows aligned.
(
    X_train, X_test,
    y_train_income_tax, y_test_income_tax,
    y_train_net_income, y_test_net_income,
    y_train_ebitda, y_test_ebitda,
) = train_test_split(
    x_scaled, y_income_tax, y_net_income, y_ebitda,
    test_size=0.2, random_state=42,
)

# Per-target names kept for the cells that follow; all of them refer to the
# same feature split.
X_train_it = X_train_ni = X_train_ebitda = X_train
X_test_it = X_test_ni = X_test_ebitda = X_test

In [None]:
# Fit one grid-searched random forest per target, in the order
# income tax -> net income -> EBITDA (each call prints what it selected).
model_income_tax, model_net_income, model_ebitda = (
    train_best_model(train_features, train_target)
    for train_features, train_target in (
        (X_train_it, y_train_income_tax),
        (X_train_ni, y_train_net_income),
        (X_train_ebitda, y_train_ebitda),
    )
)

Best Model Parameters: RandomForestRegressor()
Best Model Parameters: RandomForestRegressor(max_depth=10, n_estimators=50)
Best Model Parameters: RandomForestRegressor(max_depth=10, n_estimators=50)


In [None]:
# Predict every target on its held-out test features.
pred_income_tax, pred_net_income, pred_ebitda = (
    fitted_model.predict(held_out_features)
    for fitted_model, held_out_features in (
        (model_income_tax, X_test_it),
        (model_net_income, X_test_ni),
        (model_ebitda, X_test_ebitda),
    )
)

In [None]:
# Pair every target with its own test values and its own predictions. Building
# the summary from this single mapping avoids the copy-paste slip where the
# 'Net Income' and 'EBITDA' entries reported the income-tax predictions.
targets_and_predictions = {
    'Income Tax': (y_test_income_tax, pred_income_tax),
    'Net Income': (y_test_net_income, pred_net_income),
    'EBITDA': (y_test_ebitda, pred_ebitda),
}

# NOTE(review): `company` is whatever value the earlier `for company in
# companies` loop left behind, yet the models were fitted on rows from every
# company. The key therefore labels pooled results with one company's name —
# confirm the intended grouping.
results[company] = {
    target: {
        'MAE': mean_absolute_error(actual, predicted),
        'MSE': mean_squared_error(actual, predicted),
        'R2': r2_score(actual, predicted),
        # First (up to) five predictions, formatted with two decimals.
        'Predictions': [f'{num:.2f}' for num in predicted[:5]],
    }
    for target, (actual, predicted) in targets_and_predictions.items()
}

In [None]:
# Print a text report of the stored metrics, one section per entry in `results`.
for company, metrics in results.items():
    report = [f"\nResults for {company}:"]
    for target, values in metrics.items():
        report.extend([
            f" {target} Evaluation:",
            f" MAE: {values['MAE']:.2f}",
            f" MSE: {values['MSE']:.2f}",
            f" R2: {values['R2']:.2f}",
            f" Predictions: {values['Predictions']}",
        ])
    # A single print of the joined lines yields the same stdout as one print per line.
    print("\n".join(report))


Results for Phillip 66:
 Income Tax Evaluation:
 MAE: 3199493333.33
 MSE: 10571168144066666496.00
 R2: 0.71
 Predictions: ['5671330000.00', '5594430000.00', '11618280000.00']
 Net Income Evaluation:
 MAE: 4156753333.33
 MSE: 23717608954800001024.00
 R2: 0.86
 Predictions: ['5671330000.00', '5594430000.00', '11618280000.00']
 EBITDA Evaluation:
 MAE: 11129646666.67
 MSE: 154338931326266671104.00
 R2: 0.75
 Predictions: ['5671330000.00', '5594430000.00', '11618280000.00']
