<a href="https://colab.research.google.com/github/satyamraj18/Estimated-Shares-Outstanding-Prediction-using-Decision-Tree-Regression.ipynb/blob/main/Estimated_Shares_Outstanding_Prediction_using_Decision_Tree_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import the Packages

In [80]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Import the dataset

In [81]:
dataset = pd.read_csv("fundamentals.csv")
dataset = dataset.drop(['For Year'],axis=1)

In [82]:
dataset.shape

(1781, 78)

In [83]:
dataset = dataset.fillna(dataset.mean()) #Handling missing data

In [84]:
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values

In [85]:
dataset.isnull().sum(axis=0) #Number of data that is null for each column

Unnamed: 0                      0
Ticker Symbol                   0
Period Ending                   0
Accounts Payable                0
Accounts Receivable             0
                               ..
Total Liabilities & Equity      0
Total Revenue                   0
Treasury Stock                  0
Earnings Per Share              0
Estimated Shares Outstanding    0
Length: 78, dtype: int64

## Splitting the dataset

In [86]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)

##Feature Scaling

In [87]:
#Scaling Independent Variables
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
X_train = sc_x.fit_transform(X_train)
X_test = sc_x.fit_transform(X_test)

In [88]:
#Scaling Dependent Variables
from sklearn.preprocessing import StandardScaler
sc_y = StandardScaler()
y_train = y_train.reshape(len(y_train),1)
y_test = y_test.reshape(len(y_test),1)
y_train = sc_y.fit_transform(y_train)
y_test = sc_y.fit_transform(y_test)

##Training the Model

In [89]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 100,random_state=1)
regressor.fit(X_train,y_train)

  This is separate from the ipykernel package so we can avoid doing imports until


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=1, verbose=0, warm_start=False)

##Visualizing the results

In [90]:
np.set_printoptions(precision=2)
print(np.concatenate(((sc_y.inverse_transform(regressor.predict(X_test))).reshape(len(y_test),1),sc_y.inverse_transform(y_test.reshape(len(y_test),1))),1))

[[ 7.17e+08 -1.08e+09]
 [ 1.08e+08  1.44e+08]
 [ 5.83e+08  5.27e+08]
 [ 1.08e+09  7.53e+08]
 [ 5.83e+08  5.62e+08]
 [ 5.72e+08  4.84e+08]
 [ 3.47e+08  3.00e+08]
 [ 3.45e+08  1.57e+08]
 [ 5.61e+07  1.19e+08]
 [ 5.72e+08  5.38e+08]
 [ 6.07e+08  5.44e+08]
 [ 1.20e+09  1.79e+09]
 [ 1.27e+09  1.62e+09]
 [ 2.10e+08  6.02e+08]
 [ 1.52e+09  6.02e+08]
 [ 7.33e+08  5.68e+08]
 [ 1.05e+09  6.02e+08]
 [ 4.92e+08  5.51e+08]
 [ 1.40e+07  5.81e+07]
 [ 2.61e+08  2.93e+08]
 [ 7.13e+07  1.18e+08]
 [ 3.28e+08  4.32e+08]
 [ 1.06e+09  1.16e+09]
 [ 4.70e+07  1.01e+08]
 [ 9.26e+07  1.42e+08]
 [ 1.07e+08  6.02e+08]
 [ 1.95e+08  1.85e+08]
 [ 8.73e+08  9.00e+08]
 [ 2.47e+08  2.62e+08]
 [ 1.29e+09  2.88e+09]
 [ 5.88e+08  3.41e+08]
 [ 1.03e+08  6.02e+08]
 [ 2.81e+08  6.63e+07]
 [ 8.58e+08  6.02e+08]
 [ 1.11e+08  1.47e+08]
 [ 2.93e+08  4.45e+08]
 [ 4.37e+08  3.60e+08]
 [ 5.00e+08  1.28e+08]
 [ 1.59e+08  2.03e+08]
 [ 4.03e+08  1.04e+08]
 [ 1.43e+08  1.25e+08]
 [ 4.97e+08  5.39e+08]
 [ 3.38e+08  2.43e+08]
 [ 2.35e+08

## Evaluating the Performance of the model

In [91]:
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import math
y_pred = regressor.predict(X_test)
print(r2_score(y_test,y_pred))
print(mean_squared_error(y_test, y_pred))
print(math.sqrt(mean_squared_error(y_test, y_pred)))
print(mean_absolute_error(y_test, y_pred))

0.7355659528095313
0.26443404719046865
0.5142315112772345
0.18175035889194585
