In [2]:
import numpy as np
import pandas as pd

In [3]:
# Reading CSV
df = pd.read_csv("CarbonStock Data.csv")
df = df.iloc[:,0:6]
df.head()

Unnamed: 0,Company,Free Cash Flow Firm Q/Q Growth,EBIT Q/Q Growth,Profit (Net Income) Margin,Net Profit,MarketCap
0,AAPL,0.019964,0.022169,0.215499,51473540000.0,980449000000.0
1,MSFT,-0.029103,0.060779,0.115677,10913000000.0,798807000000.0
2,GOOG,-0.235011,0.136947,0.141892,14302640000.0,808944000000.0
3,AMZN,-0.11391,0.25,0.020384,4156086000.0,844484000000.0
4,CROX,0.171277,0.608614,0.015097,14740360.0,1322275000.0


In [6]:
# Setting our features
feature_cols = ["Free Cash Flow Firm Q/Q Growth", "EBIT Q/Q Growth", "Profit (Net Income) Margin", "Net Profit"]
X = df[feature_cols]
print(X.shape)
X.head()

(50, 4)


Unnamed: 0,Free Cash Flow Firm Q/Q Growth,EBIT Q/Q Growth,Profit (Net Income) Margin,Net Profit
0,0.019964,0.022169,0.215499,51473540000.0
1,-0.029103,0.060779,0.115677,10913000000.0
2,-0.235011,0.136947,0.141892,14302640000.0
3,-0.11391,0.25,0.020384,4156086000.0
4,0.171277,0.608614,0.015097,14740360.0


In [391]:
# Normalizing X
X_N = (X - X.mean()) / (X.max() - X.min())
X_N.head()

Unnamed: 0,Free Cash Flow Firm Q/Q Growth,EBIT Q/Q Growth,Profit (Net Income) Margin,Net Profit
0,-0.035302,-0.017511,0.013127,0.818774
1,-0.038046,-0.014609,-0.021084,0.093141
2,-0.049557,-0.008885,-0.0121,0.153782
3,-0.042787,-0.00039,-0.053744,-0.027741
4,-0.026843,0.026559,-0.055556,-0.101831


In [7]:
# Setting our y
y = df['MarketCap']
print(y.shape)
y.head()

(50,)


0    9.804490e+11
1    7.988070e+11
2    8.089440e+11
3    8.444840e+11
4    1.322275e+09
Name: MarketCap, dtype: float64

In [8]:
# Normalizing y
y_N = (y - y.mean()) / (y.max() - y.min())
y_N.head()

0    0.801813
1    0.616515
2    0.626856
3    0.663111
4   -0.197021
Name: MarketCap, dtype: float64

In [394]:
# Dividing it into test and train values
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_N, y_N, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(37, 4)
(37,)
(13, 4)
(13,)


In [395]:
# Training on Lin Reg
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [396]:
# Receiving our weights
print(linreg.intercept_)
print(linreg.coef_)

-0.04233684343609946
[ 0.10924712 -0.10758088 -1.0276509   1.04009567]


In [397]:
# Predicting our values
y_pred = linreg.predict(X_test)

In [398]:
# Calculating error MSE
from sklearn import metrics
print(metrics.mean_squared_error(y_test, y_pred))

0.13653982662197817


## Predicting New Values

In [381]:
# New Data Point

# AMGN
X_test2 = [0.010855, 0.015687, 0.096735, 2299062321]
y_test2 = [130436505000]

#QCOM
X_test2 = [0.063498, -0.154424, -0.201504, -4423174261]
y_test2 = [89400960000]

In [382]:
# Normalizing our features data
Xmean = X.mean()
Xmax = X.max()
Xmin = X.min()

X_test2_N = [[ ((X_test2[0] - Xmean[0])/(Xmax[0] - Xmin[0])), 
               ((X_test2[1] - Xmean[1])/(Xmax[1] - Xmin[1])), 
               ((X_test2[2] - Xmean[2])/(Xmax[2] - Xmin[2])), 
               ((X_test2[3] - Xmean[3])/(Xmax[3] - Xmin[3])) ]]

print(X_test2_N)

[[-0.0328684779613428, -0.030781381743897305, -0.1297911589299907, -0.18122553113678086]]


In [383]:
# Normalizing our y values
y_test2_N = [ ((187138760000 - y.mean())/(y.max() - y.min())) ]
print(y_test2_N)

[-0.007464388121421865]


In [384]:
y_pred2_N = linreg.predict(X_test2_N)
print(y_pred2_N)

[-0.09772803]


In [385]:
print(np.sqrt(metrics.mean_squared_error(y_test2_N, y_pred2_N)))

0.09026364265392967


In [399]:
y_pred2 = (y_pred2_N[0]*(y.max()-y.min()) + y.mean())
print(y_pred2)

98656028501.02336


In [400]:
print(y_actual > y_test2[0])

True
