In [34]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("ggplot")  #using style ggplot

%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D
import datetime as dt
import plotly.graph_objects as go
import plotly.express as px

In [37]:
df=pd.read_csv(r'/content/drive/My Drive/DataSet/BitCoin Price/ETH-USD.csv')

In [38]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,11/9/2017,308.644989,329.451996,307.056,320.884003,320.884003,893249984
1,11/10/2017,320.67099,324.717987,294.541992,299.252991,299.252991,885985984
2,11/11/2017,298.585999,319.453003,298.191986,314.681,314.681,842300992
3,11/12/2017,314.690002,319.153015,298.513,307.90799,307.90799,1613479936
4,11/13/2017,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984


In [39]:
df.shape

(1088, 7)

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1088 entries, 0 to 1087
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1088 non-null   object 
 1   Open       1088 non-null   float64
 2   High       1088 non-null   float64
 3   Low        1088 non-null   float64
 4   Close      1088 non-null   float64
 5   Adj Close  1088 non-null   float64
 6   Volume     1088 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 59.6+ KB


In [42]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1088.0,1088.0,1088.0,1088.0,1088.0,1088.0
mean,321.424508,332.229444,309.152797,321.421202,321.421202,7110720000.0
std,231.323657,242.874118,217.424233,231.182593,231.182593,5749053000.0
min,84.279694,85.342743,82.829887,84.308296,84.308296,621733000.0
25%,171.250118,175.327286,166.973545,171.322891,171.322891,2211262000.0
50%,227.825035,232.377915,222.171936,227.823296,227.823296,6043249000.0
75%,395.369637,409.81189,384.097122,395.180245,395.180245,10138530000.0
max,1397.47998,1432.880005,1290.599976,1396.420044,1396.420044,31421130000.0


In [43]:
df["Date"]=pd.to_datetime(df["Date"]) #converting date from object to dataetime 

In [44]:
df.isna().sum() #checking null values

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [45]:
df.describe().round(2)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1088.0,1088.0,1088.0,1088.0,1088.0,1088.0
mean,321.42,332.23,309.15,321.42,321.42,7110720000.0
std,231.32,242.87,217.42,231.18,231.18,5749053000.0
min,84.28,85.34,82.83,84.31,84.31,621733000.0
25%,171.25,175.33,166.97,171.32,171.32,2211262000.0
50%,227.83,232.38,222.17,227.82,227.82,6043249000.0
75%,395.37,409.81,384.1,395.18,395.18,10138530000.0
max,1397.48,1432.88,1290.6,1396.42,1396.42,31421130000.0


In [46]:
df["Day"]=df["Date"].dt.day_name() #add columns Day 

In [47]:
#making group by using day
day=df.groupby("Day")["Open","High","Low","Close"].mean().round(2)
day


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,Open,High,Low,Close
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Friday,319.14,329.72,305.91,320.11
Monday,323.0,333.44,310.47,322.26
Saturday,320.21,331.89,313.31,323.85
Sunday,323.43,332.43,311.25,322.81
Thursday,319.96,332.32,307.5,319.23
Tuesday,322.39,333.05,308.47,321.84
Wednesday,321.88,332.77,307.17,319.86


In [48]:
# using ploty making interactive graph

px.line(df, x="Date",y="Open",color="Day")

In [49]:
px.line(df, x="Date",y="High",color="Day")

In [50]:
px.line(df, x="Date",y="Close",color="Day")

In [51]:
# building a machine

#LINEAR REGRESSION

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [52]:
#addiny year, month 
df['Year']=df['Date'].dt.year
df['Month']=df['Date'].dt.month

In [53]:
#drop date and day 
df.drop(columns=["Date","Day"],inplace=True)
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Year,Month
0,308.644989,329.451996,307.056,320.884003,320.884003,893249984,2017,11
1,320.67099,324.717987,294.541992,299.252991,299.252991,885985984,2017,11
2,298.585999,319.453003,298.191986,314.681,314.681,842300992,2017,11
3,314.690002,319.153015,298.513,307.90799,307.90799,1613479936,2017,11
4,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,2017,11


In [54]:
#defining x and y values
X=df.drop(columns="Close")
Y=df["Close"]

In [55]:
#spliting dataset in train and test 70 and 30 
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=42)

In [56]:
Y_train=Y_train.values.tolist();

In [57]:
Max=np.max(Y_train)
Min=np.min(Y_train)
for i in range(0,len(Y_train)):
  Y_train[i]=(Y_train[i]-Min)/(Max-Min)

In [58]:
Y_test=Y_test.values.tolist();

In [59]:
Max=np.max(Y_test)
Min=np.min(Y_test)
for i in range(0,len(Y_test)):
  Y_test[i]=(Y_test[i]-Min)/(Max-Min)

In [60]:
from xgboost import XGBRegressor

In [61]:
XGB_model=XGBRegressor(objective ='reg:linear',verbosity = 0)

In [62]:
XGB_model.fit(X_train,Y_train)



XGBRegressor(verbosity=0)

In [63]:
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
import numpy as np
Y_Test=XGB_model.predict(X_test)
print('R2= ' + str(r2_score(Y_test,Y_Test)*100))
print('MSE= ' + str(mean_squared_error(Y_test,Y_Test)))
print('RMSE= ' + str(np.sqrt(mean_squared_error(Y_test,Y_Test))))
print('MAE= ' + str(mean_absolute_error(Y_test,Y_Test)))

R2= 98.72003686072112
MSE= 0.0004731528652157438
RMSE= 0.021752077262085657
MAE= 0.017582770645583095


In [64]:
Y_Predict=XGB_model.predict(X)

In [65]:
df=pd.read_csv(r'/content/drive/My Drive/DataSet/BitCoin Price/ETH-USD.csv')

In [66]:
df1=df

In [67]:
for i in range(0,len(Y_Predict)):
  Y_Predict[i]=(Y_Predict[i])*(Max-Min)

In [68]:
df1.insert(4,"Close Predict", Y_Predict, True)

In [69]:
px.line(df1, x="Date",y=["Close","Close Predict"])